summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap2
-rw-r--r--CREDITS5
-rw-r--r--Documentation/ABI/stable/sysfs-driver-dma-idxd52
-rw-r--r--Documentation/ABI/testing/debugfs-driver-habanalabs23
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-filter-admv881816
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-frequency-admv101338
-rw-r--r--Documentation/ABI/testing/sysfs-bus-vdpa57
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs12
-rw-r--r--Documentation/accounting/delay-accounting.rst55
-rw-r--r--Documentation/accounting/psi.rst3
-rw-r--r--Documentation/admin-guide/cgroup-v1/hugetlb.rst4
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst11
-rw-r--r--Documentation/admin-guide/mm/damon/reclaim.rst25
-rw-r--r--Documentation/admin-guide/mm/damon/usage.rst225
-rw-r--r--Documentation/admin-guide/mm/numa_memory_policy.rst16
-rw-r--r--Documentation/admin-guide/sysctl/vm.rst2
-rw-r--r--Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml6
-rw-r--r--Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ps8640.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml7
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,td.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml8
-rw-r--r--Documentation/devicetree/bindings/dma/arm,pl330.yaml83
-rw-r--r--Documentation/devicetree/bindings/dma/arm-pl08x.yaml4
-rw-r--r--Documentation/devicetree/bindings/dma/arm-pl330.txt49
-rw-r--r--Documentation/devicetree/bindings/dma/dma-controller.yaml8
-rw-r--r--Documentation/devicetree/bindings/dma/ingenic,dma.yaml42
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml5
-rw-r--r--Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml3
-rw-r--r--Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml1
-rw-r--r--Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml1
-rw-r--r--Documentation/devicetree/bindings/eeprom/at24.yaml29
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml1
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt22
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.yaml54
-rw-r--r--Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml1
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-exynos5.txt53
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml133
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml3
-rw-r--r--Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml227
-rw-r--r--Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml158
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml217
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5755.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad7293.yaml61
-rw-r--r--Documentation/devicetree/bindings/iio/filter/adi,admv8818.yaml66
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml91
-rw-r--r--Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml7
-rw-r--r--Documentation/devicetree/bindings/iio/light/liteon,ltr501.yaml7
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml1
-rw-r--r--Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml1
-rw-r--r--Documentation/devicetree/bindings/input/gpio-keys.yaml6
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml115
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zinitix.txt40
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml1
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,qcm2290.yaml137
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml143
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml11
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sdm660.yaml185
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml12
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml12
-rw-r--r--Documentation/devicetree/bindings/mfd/cirrus,madera.yaml3
-rw-r--r--Documentation/devicetree/bindings/mfd/google,cros-ec.yaml1
-rw-r--r--Documentation/devicetree/bindings/mmc/arm,pl18x.yaml6
-rw-r--r--Documentation/devicetree/bindings/mux/gpio-mux.yaml11
-rw-r--r--Documentation/devicetree/bindings/mux/mux-consumer.yaml21
-rw-r--r--Documentation/devicetree/bindings/mux/mux-controller.yaml26
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,m_can.yaml52
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-controller.yaml59
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fman.txt9
-rw-r--r--Documentation/devicetree/bindings/net/oxnas-dwmac.txt3
-rw-r--r--Documentation/devicetree/bindings/nvmem/brcm,nvram.yaml3
-rw-r--r--Documentation/devicetree/bindings/nvmem/mtk-efuse.txt2
-rw-r--r--Documentation/devicetree/bindings/nvmem/nvmem.yaml17
-rw-r--r--Documentation/devicetree/bindings/nvmem/rmem.yaml3
-rw-r--r--Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml3
-rw-r--r--Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml27
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml6
-rw-r--r--Documentation/devicetree/bindings/pci/ti,am65-pci-ep.yaml8
-rw-r--r--Documentation/devicetree/bindings/pci/ti,am65-pci-host.yaml16
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,meson8-hdmi-tx-phy.yaml65
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml92
-rw-r--r--Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml46
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,tphy.yaml18
-rw-r--r--Documentation/devicetree/bindings/phy/microchip,lan966x-serdes.yaml59
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml9
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml4
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml44
-rw-r--r--Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt28
-rw-r--r--Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml44
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml67
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml6
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-snps-femto-v2.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-ahci-phy.yaml21
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml1
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml9
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml10
-rw-r--r--Documentation/devicetree/bindings/power/reset/gpio-restart.yaml4
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm.yaml2
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml28
-rw-r--r--Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml65
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml3
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml8
-rw-r--r--Documentation/devicetree/bindings/rtc/epson,rx8900.yaml1
-rw-r--r--Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml9
-rw-r--r--Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml1
-rw-r--r--Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml56
-rw-r--r--Documentation/devicetree/bindings/sound/samsung-i2s.yaml6
-rw-r--r--Documentation/devicetree/bindings/spmi/mtk,spmi-mtk-pmif.yaml76
-rw-r--r--Documentation/devicetree/bindings/spmi/spmi.yaml3
-rw-r--r--Documentation/devicetree/bindings/trivial-devices.yaml8
-rw-r--r--Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt1
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.yaml10
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml43
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml7
-rw-r--r--Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml2
-rw-r--r--Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml91
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml75
-rw-r--r--Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml48
-rw-r--r--Documentation/driver-api/dmaengine/dmatest.rst17
-rw-r--r--Documentation/driver-api/dmaengine/provider.rst23
-rw-r--r--Documentation/driver-api/firewire.rst4
-rw-r--r--Documentation/driver-api/fpga/fpga-bridge.rst6
-rw-r--r--Documentation/driver-api/fpga/fpga-mgr.rst38
-rw-r--r--Documentation/driver-api/fpga/fpga-region.rst12
-rw-r--r--Documentation/driver-api/generic-counter.rst10
-rw-r--r--Documentation/filesystems/ceph.rst25
-rw-r--r--Documentation/filesystems/f2fs.rst1
-rw-r--r--Documentation/filesystems/proc.rst6
-rw-r--r--Documentation/kbuild/kconfig-language.rst2
-rw-r--r--Documentation/livepatch/api.rst30
-rw-r--r--Documentation/livepatch/index.rst1
-rw-r--r--Documentation/livepatch/shadow-vars.rst4
-rw-r--r--Documentation/livepatch/system-state.rst4
-rw-r--r--Documentation/process/changes.rst2
-rw-r--r--Documentation/riscv/vm-layout.rst12
-rw-r--r--Documentation/staging/tee.rst4
-rw-r--r--Documentation/tools/rtla/Makefile41
-rw-r--r--Documentation/tools/rtla/common_appendix.rst12
-rw-r--r--Documentation/tools/rtla/common_hist_options.rst23
-rw-r--r--Documentation/tools/rtla/common_options.rst28
-rw-r--r--Documentation/tools/rtla/common_osnoise_description.rst8
-rw-r--r--Documentation/tools/rtla/common_osnoise_options.rst17
-rw-r--r--Documentation/tools/rtla/common_timerlat_description.rst10
-rw-r--r--Documentation/tools/rtla/common_timerlat_options.rst16
-rw-r--r--Documentation/tools/rtla/common_top_options.rst3
-rw-r--r--Documentation/tools/rtla/rtla-osnoise-hist.rst66
-rw-r--r--Documentation/tools/rtla/rtla-osnoise-top.rst61
-rw-r--r--Documentation/tools/rtla/rtla-osnoise.rst59
-rw-r--r--Documentation/tools/rtla/rtla-timerlat-hist.rst106
-rw-r--r--Documentation/tools/rtla/rtla-timerlat-top.rst145
-rw-r--r--Documentation/tools/rtla/rtla-timerlat.rst57
-rw-r--r--Documentation/tools/rtla/rtla.rst48
-rw-r--r--Documentation/trace/coresight/coresight-config.rst78
-rw-r--r--Documentation/trace/events.rst19
-rw-r--r--Documentation/trace/ftrace.rst2
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--Documentation/virt/kvm/api.rst87
-rw-r--r--Documentation/virt/kvm/mmu.rst8
-rw-r--r--Documentation/vm/arch_pgtable_helpers.rst20
-rw-r--r--Documentation/vm/cleancache.rst296
-rw-r--r--Documentation/vm/frontswap.rst31
-rw-r--r--Documentation/vm/index.rst3
-rw-r--r--Documentation/vm/page_migration.rst12
-rw-r--r--Documentation/vm/page_table_check.rst56
-rw-r--r--Documentation/vm/vmalloced-kernel-stacks.rst153
-rw-r--r--MAINTAINERS123
-rw-r--r--Makefile21
-rw-r--r--arch/Kconfig10
-rw-r--r--arch/alpha/include/asm/bitops.h2
-rw-r--r--arch/alpha/kernel/rtc.c7
-rw-r--r--arch/alpha/kernel/srm_env.c4
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/alpha/kernel/traps.c6
-rw-r--r--arch/alpha/mm/fault.c18
-rw-r--r--arch/arc/Kconfig1
-rw-r--r--arch/arc/Makefile4
-rw-r--r--arch/arc/boot/dts/Makefile4
-rw-r--r--arch/arc/include/asm/bitops.h1
-rw-r--r--arch/arc/include/asm/irqflags-compact.h8
-rw-r--r--arch/arc/include/asm/perf_event.h162
-rw-r--r--arch/arc/include/asm/thread_info.h4
-rw-r--r--arch/arc/kernel/perf_event.c166
-rw-r--r--arch/arc/kernel/unwind.c11
-rw-r--r--arch/arc/mm/dma.c2
-rw-r--r--arch/arc/mm/fault.c3
-rw-r--r--arch/arc/plat-axs10x/axs10x.c2
-rw-r--r--arch/arc/plat-hsdk/platform.c2
-rw-r--r--arch/arm/Kconfig.debug2
-rw-r--r--arch/arm/boot/compressed/Makefile8
-rw-r--r--arch/arm/configs/bcm2835_defconfig1
-rw-r--r--arch/arm/configs/qcom_defconfig1
-rw-r--r--arch/arm/include/asm/bitops.h1
-rw-r--r--arch/arm/include/asm/io.h5
-rw-r--r--arch/arm/kernel/atags_proc.c2
-rw-r--r--arch/arm/kernel/traps.c2
-rw-r--r--arch/arm/mach-dove/pcie.c9
-rw-r--r--arch/arm/mach-iop32x/pci.c5
-rw-r--r--arch/arm/mach-mv78xx0/pcie.c5
-rw-r--r--arch/arm/mach-orion5x/pci.c10
-rw-r--r--arch/arm/mm/alignment.c2
-rw-r--r--arch/arm/mm/fault.c4
-rw-r--r--arch/arm/mm/ioremap.c16
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/Kconfig21
-rw-r--r--arch/arm64/include/asm/atomic_lse.h2
-rw-r--r--arch/arm64/include/asm/bitops.h1
-rw-r--r--arch/arm64/include/asm/cmpxchg.h2
-rw-r--r--arch/arm64/include/asm/hyperv-tlfs.h9
-rw-r--r--arch/arm64/include/asm/kvm_asm.h1
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h4
-rw-r--r--arch/arm64/include/asm/kvm_host.h47
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h1
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h2
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h30
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h71
-rw-r--r--arch/arm64/include/asm/mmu.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h1
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/kernel/asm-offsets.c1
-rw-r--r--arch/arm64/kernel/fpsimd.c6
-rw-r--r--arch/arm64/kernel/module.c4
-rw-r--r--arch/arm64/kernel/traps.c2
-rw-r--r--arch/arm64/kvm/.gitignore2
-rw-r--r--arch/arm64/kvm/Kconfig1
-rw-r--r--arch/arm64/kvm/Makefile24
-rw-r--r--arch/arm64/kvm/arch_timer.c13
-rw-r--r--arch/arm64/kvm/arm.c128
-rw-r--r--arch/arm64/kvm/fpsimd.c79
-rw-r--r--arch/arm64/kvm/handle_exit.c5
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S6
-rw-r--r--arch/arm64/kvm/hyp/hyp-constants.c10
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h30
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mem_protect.h6
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mm.h59
-rw-r--r--arch/arm64/kvm/hyp/nvhe/early_alloc.c5
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c505
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/page_alloc.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c25
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c1
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c108
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c1
-rw-r--r--arch/arm64/kvm/mmu.c177
-rw-r--r--arch/arm64/kvm/pkvm.c (renamed from arch/arm64/kvm/hyp/reserved_mem.c)8
-rw-r--r--arch/arm64/kvm/pmu-emul.c3
-rw-r--r--arch/arm64/kvm/psci.c10
-rw-r--r--arch/arm64/kvm/reset.c30
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c12
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v2.c3
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c15
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.h2
-rw-r--r--arch/arm64/kvm/vgic/vgic-v2.c9
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c10
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c5
-rw-r--r--arch/arm64/kvm/vgic/vgic.c2
-rw-r--r--arch/arm64/mm/fault.c8
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/csky/abiv1/alignment.c2
-rw-r--r--arch/csky/include/asm/bitops.h1
-rw-r--r--arch/csky/kernel/traps.c2
-rw-r--r--arch/csky/mm/fault.c2
-rw-r--r--arch/h8300/boot/compressed/Makefile4
-rw-r--r--arch/h8300/boot/dts/Makefile6
-rw-r--r--arch/h8300/include/asm/bitops.h1
-rw-r--r--arch/h8300/kernel/traps.c3
-rw-r--r--arch/h8300/mm/fault.c2
-rw-r--r--arch/hexagon/include/asm/bitops.h1
-rw-r--r--arch/hexagon/kernel/traps.c2
-rw-r--r--arch/hexagon/mm/vm_fault.c8
-rw-r--r--arch/ia64/Kconfig9
-rw-r--r--arch/ia64/include/asm/bitops.h2
-rw-r--r--arch/ia64/kernel/mca_drv.c2
-rw-r--r--arch/ia64/kernel/module.c6
-rw-r--r--arch/ia64/kernel/salinfo.c10
-rw-r--r--arch/ia64/kernel/setup.c5
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/ia64/kernel/topology.c3
-rw-r--r--arch/ia64/kernel/traps.c2
-rw-r--r--arch/ia64/kernel/uncached.c2
-rw-r--r--arch/ia64/mm/fault.c18
-rw-r--r--arch/m68k/configs/amiga_defconfig1
-rw-r--r--arch/m68k/configs/apollo_defconfig1
-rw-r--r--arch/m68k/configs/atari_defconfig1
-rw-r--r--arch/m68k/configs/bvme6000_defconfig1
-rw-r--r--arch/m68k/configs/hp300_defconfig1
-rw-r--r--arch/m68k/configs/mac_defconfig1
-rw-r--r--arch/m68k/configs/multi_defconfig1
-rw-r--r--arch/m68k/configs/mvme147_defconfig1
-rw-r--r--arch/m68k/configs/mvme16x_defconfig1
-rw-r--r--arch/m68k/configs/q40_defconfig1
-rw-r--r--arch/m68k/configs/sun3_defconfig1
-rw-r--r--arch/m68k/configs/sun3x_defconfig1
-rw-r--r--arch/m68k/include/asm/bitops.h2
-rw-r--r--arch/m68k/kernel/ptrace.c12
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/m68k/kernel/traps.c2
-rw-r--r--arch/m68k/mm/fault.c20
-rw-r--r--arch/microblaze/Makefile8
-rw-r--r--arch/microblaze/kernel/exceptions.c4
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/microblaze/mm/fault.c22
-rw-r--r--arch/mips/Kconfig11
-rw-r--r--arch/mips/bcm63xx/dev-wdt.c8
-rw-r--r--arch/mips/boot/compressed/Makefile12
-rw-r--r--arch/mips/include/asm/bitops.h1
-rw-r--r--arch/mips/include/asm/kvm_host.h1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/mips/kernel/traps.c2
-rw-r--r--arch/mips/kvm/Kconfig1
-rw-r--r--arch/mips/kvm/Makefile3
-rw-r--r--arch/mips/kvm/emulate.c2
-rw-r--r--arch/mips/kvm/loongson_ipi.c4
-rw-r--r--arch/mips/kvm/mips.c32
-rw-r--r--arch/mips/mm/fault.c19
-rw-r--r--arch/mips/mm/init.c14
-rw-r--r--arch/mips/ralink/mt7621.c31
-rw-r--r--arch/nds32/Makefile6
-rw-r--r--arch/nds32/boot/dts/Makefile7
-rw-r--r--arch/nds32/kernel/fpu.c2
-rw-r--r--arch/nds32/kernel/traps.c8
-rw-r--r--arch/nds32/mm/fault.c18
-rw-r--r--arch/nios2/boot/dts/Makefile2
-rw-r--r--arch/nios2/kernel/traps.c4
-rw-r--r--arch/nios2/mm/fault.c18
-rw-r--r--arch/openrisc/Kconfig1
-rw-r--r--arch/openrisc/boot/dts/Makefile7
-rw-r--r--arch/openrisc/include/asm/bitops.h1
-rw-r--r--arch/openrisc/include/asm/syscalls.h2
-rw-r--r--arch/openrisc/kernel/entry.S27
-rw-r--r--arch/openrisc/kernel/time.c4
-rw-r--r--arch/openrisc/kernel/traps.c2
-rw-r--r--arch/openrisc/mm/fault.c18
-rw-r--r--arch/parisc/boot/compressed/Makefile24
-rw-r--r--arch/parisc/include/asm/bitops.h1
-rw-r--r--arch/parisc/include/asm/processor.h1
-rw-r--r--arch/parisc/kernel/setup.c15
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/parisc/kernel/toc.c3
-rw-r--r--arch/parisc/kernel/traps.c2
-rw-r--r--arch/parisc/mm/fault.c18
-rw-r--r--arch/powerpc/Kconfig17
-rw-r--r--arch/powerpc/boot/Makefile2
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi2
-rw-r--r--arch/powerpc/boot/dts/wii.dts5
-rw-r--r--arch/powerpc/configs/gamecube_defconfig2
-rw-r--r--arch/powerpc/configs/wii_defconfig2
-rw-r--r--arch/powerpc/include/asm/bitops.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h14
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c4
-rw-r--r--arch/powerpc/kernel/setup_64.c97
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/traps.c8
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/Makefile8
-rw-r--r--arch/powerpc/kvm/book3s.c14
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c36
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_uvmem.c14
-rw-r--r--arch/powerpc/kvm/book3s_pr.c13
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c2
-rw-r--r--arch/powerpc/kvm/book3s_xics.c6
-rw-r--r--arch/powerpc/kvm/book3s_xics.h2
-rw-r--r--arch/powerpc/kvm/book3s_xive.c15
-rw-r--r--arch/powerpc/kvm/book3s_xive.h4
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c8
-rw-r--r--arch/powerpc/kvm/booke.c9
-rw-r--r--arch/powerpc/kvm/e500_emulate.c2
-rw-r--r--arch/powerpc/kvm/powerpc.c24
-rw-r--r--arch/powerpc/mm/fault.c6
-rw-r--r--arch/powerpc/platforms/pasemi/dma_lib.c4
-rw-r--r--arch/riscv/Kconfig64
-rw-r--r--arch/riscv/boot/dts/canaan/Makefile4
-rw-r--r--arch/riscv/boot/dts/canaan/k210.dtsi23
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_go.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maixduino.dts2
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts4
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi60
-rw-r--r--arch/riscv/boot/dts/sifive/fu540-c000.dtsi40
-rw-r--r--arch/riscv/boot/dts/sifive/fu740-c000.dtsi14
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts5
-rw-r--r--arch/riscv/configs/defconfig13
-rw-r--r--arch/riscv/configs/nommu_k210_defconfig3
-rw-r--r--arch/riscv/configs/nommu_k210_sdcard_defconfig4
-rw-r--r--arch/riscv/configs/nommu_virt_defconfig3
-rw-r--r--arch/riscv/configs/rv32_defconfig6
-rw-r--r--arch/riscv/errata/alternative.c3
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/asm/asm-extable.h65
-rw-r--r--arch/riscv/include/asm/bitops.h1
-rw-r--r--arch/riscv/include/asm/cpu_ops.h2
-rw-r--r--arch/riscv/include/asm/cpu_ops_sbi.h25
-rw-r--r--arch/riscv/include/asm/csr.h3
-rw-r--r--arch/riscv/include/asm/extable.h48
-rw-r--r--arch/riscv/include/asm/fixmap.h1
-rw-r--r--arch/riscv/include/asm/futex.h30
-rw-r--r--arch/riscv/include/asm/gpr-num.h77
-rw-r--r--arch/riscv/include/asm/kasan.h11
-rw-r--r--arch/riscv/include/asm/kvm_host.h12
-rw-r--r--arch/riscv/include/asm/kvm_types.h2
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_sbi.h33
-rw-r--r--arch/riscv/include/asm/page.h16
-rw-r--r--arch/riscv/include/asm/pgalloc.h40
-rw-r--r--arch/riscv/include/asm/pgtable-64.h108
-rw-r--r--arch/riscv/include/asm/pgtable-bits.h2
-rw-r--r--arch/riscv/include/asm/pgtable.h85
-rw-r--r--arch/riscv/include/asm/sbi.h52
-rw-r--r--arch/riscv/include/asm/smp.h10
-rw-r--r--arch/riscv/include/asm/sparsemem.h6
-rw-r--r--arch/riscv/include/asm/uaccess.h163
-rw-r--r--arch/riscv/kernel/Makefile3
-rw-r--r--arch/riscv/kernel/asm-offsets.c3
-rw-r--r--arch/riscv/kernel/cpu-hotplug.c8
-rw-r--r--arch/riscv/kernel/cpu.c23
-rw-r--r--arch/riscv/kernel/cpu_ops.c26
-rw-r--r--arch/riscv/kernel/cpu_ops_sbi.c26
-rw-r--r--arch/riscv/kernel/cpu_ops_spinwait.c27
-rw-r--r--arch/riscv/kernel/head.S51
-rw-r--r--arch/riscv/kernel/head.h6
-rw-r--r--arch/riscv/kernel/kexec_relocate.S20
-rw-r--r--arch/riscv/kernel/machine_kexec.c3
-rw-r--r--arch/riscv/kernel/ptrace.c4
-rw-r--r--arch/riscv/kernel/sbi.c224
-rw-r--r--arch/riscv/kernel/smp.c10
-rw-r--r--arch/riscv/kernel/smpboot.c2
-rw-r--r--arch/riscv/kernel/traps.c2
-rw-r--r--arch/riscv/kernel/vmlinux-xip.lds.S1
-rw-r--r--arch/riscv/kernel/vmlinux.lds.S3
-rw-r--r--arch/riscv/kvm/Makefile10
-rw-r--r--arch/riscv/kvm/main.c8
-rw-r--r--arch/riscv/kvm/mmu.c106
-rw-r--r--arch/riscv/kvm/vcpu.c28
-rw-r--r--arch/riscv/kvm/vcpu_exit.c2
-rw-r--r--arch/riscv/kvm/vcpu_fp.c2
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c213
-rw-r--r--arch/riscv/kvm/vcpu_sbi_base.c99
-rw-r--r--arch/riscv/kvm/vcpu_sbi_hsm.c105
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c132
-rw-r--r--arch/riscv/kvm/vcpu_sbi_v01.c123
-rw-r--r--arch/riscv/kvm/vm.c13
-rw-r--r--arch/riscv/kvm/vmid.c6
-rw-r--r--arch/riscv/lib/uaccess.S28
-rw-r--r--arch/riscv/mm/cacheflush.c5
-rw-r--r--arch/riscv/mm/context.c4
-rw-r--r--arch/riscv/mm/extable.c66
-rw-r--r--arch/riscv/mm/fault.c6
-rw-r--r--arch/riscv/mm/init.c398
-rw-r--r--arch/riscv/mm/kasan_init.c248
-rw-r--r--arch/riscv/mm/tlbflush.c9
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c11
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/boot/compressed/Makefile28
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/defconfig1
-rw-r--r--arch/s390/include/asm/bitops.h1
-rw-r--r--arch/s390/include/asm/cpu_mf.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h2
-rw-r--r--arch/s390/include/asm/uaccess.h120
-rw-r--r--arch/s390/include/asm/uv.h34
-rw-r--r--arch/s390/kernel/dumpstack.c2
-rw-r--r--arch/s390/kernel/module.c5
-rw-r--r--arch/s390/kernel/nmi.c2
-rw-r--r--arch/s390/kernel/perf_cpum_cf_common.c4
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c6
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c2
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/Makefile8
-rw-r--r--arch/s390/kvm/gaccess.c158
-rw-r--r--arch/s390/kvm/interrupt.c12
-rw-r--r--arch/s390/kvm/kvm-s390.c163
-rw-r--r--arch/s390/kvm/kvm-s390.h19
-rw-r--r--arch/s390/kvm/pv.c4
-rw-r--r--arch/s390/kvm/sigp.c28
-rw-r--r--arch/s390/lib/uaccess.c24
-rw-r--r--arch/s390/mm/fault.c28
-rw-r--r--arch/sh/boot/Makefile16
-rw-r--r--arch/sh/boot/compressed/Makefile22
-rw-r--r--arch/sh/boot/dts/Makefile4
-rw-r--r--arch/sh/include/asm/bitops.h1
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/kernel/traps.c2
-rw-r--r--arch/sh/mm/alignment.c4
-rw-r--r--arch/sh/mm/fault.c20
-rw-r--r--arch/sparc/Kconfig12
-rw-r--r--arch/sparc/include/asm/bitops_32.h1
-rw-r--r--arch/sparc/include/asm/bitops_64.h2
-rw-r--r--arch/sparc/kernel/led.c8
-rw-r--r--arch/sparc/kernel/smp_64.c103
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/kernel/traps_32.c4
-rw-r--r--arch/sparc/kernel/traps_64.c4
-rw-r--r--arch/sparc/mm/fault_32.c16
-rw-r--r--arch/sparc/mm/fault_64.c16
-rw-r--r--arch/um/drivers/virt-pci.c2
-rw-r--r--arch/um/kernel/trap.c8
-rw-r--r--arch/x86/Kconfig21
-rw-r--r--arch/x86/boot/compressed/Makefile12
-rw-r--r--arch/x86/entry/entry_32.S6
-rw-r--r--arch/x86/entry/entry_64.S6
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/events/intel/core.c15
-rw-r--r--arch/x86/events/intel/lbr.c168
-rw-r--r--arch/x86/events/intel/uncore.c2
-rw-r--r--arch/x86/events/intel/uncore.h3
-rw-r--r--arch/x86/events/intel/uncore_discovery.c4
-rw-r--r--arch/x86/events/intel/uncore_discovery.h2
-rw-r--r--arch/x86/events/intel/uncore_snb.c214
-rw-r--r--arch/x86/events/intel/uncore_snbep.c2
-rw-r--r--arch/x86/events/perf_event.h10
-rw-r--r--arch/x86/events/rapl.c9
-rw-r--r--arch/x86/hyperv/hv_init.c14
-rw-r--r--arch/x86/hyperv/ivm.c28
-rw-r--r--arch/x86/hyperv/mmu.c19
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/fpu/api.h11
-rw-r--r--arch/x86/include/asm/fpu/types.h32
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h33
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h78
-rw-r--r--arch/x86/include/asm/kvm_page_track.h6
-rw-r--r--arch/x86/include/asm/mshyperv.h9
-rw-r--r--arch/x86/include/asm/pgtable.h31
-rw-r--r--arch/x86/include/uapi/asm/kvm.h16
-rw-r--r--arch/x86/include/uapi/asm/prctl.h26
-rw-r--r--arch/x86/kernel/apic/vector.c4
-rw-r--r--arch/x86/kernel/cc_platform.c8
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c15
-rw-r--r--arch/x86/kernel/dumpstack.c4
-rw-r--r--arch/x86/kernel/early-quirks.c10
-rw-r--r--arch/x86/kernel/fpu/core.c99
-rw-r--r--arch/x86/kernel/fpu/xstate.c147
-rw-r--r--arch/x86/kernel/fpu/xstate.h19
-rw-r--r--arch/x86/kernel/hpet.c8
-rw-r--r--arch/x86/kernel/kvm.c6
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/module.c7
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/resource.c23
-rw-r--r--arch/x86/kernel/setup_percpu.c66
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--arch/x86/kvm/Makefile7
-rw-r--r--arch/x86/kvm/cpuid.c163
-rw-r--r--arch/x86/kvm/cpuid.h2
-rw-r--r--arch/x86/kvm/debugfs.c6
-rw-r--r--arch/x86/kvm/emulate.c55
-rw-r--r--arch/x86/kvm/hyperv.c9
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--arch/x86/kvm/i8259.c5
-rw-r--r--arch/x86/kvm/ioapic.c4
-rw-r--r--arch/x86/kvm/irq_comm.c19
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h20
-rw-r--r--arch/x86/kvm/kvm_emulate.h1
-rw-r--r--arch/x86/kvm/kvm_onhyperv.c3
-rw-r--r--arch/x86/kvm/lapic.c65
-rw-r--r--arch/x86/kvm/mmu.h16
-rw-r--r--arch/x86/kvm/mmu/mmu.c182
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h9
-rw-r--r--arch/x86/kvm/mmu/mmutrace.h2
-rw-r--r--arch/x86/kvm/mmu/page_track.c8
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h48
-rw-r--r--arch/x86/kvm/mmu/spte.c7
-rw-r--r--arch/x86/kvm/mmu/spte.h44
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c8
-rw-r--r--arch/x86/kvm/pmu.c161
-rw-r--r--arch/x86/kvm/pmu.h5
-rw-r--r--arch/x86/kvm/svm/avic.c139
-rw-r--r--arch/x86/kvm/svm/nested.c262
-rw-r--r--arch/x86/kvm/svm/pmu.c23
-rw-r--r--arch/x86/kvm/svm/sev.c9
-rw-r--r--arch/x86/kvm/svm/svm.c528
-rw-r--r--arch/x86/kvm/svm/svm.h86
-rw-r--r--arch/x86/kvm/trace.h24
-rw-r--r--arch/x86/kvm/vmx/capabilities.h13
-rw-r--r--arch/x86/kvm/vmx/nested.c65
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c63
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c268
-rw-r--r--arch/x86/kvm/vmx/posted_intr.h14
-rw-r--r--arch/x86/kvm/vmx/vmcs.h5
-rw-r--r--arch/x86/kvm/vmx/vmx.c284
-rw-r--r--arch/x86/kvm/vmx/vmx.h47
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h27
-rw-r--r--arch/x86/kvm/x86.c475
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/kvm/xen.c341
-rw-r--r--arch/x86/kvm/xen.h9
-rw-r--r--arch/x86/mm/fault.c3
-rw-r--r--arch/x86/pci/acpi.c2
-rw-r--r--arch/x86/um/Kconfig1
-rw-r--r--arch/xtensa/Makefile2
-rw-r--r--arch/xtensa/boot/dts/Makefile5
-rw-r--r--arch/xtensa/include/asm/bitops.h1
-rw-r--r--arch/xtensa/kernel/entry.S2
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/xtensa/kernel/traps.c2
-rw-r--r--arch/xtensa/mm/fault.c17
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c4
-rw-r--r--block/bdev.c5
-rw-r--r--block/bio.c3
-rw-r--r--block/blk-mq-tag.c40
-rw-r--r--block/blk-mq.c3
-rw-r--r--block/blk-sysfs.c4
-rw-r--r--block/mq-deadline.c4
-rw-r--r--certs/.gitignore1
-rw-r--r--certs/Kconfig4
-rw-r--r--certs/Makefile125
-rw-r--r--certs/default_x509.genkey17
-rw-r--r--certs/extract-cert.c (renamed from scripts/extract-cert.c)2
-rw-r--r--crypto/Kconfig2
-rw-r--r--crypto/algboss.c4
-rw-r--r--drivers/accessibility/speakup/speakup_acntpc.c2
-rw-r--r--drivers/accessibility/speakup/speakup_dtlk.c2
-rw-r--r--drivers/accessibility/speakup/speakup_keypc.c2
-rw-r--r--drivers/acpi/Kconfig22
-rw-r--r--drivers/acpi/Makefile3
-rw-r--r--drivers/acpi/acpi_apd.c2
-rw-r--r--drivers/acpi/acpi_pcc.c2
-rw-r--r--drivers/acpi/cppc_acpi.c11
-rw-r--r--drivers/acpi/dptf/dptf_pch_fivr.c1
-rw-r--r--drivers/acpi/dptf/dptf_power.c2
-rw-r--r--drivers/acpi/dptf/int340x_thermal.c6
-rw-r--r--drivers/acpi/fan.h1
-rw-r--r--drivers/acpi/internal.h2
-rw-r--r--drivers/acpi/pfr_telemetry.c435
-rw-r--r--drivers/acpi/pfr_update.c575
-rw-r--r--drivers/acpi/proc.c2
-rw-r--r--drivers/acpi/scan.c76
-rw-r--r--drivers/acpi/spcr.c9
-rw-r--r--drivers/android/binder.c437
-rw-r--r--drivers/ata/Kconfig44
-rw-r--r--drivers/ata/acard-ahci.c4
-rw-r--r--drivers/ata/ahci.c24
-rw-r--r--drivers/ata/ahci_brcm.c4
-rw-r--r--drivers/ata/ahci_ceva.c5
-rw-r--r--drivers/ata/ahci_qoriq.c4
-rw-r--r--drivers/ata/ahci_xgene.c12
-rw-r--r--drivers/ata/ata_piix.c11
-rw-r--r--drivers/ata/libahci.c33
-rw-r--r--drivers/ata/libahci_platform.c14
-rw-r--r--drivers/ata/libata-acpi.c69
-rw-r--r--drivers/ata/libata-core.c231
-rw-r--r--drivers/ata/libata-eh.c72
-rw-r--r--drivers/ata/libata-pmp.c8
-rw-r--r--drivers/ata/libata-sata.c11
-rw-r--r--drivers/ata/libata-scsi.c170
-rw-r--r--drivers/ata/libata-sff.c88
-rw-r--r--drivers/ata/libata-trace.c47
-rw-r--r--drivers/ata/libata-transport.c48
-rw-r--r--drivers/ata/libata.h5
-rw-r--r--drivers/ata/pata_ali.c4
-rw-r--r--drivers/ata/pata_arasan_cf.c3
-rw-r--r--drivers/ata/pata_atp867x.c105
-rw-r--r--drivers/ata/pata_cmd640.c2
-rw-r--r--drivers/ata/pata_cmd64x.c4
-rw-r--r--drivers/ata/pata_cs5520.c4
-rw-r--r--drivers/ata/pata_cs5536.c4
-rw-r--r--drivers/ata/pata_cypress.c2
-rw-r--r--drivers/ata/pata_ep93xx.c1
-rw-r--r--drivers/ata/pata_hpt366.c5
-rw-r--r--drivers/ata/pata_hpt37x.c20
-rw-r--r--drivers/ata/pata_hpt3x2n.c12
-rw-r--r--drivers/ata/pata_it821x.c66
-rw-r--r--drivers/ata/pata_ixp4xx_cf.c6
-rw-r--r--drivers/ata/pata_marvell.c9
-rw-r--r--drivers/ata/pata_netcell.c5
-rw-r--r--drivers/ata/pata_octeon_cf.c54
-rw-r--r--drivers/ata/pata_of_platform.c15
-rw-r--r--drivers/ata/pata_pdc2027x.c71
-rw-r--r--drivers/ata/pata_pdc202xx_old.c2
-rw-r--r--drivers/ata/pata_rz1000.c4
-rw-r--r--drivers/ata/pata_serverworks.c4
-rw-r--r--drivers/ata/pata_sil680.c9
-rw-r--r--drivers/ata/pata_via.c12
-rw-r--r--drivers/ata/pdc_adma.c33
-rw-r--r--drivers/ata/sata_dwc_460ex.c165
-rw-r--r--drivers/ata/sata_fsl.c212
-rw-r--r--drivers/ata/sata_gemini.c4
-rw-r--r--drivers/ata/sata_inic162x.c4
-rw-r--r--drivers/ata/sata_mv.c132
-rw-r--r--drivers/ata/sata_nv.c54
-rw-r--r--drivers/ata/sata_promise.c31
-rw-r--r--drivers/ata/sata_qstor.c15
-rw-r--r--drivers/ata/sata_rcar.c26
-rw-r--r--drivers/ata/sata_sil.c1
-rw-r--r--drivers/ata/sata_sil24.c5
-rw-r--r--drivers/ata/sata_sx4.c148
-rw-r--r--drivers/atm/iphase.c4
-rw-r--r--drivers/base/arch_numa.c68
-rw-r--r--drivers/base/devtmpfs.c7
-rw-r--r--drivers/base/firmware_loader/builtin/Makefile4
-rw-r--r--drivers/base/firmware_loader/fallback.c7
-rw-r--r--drivers/base/firmware_loader/fallback.h11
-rw-r--r--drivers/base/firmware_loader/fallback_table.c25
-rw-r--r--drivers/base/power/trace.c6
-rw-r--r--drivers/base/property.c16
-rw-r--r--drivers/block/aoe/aoecmd.c2
-rw-r--r--drivers/block/brd.c73
-rw-r--r--drivers/block/loop.c2
-rw-r--r--drivers/block/paride/bpck.c1
-rw-r--r--drivers/block/rbd.c5
-rw-r--r--drivers/block/rnbd/rnbd-clt.c2
-rw-r--r--drivers/block/virtio_blk.c4
-rw-r--r--drivers/block/zram/zram_drv.c11
-rw-r--r--drivers/bluetooth/virtio_bt.c2
-rw-r--r--drivers/bus/mhi/core/boot.c2
-rw-r--r--drivers/bus/mhi/core/init.c4
-rw-r--r--drivers/bus/mhi/core/internal.h9
-rw-r--r--drivers/bus/mhi/core/main.c24
-rw-r--r--drivers/bus/mhi/core/pm.c39
-rw-r--r--drivers/bus/mhi/pci_generic.c56
-rw-r--r--drivers/bus/mvebu-mbus.c5
-rw-r--r--drivers/cdrom/cdrom.c23
-rw-r--r--drivers/char/agp/amd64-agp.c24
-rw-r--r--drivers/char/agp/sis-agp.c25
-rw-r--r--drivers/char/agp/via-agp.c25
-rw-r--r--drivers/char/applicom.c4
-rw-r--r--drivers/char/hpet.c22
-rw-r--r--drivers/char/hw_random/virtio-rng.c2
-rw-r--r--drivers/char/mwave/3780i.h2
-rw-r--r--drivers/char/random.c641
-rw-r--r--drivers/char/virtio_console.c4
-rw-r--r--drivers/clk/clk-si5341.c2
-rw-r--r--drivers/clk/mediatek/clk-mt7986-apmixed.c2
-rw-r--r--drivers/clk/mediatek/clk-mt7986-infracfg.c2
-rw-r--r--drivers/clk/mediatek/clk-mt7986-topckgen.c2
-rw-r--r--drivers/clk/visconti/pll.c3
-rw-r--r--drivers/comedi/comedi_buf.c3
-rw-r--r--drivers/comedi/comedi_fops.c2
-rw-r--r--drivers/comedi/comedi_pci.c3
-rw-r--r--drivers/comedi/comedi_pcmcia.c3
-rw-r--r--drivers/comedi/comedi_usb.c3
-rw-r--r--drivers/comedi/drivers.c3
-rw-r--r--drivers/comedi/drivers/8255.c5
-rw-r--r--drivers/comedi/drivers/8255_pci.c6
-rw-r--r--drivers/comedi/drivers/addi_apci_1032.c2
-rw-r--r--drivers/comedi/drivers/addi_apci_1500.c2
-rw-r--r--drivers/comedi/drivers/addi_apci_1516.c2
-rw-r--r--drivers/comedi/drivers/addi_apci_1564.c2
-rw-r--r--drivers/comedi/drivers/addi_apci_16xx.c3
-rw-r--r--drivers/comedi/drivers/addi_apci_2032.c2
-rw-r--r--drivers/comedi/drivers/addi_apci_2200.c2
-rw-r--r--drivers/comedi/drivers/addi_apci_3120.c2
-rw-r--r--drivers/comedi/drivers/addi_apci_3501.c2
-rw-r--r--drivers/comedi/drivers/addi_apci_3xxx.c3
-rw-r--r--drivers/comedi/drivers/addi_watchdog.c2
-rw-r--r--drivers/comedi/drivers/adl_pci6208.c3
-rw-r--r--drivers/comedi/drivers/adl_pci7x3x.c3
-rw-r--r--drivers/comedi/drivers/adl_pci8164.c3
-rw-r--r--drivers/comedi/drivers/adl_pci9111.c5
-rw-r--r--drivers/comedi/drivers/adl_pci9118.c5
-rw-r--r--drivers/comedi/drivers/adq12b.c3
-rw-r--r--drivers/comedi/drivers/adv_pci1710.c5
-rw-r--r--drivers/comedi/drivers/adv_pci1720.c3
-rw-r--r--drivers/comedi/drivers/adv_pci1723.c3
-rw-r--r--drivers/comedi/drivers/adv_pci1724.c3
-rw-r--r--drivers/comedi/drivers/adv_pci1760.c3
-rw-r--r--drivers/comedi/drivers/adv_pci_dio.c8
-rw-r--r--drivers/comedi/drivers/aio_aio12_8.c7
-rw-r--r--drivers/comedi/drivers/aio_iiro_16.c3
-rw-r--r--drivers/comedi/drivers/amplc_dio200.c2
-rw-r--r--drivers/comedi/drivers/amplc_dio200_common.c7
-rw-r--r--drivers/comedi/drivers/amplc_dio200_pci.c3
-rw-r--r--drivers/comedi/drivers/amplc_pc236.c3
-rw-r--r--drivers/comedi/drivers/amplc_pc236_common.c5
-rw-r--r--drivers/comedi/drivers/amplc_pc263.c2
-rw-r--r--drivers/comedi/drivers/amplc_pci224.c6
-rw-r--r--drivers/comedi/drivers/amplc_pci230.c8
-rw-r--r--drivers/comedi/drivers/amplc_pci236.c3
-rw-r--r--drivers/comedi/drivers/amplc_pci263.c3
-rw-r--r--drivers/comedi/drivers/c6xdigio.c3
-rw-r--r--drivers/comedi/drivers/cb_das16_cs.c6
-rw-r--r--drivers/comedi/drivers/cb_pcidas.c7
-rw-r--r--drivers/comedi/drivers/cb_pcidas64.c5
-rw-r--r--drivers/comedi/drivers/cb_pcidda.c6
-rw-r--r--drivers/comedi/drivers/cb_pcimdas.c7
-rw-r--r--drivers/comedi/drivers/cb_pcimdda.c6
-rw-r--r--drivers/comedi/drivers/comedi_8254.c6
-rw-r--r--drivers/comedi/drivers/comedi_8255.c5
-rw-r--r--drivers/comedi/drivers/comedi_bond.c6
-rw-r--r--drivers/comedi/drivers/comedi_isadma.c6
-rw-r--r--drivers/comedi/drivers/comedi_parport.c3
-rw-r--r--drivers/comedi/drivers/comedi_test.c4
-rw-r--r--drivers/comedi/drivers/contec_pci_dio.c3
-rw-r--r--drivers/comedi/drivers/dac02.c3
-rw-r--r--drivers/comedi/drivers/daqboard2000.c5
-rw-r--r--drivers/comedi/drivers/das08.c7
-rw-r--r--drivers/comedi/drivers/das08_cs.c3
-rw-r--r--drivers/comedi/drivers/das08_isa.c2
-rw-r--r--drivers/comedi/drivers/das08_pci.c3
-rw-r--r--drivers/comedi/drivers/das16.c10
-rw-r--r--drivers/comedi/drivers/das16m1.c7
-rw-r--r--drivers/comedi/drivers/das1800.c8
-rw-r--r--drivers/comedi/drivers/das6402.c6
-rw-r--r--drivers/comedi/drivers/das800.c6
-rw-r--r--drivers/comedi/drivers/dmm32at.c5
-rw-r--r--drivers/comedi/drivers/dt2801.c2
-rw-r--r--drivers/comedi/drivers/dt2811.c3
-rw-r--r--drivers/comedi/drivers/dt2814.c3
-rw-r--r--drivers/comedi/drivers/dt2815.c3
-rw-r--r--drivers/comedi/drivers/dt2817.c2
-rw-r--r--drivers/comedi/drivers/dt282x.c6
-rw-r--r--drivers/comedi/drivers/dt3000.c3
-rw-r--r--drivers/comedi/drivers/dt9812.c3
-rw-r--r--drivers/comedi/drivers/dyna_pci10xx.c3
-rw-r--r--drivers/comedi/drivers/fl512.c3
-rw-r--r--drivers/comedi/drivers/gsc_hpdi.c3
-rw-r--r--drivers/comedi/drivers/icp_multi.c3
-rw-r--r--drivers/comedi/drivers/ii_pci20kc.c2
-rw-r--r--drivers/comedi/drivers/jr3_pci.c3
-rw-r--r--drivers/comedi/drivers/ke_counter.c3
-rw-r--r--drivers/comedi/drivers/me4000.c5
-rw-r--r--drivers/comedi/drivers/me_daq.c3
-rw-r--r--drivers/comedi/drivers/mf6x4.c3
-rw-r--r--drivers/comedi/drivers/mite.c3
-rw-r--r--drivers/comedi/drivers/mpc624.c3
-rw-r--r--drivers/comedi/drivers/multiq3.c3
-rw-r--r--drivers/comedi/drivers/ni_6527.c3
-rw-r--r--drivers/comedi/drivers/ni_65xx.c3
-rw-r--r--drivers/comedi/drivers/ni_660x.c3
-rw-r--r--drivers/comedi/drivers/ni_670x.c3
-rw-r--r--drivers/comedi/drivers/ni_at_a2150.c8
-rw-r--r--drivers/comedi/drivers/ni_at_ao.c6
-rw-r--r--drivers/comedi/drivers/ni_atmio.c5
-rw-r--r--drivers/comedi/drivers/ni_atmio16d.c5
-rw-r--r--drivers/comedi/drivers/ni_daq_700.c3
-rw-r--r--drivers/comedi/drivers/ni_daq_dio24.c5
-rw-r--r--drivers/comedi/drivers/ni_labpc.c3
-rw-r--r--drivers/comedi/drivers/ni_labpc_common.c7
-rw-r--r--drivers/comedi/drivers/ni_labpc_cs.c3
-rw-r--r--drivers/comedi/drivers/ni_labpc_isadma.c5
-rw-r--r--drivers/comedi/drivers/ni_labpc_pci.c3
-rw-r--r--drivers/comedi/drivers/ni_mio_common.c2
-rw-r--r--drivers/comedi/drivers/ni_mio_cs.c4
-rw-r--r--drivers/comedi/drivers/ni_pcidio.c3
-rw-r--r--drivers/comedi/drivers/ni_pcimio.c4
-rw-r--r--drivers/comedi/drivers/ni_routes.c3
-rw-r--r--drivers/comedi/drivers/ni_routes.h2
-rw-r--r--drivers/comedi/drivers/ni_routing/ni_route_values.h2
-rw-r--r--drivers/comedi/drivers/ni_routing/tools/.gitignore1
-rw-r--r--drivers/comedi/drivers/ni_routing/tools/Makefile29
-rw-r--r--drivers/comedi/drivers/ni_tio.h2
-rw-r--r--drivers/comedi/drivers/ni_usb6501.c3
-rw-r--r--drivers/comedi/drivers/pcl711.c6
-rw-r--r--drivers/comedi/drivers/pcl724.c5
-rw-r--r--drivers/comedi/drivers/pcl726.c3
-rw-r--r--drivers/comedi/drivers/pcl730.c2
-rw-r--r--drivers/comedi/drivers/pcl812.c8
-rw-r--r--drivers/comedi/drivers/pcl816.c8
-rw-r--r--drivers/comedi/drivers/pcl818.c8
-rw-r--r--drivers/comedi/drivers/pcm3724.c5
-rw-r--r--drivers/comedi/drivers/pcmad.c2
-rw-r--r--drivers/comedi/drivers/pcmda12.c2
-rw-r--r--drivers/comedi/drivers/pcmmio.c3
-rw-r--r--drivers/comedi/drivers/pcmuio.c3
-rw-r--r--drivers/comedi/drivers/quatech_daqp_cs.c3
-rw-r--r--drivers/comedi/drivers/rtd520.c5
-rw-r--r--drivers/comedi/drivers/rti800.c2
-rw-r--r--drivers/comedi/drivers/rti802.c2
-rw-r--r--drivers/comedi/drivers/s526.c2
-rw-r--r--drivers/comedi/drivers/s626.c3
-rw-r--r--drivers/comedi/drivers/ssv_dnp.c2
-rw-r--r--drivers/comedi/drivers/usbdux.c3
-rw-r--r--drivers/comedi/drivers/usbduxfast.c2
-rw-r--r--drivers/comedi/drivers/usbduxsigma.c3
-rw-r--r--drivers/comedi/drivers/vmk80xx.c3
-rw-r--r--drivers/comedi/kcomedilib/kcomedilib_main.c6
-rw-r--r--drivers/comedi/proc.c2
-rw-r--r--drivers/comedi/range.c2
-rw-r--r--drivers/counter/104-quad-8.c175
-rw-r--r--drivers/counter/counter-core.c186
-rw-r--r--drivers/counter/ftm-quaddec.c36
-rw-r--r--drivers/counter/intel-qep.c46
-rw-r--r--drivers/counter/interrupt-cnt.c38
-rw-r--r--drivers/counter/microchip-tcb-capture.c44
-rw-r--r--drivers/counter/stm32-lptimer-cnt.c51
-rw-r--r--drivers/counter/stm32-timer-cnt.c48
-rw-r--r--drivers/counter/ti-eqep.c52
-rw-r--r--drivers/crypto/virtio/virtio_crypto_core.c8
-rw-r--r--drivers/dax/bus.c32
-rw-r--r--drivers/dax/bus.h1
-rw-r--r--drivers/dax/device.c126
-rw-r--r--drivers/dma-buf/heaps/cma_heap.c6
-rw-r--r--drivers/dma/at_xdmac.c194
-rw-r--r--drivers/dma/dma-jz4780.c118
-rw-r--r--drivers/dma/dmaengine.c7
-rw-r--r--drivers/dma/idxd/device.c222
-rw-r--r--drivers/dma/idxd/dma.c40
-rw-r--r--drivers/dma/idxd/idxd.h67
-rw-r--r--drivers/dma/idxd/init.c196
-rw-r--r--drivers/dma/idxd/irq.c239
-rw-r--r--drivers/dma/idxd/registers.h15
-rw-r--r--drivers/dma/idxd/submit.c69
-rw-r--r--drivers/dma/idxd/sysfs.c215
-rw-r--r--drivers/dma/ioat/sysfs.c3
-rw-r--r--drivers/dma/pch_dma.c2
-rw-r--r--drivers/dma/ppc4xx/adma.c3
-rw-r--r--drivers/dma/qcom/gpi.c4
-rw-r--r--drivers/dma/sh/rcar-dmac.c17
-rw-r--r--drivers/dma/sh/shdma-base.c6
-rw-r--r--drivers/dma/stm32-mdma.c78
-rw-r--r--drivers/dma/ti/Makefile3
-rw-r--r--drivers/dma/ti/edma.c3
-rw-r--r--drivers/dma/ti/k3-psil-j721s2.c167
-rw-r--r--drivers/dma/ti/k3-psil-priv.h1
-rw-r--r--drivers/dma/ti/k3-psil.c1
-rw-r--r--drivers/dma/ti/k3-udma.c1
-rw-r--r--drivers/dma/uniphier-xdmac.c5
-rw-r--r--drivers/dma/xilinx/xilinx_dma.c133
-rw-r--r--drivers/extcon/extcon-usb-gpio.c2
-rw-r--r--drivers/extcon/extcon.c14
-rw-r--r--drivers/firmware/arm_scmi/virtio.c2
-rw-r--r--drivers/firmware/efi/libstub/efi-stub.c2
-rw-r--r--drivers/firmware/google/Kconfig6
-rw-r--r--drivers/firmware/qemu_fw_cfg.c21
-rw-r--r--drivers/firmware/xilinx/zynqmp.c40
-rw-r--r--drivers/fpga/altera-cvp.c12
-rw-r--r--drivers/fpga/altera-fpga2sdram.c12
-rw-r--r--drivers/fpga/altera-freeze-bridge.c10
-rw-r--r--drivers/fpga/altera-hps2fpga.c12
-rw-r--r--drivers/fpga/altera-pr-ip-core.c7
-rw-r--r--drivers/fpga/altera-ps-spi.c9
-rw-r--r--drivers/fpga/dfl-fme-br.c10
-rw-r--r--drivers/fpga/dfl-fme-mgr.c22
-rw-r--r--drivers/fpga/dfl-fme-region.c17
-rw-r--r--drivers/fpga/dfl.c12
-rw-r--r--drivers/fpga/fpga-bridge.c122
-rw-r--r--drivers/fpga/fpga-mgr.c215
-rw-r--r--drivers/fpga/fpga-region.c119
-rw-r--r--drivers/fpga/ice40-spi.c9
-rw-r--r--drivers/fpga/machxo2-spi.c9
-rw-r--r--drivers/fpga/of-fpga-region.c12
-rw-r--r--drivers/fpga/socfpga-a10.c16
-rw-r--r--drivers/fpga/socfpga.c9
-rw-r--r--drivers/fpga/stratix10-soc.c18
-rw-r--r--drivers/fpga/ts73xx-fpga.c9
-rw-r--r--drivers/fpga/versal-fpga.c9
-rw-r--r--drivers/fpga/xilinx-pr-decoupler.c17
-rw-r--r--drivers/fpga/xilinx-spi.c11
-rw-r--r--drivers/fpga/zynq-fpga.c16
-rw-r--r--drivers/fpga/zynqmp-fpga.c9
-rw-r--r--drivers/gnss/Kconfig11
-rw-r--r--drivers/gnss/Makefile3
-rw-r--r--drivers/gnss/mtk.c2
-rw-r--r--drivers/gnss/serial.c2
-rw-r--r--drivers/gnss/sirf.c2
-rw-r--r--drivers/gnss/ubx.c2
-rw-r--r--drivers/gnss/usb.c214
-rw-r--r--drivers/gpio/gpio-idt3243x.c6
-rw-r--r--drivers/gpio/gpio-mpc8xxx.c6
-rw-r--r--drivers/gpio/gpio-virtio.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c23
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h4
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c44
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c145
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c114
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_cp_psp.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.h4
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c16
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c14
-rw-r--r--drivers/gpu/drm/drm_dp_mst_topology.c1
-rw-r--r--drivers/gpu/drm/drm_mipi_dbi.c2
-rw-r--r--drivers/gpu/drm/drm_mm.c4
-rw-r--r--drivers/gpu/drm/drm_modeset_lock.c9
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c22
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c10
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c27
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c18
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c22
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h8
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c3
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c22
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c4
-rw-r--r--drivers/gpu/drm/ttm/ttm_module.c4
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_kms.c2
-rw-r--r--drivers/greybus/es2.c2
-rw-r--r--drivers/hid/hid-ids.h1
-rw-r--r--drivers/hid/hid-input.c2
-rw-r--r--drivers/hid/hid-vivaldi.c41
-rw-r--r--drivers/hid/uhid.c49
-rw-r--r--drivers/hid/wacom_wac.c39
-rw-r--r--drivers/hv/channel_mgmt.c2
-rw-r--r--drivers/hv/hv_common.c15
-rw-r--r--drivers/hv/vmbus_drv.c4
-rw-r--r--drivers/hwmon/dell-smm-hwmon.c4
-rw-r--r--drivers/hwmon/ltc2992.c3
-rw-r--r--drivers/hwspinlock/stm32_hwspinlock.c58
-rw-r--r--drivers/hwtracing/coresight/coresight-cfg-preload.c9
-rw-r--r--drivers/hwtracing/coresight/coresight-config.h9
-rw-r--r--drivers/hwtracing/coresight/coresight-core.c2
-rw-r--r--drivers/hwtracing/coresight/coresight-etm4x-core.c11
-rw-r--r--drivers/hwtracing/coresight/coresight-stm.c10
-rw-r--r--drivers/hwtracing/coresight/coresight-syscfg-configfs.c87
-rw-r--r--drivers/hwtracing/coresight/coresight-syscfg-configfs.h4
-rw-r--r--drivers/hwtracing/coresight/coresight-syscfg.c315
-rw-r--r--drivers/hwtracing/coresight/coresight-syscfg.h39
-rw-r--r--drivers/i2c/busses/Kconfig18
-rw-r--r--drivers/i2c/busses/Makefile1
-rw-r--r--drivers/i2c/busses/i2c-aspeed.c2
-rw-r--r--drivers/i2c/busses/i2c-bcm2835.c11
-rw-r--r--drivers/i2c/busses/i2c-designware-core.h13
-rw-r--r--drivers/i2c/busses/i2c-designware-master.c7
-rw-r--r--drivers/i2c/busses/i2c-designware-pcidrv.c51
-rw-r--r--drivers/i2c/busses/i2c-designware-platdrv.c2
-rw-r--r--drivers/i2c/busses/i2c-exynos5.c110
-rw-r--r--drivers/i2c/busses/i2c-i801.c288
-rw-r--r--drivers/i2c/busses/i2c-imx.c92
-rw-r--r--drivers/i2c/busses/i2c-mpc.c23
-rw-r--r--drivers/i2c/busses/i2c-rcar.c26
-rw-r--r--drivers/i2c/busses/i2c-riic.c10
-rw-r--r--drivers/i2c/busses/i2c-rk3x.c7
-rw-r--r--drivers/i2c/busses/i2c-sh_mobile.c60
-rw-r--r--drivers/i2c/busses/i2c-stm32f7.c14
-rw-r--r--drivers/i2c/busses/i2c-tegra.c69
-rw-r--r--drivers/i2c/busses/i2c-virtio.c2
-rw-r--r--drivers/i2c/busses/i2c-xlp9xx.c7
-rw-r--r--drivers/i2c/busses/i2c-xlr.c470
-rw-r--r--drivers/i2c/i2c-core-base.c2
-rw-r--r--drivers/i2c/muxes/i2c-mux-gpio.c53
-rw-r--r--drivers/i3c/master.c3
-rw-r--r--drivers/i3c/master/dw-i3c-master.c4
-rw-r--r--drivers/i3c/master/mipi-i3c-hci/core.c2
-rw-r--r--drivers/i3c/master/mipi-i3c-hci/dat_v1.c4
-rw-r--r--drivers/i3c/master/mipi-i3c-hci/dma.c2
-rw-r--r--drivers/i3c/master/mipi-i3c-hci/hci.h2
-rw-r--r--drivers/i3c/master/svc-i3c-master.c341
-rw-r--r--drivers/iio/Kconfig2
-rw-r--r--drivers/iio/Makefile2
-rw-r--r--drivers/iio/accel/bma180.c4
-rw-r--r--drivers/iio/accel/bma220_spi.c6
-rw-r--r--drivers/iio/accel/bmc150-accel-core.c2
-rw-r--r--drivers/iio/accel/kxcjk-1013.c5
-rw-r--r--drivers/iio/accel/mma7455_core.c3
-rw-r--r--drivers/iio/accel/mma7660.c8
-rw-r--r--drivers/iio/accel/mma8452.c2
-rw-r--r--drivers/iio/accel/mma9553.c2
-rw-r--r--drivers/iio/accel/sca3000.c17
-rw-r--r--drivers/iio/accel/stk8312.c2
-rw-r--r--drivers/iio/accel/stk8ba50.c3
-rw-r--r--drivers/iio/adc/Kconfig21
-rw-r--r--drivers/iio/adc/Makefile1
-rw-r--r--drivers/iio/adc/ad7124.c2
-rw-r--r--drivers/iio/adc/ad7192.c3
-rw-r--r--drivers/iio/adc/ad7266.c3
-rw-r--r--drivers/iio/adc/ad7606.h2
-rw-r--r--drivers/iio/adc/ad_sigma_delta.c4
-rw-r--r--drivers/iio/adc/at91-sama5d2_adc.c38
-rw-r--r--drivers/iio/adc/axp20x_adc.c45
-rw-r--r--drivers/iio/adc/envelope-detector.c3
-rw-r--r--drivers/iio/adc/hi8435.c2
-rw-r--r--drivers/iio/adc/imx7d_adc.c5
-rw-r--r--drivers/iio/adc/ina2xx-adc.c15
-rw-r--r--drivers/iio/adc/lpc18xx_adc.c6
-rw-r--r--drivers/iio/adc/max9611.c20
-rw-r--r--drivers/iio/adc/mcp3911.c9
-rw-r--r--drivers/iio/adc/rcar-gyroadc.c3
-rw-r--r--drivers/iio/adc/rzg2l_adc.c4
-rw-r--r--drivers/iio/adc/stm32-adc.c3
-rw-r--r--drivers/iio/adc/stmpe-adc.c5
-rw-r--r--drivers/iio/adc/ti-adc081c.c22
-rw-r--r--drivers/iio/adc/ti-adc12138.c14
-rw-r--r--drivers/iio/adc/ti-ads1015.c10
-rw-r--r--drivers/iio/adc/ti-ads124s08.c3
-rw-r--r--drivers/iio/adc/ti-ads8688.c4
-rw-r--r--drivers/iio/adc/xilinx-ams.c1451
-rw-r--r--drivers/iio/adc/xilinx-xadc-core.c64
-rw-r--r--drivers/iio/addac/Kconfig20
-rw-r--r--drivers/iio/addac/Makefile7
-rw-r--r--drivers/iio/addac/ad74413r.c1475
-rw-r--r--drivers/iio/amplifiers/hmc425a.c2
-rw-r--r--drivers/iio/buffer/industrialio-buffer-dmaengine.c2
-rw-r--r--drivers/iio/chemical/atlas-sensor.c4
-rw-r--r--drivers/iio/chemical/sunrise_co2.c4
-rw-r--r--drivers/iio/chemical/vz89x.c2
-rw-r--r--drivers/iio/common/scmi_sensors/scmi_iio.c57
-rw-r--r--drivers/iio/common/st_sensors/st_sensors_core.c4
-rw-r--r--drivers/iio/dac/Kconfig22
-rw-r--r--drivers/iio/dac/Makefile2
-rw-r--r--drivers/iio/dac/ad3552r.c1138
-rw-r--r--drivers/iio/dac/ad5064.c4
-rw-r--r--drivers/iio/dac/ad5380.c2
-rw-r--r--drivers/iio/dac/ad5446.c2
-rw-r--r--drivers/iio/dac/ad5504.c2
-rw-r--r--drivers/iio/dac/ad5624r_spi.c2
-rw-r--r--drivers/iio/dac/ad5686.c2
-rw-r--r--drivers/iio/dac/ad5755.c152
-rw-r--r--drivers/iio/dac/ad5758.c3
-rw-r--r--drivers/iio/dac/ad5766.c13
-rw-r--r--drivers/iio/dac/ad5791.c2
-rw-r--r--drivers/iio/dac/ad7293.c934
-rw-r--r--drivers/iio/dac/dpot-dac.c2
-rw-r--r--drivers/iio/dac/lpc18xx_dac.c3
-rw-r--r--drivers/iio/dac/max5821.c2
-rw-r--r--drivers/iio/dac/mcp4725.c10
-rw-r--r--drivers/iio/dac/stm32-dac.c2
-rw-r--r--drivers/iio/dac/ti-dac082s085.c2
-rw-r--r--drivers/iio/dac/ti-dac5571.c2
-rw-r--r--drivers/iio/dac/ti-dac7311.c2
-rw-r--r--drivers/iio/dummy/iio_simple_dummy_buffer.c2
-rw-r--r--drivers/iio/filter/Kconfig18
-rw-r--r--drivers/iio/filter/Makefile7
-rw-r--r--drivers/iio/filter/admv8818.c665
-rw-r--r--drivers/iio/frequency/Kconfig10
-rw-r--r--drivers/iio/frequency/Makefile1
-rw-r--r--drivers/iio/frequency/admv1013.c656
-rw-r--r--drivers/iio/health/afe4403.c5
-rw-r--r--drivers/iio/health/afe4404.c5
-rw-r--r--drivers/iio/iio_core.h2
-rw-r--r--drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c2
-rw-r--r--drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c2
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c2
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c2
-rw-r--r--drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c4
-rw-r--r--drivers/iio/industrialio-buffer.c20
-rw-r--r--drivers/iio/industrialio-core.c6
-rw-r--r--drivers/iio/industrialio-trigger.c36
-rw-r--r--drivers/iio/light/cm3605.c6
-rw-r--r--drivers/iio/light/gp2ap020a00f.c5
-rw-r--r--drivers/iio/light/ltr501.c42
-rw-r--r--drivers/iio/magnetometer/ak8975.c2
-rw-r--r--drivers/iio/magnetometer/hmc5843_core.c4
-rw-r--r--drivers/iio/magnetometer/mag3110.c6
-rw-r--r--drivers/iio/potentiometer/mcp41010.c6
-rw-r--r--drivers/iio/potentiostat/lmp91000.c4
-rw-r--r--drivers/iio/pressure/bmp280-core.c11
-rw-r--r--drivers/iio/pressure/bmp280-i2c.c2
-rw-r--r--drivers/iio/pressure/bmp280-spi.c2
-rw-r--r--drivers/iio/pressure/mpl3115.c16
-rw-r--r--drivers/iio/pressure/ms5611.h6
-rw-r--r--drivers/iio/pressure/ms5611_core.c7
-rw-r--r--drivers/iio/pressure/ms5611_i2c.c11
-rw-r--r--drivers/iio/pressure/ms5611_spi.c17
-rw-r--r--drivers/iio/proximity/as3935.c6
-rw-r--r--drivers/iio/test/iio-test-format.c123
-rw-r--r--drivers/iio/trigger/iio-trig-interrupt.c4
-rw-r--r--drivers/iio/trigger/iio-trig-sysfs.c4
-rw-r--r--drivers/iio/trigger/stm32-timer-trigger.c4
-rw-r--r--drivers/infiniband/hw/irdma/hw.c16
-rw-r--r--drivers/infiniband/hw/qib/qib.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c3
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c7
-rw-r--r--drivers/input/ff-core.c2
-rw-r--r--drivers/input/keyboard/gpio_keys.c2
-rw-r--r--drivers/input/misc/axp20x-pek.c72
-rw-r--r--drivers/input/misc/palmas-pwrbutton.c9
-rw-r--r--drivers/input/mouse/byd.c2
-rw-r--r--drivers/input/touchscreen/goodix.c127
-rw-r--r--drivers/input/touchscreen/goodix.h1
-rw-r--r--drivers/input/touchscreen/silead.c172
-rw-r--r--drivers/input/touchscreen/ti_am335x_tsc.c20
-rw-r--r--drivers/input/touchscreen/ucb1400_ts.c4
-rw-r--r--drivers/input/touchscreen/wacom_i2c.c44
-rw-r--r--drivers/input/touchscreen/zinitix.c22
-rw-r--r--drivers/interconnect/qcom/Kconfig27
-rw-r--r--drivers/interconnect/qcom/Makefile6
-rw-r--r--drivers/interconnect/qcom/icc-rpm.c64
-rw-r--r--drivers/interconnect/qcom/icc-rpm.h15
-rw-r--r--drivers/interconnect/qcom/icc-rpmh.c10
-rw-r--r--drivers/interconnect/qcom/msm8916.c4
-rw-r--r--drivers/interconnect/qcom/msm8939.c5
-rw-r--r--drivers/interconnect/qcom/msm8996.c2110
-rw-r--r--drivers/interconnect/qcom/msm8996.h149
-rw-r--r--drivers/interconnect/qcom/osm-l3.c20
-rw-r--r--drivers/interconnect/qcom/qcm2290.c1363
-rw-r--r--drivers/interconnect/qcom/sc7280.h2
-rw-r--r--drivers/interconnect/qcom/sdm660.c7
-rw-r--r--drivers/interconnect/qcom/sm8150.c1
-rw-r--r--drivers/interconnect/qcom/sm8250.c1
-rw-r--r--drivers/interconnect/qcom/sm8350.c1
-rw-r--r--drivers/interconnect/qcom/sm8450.c1987
-rw-r--r--drivers/interconnect/qcom/sm8450.h169
-rw-r--r--drivers/iommu/virtio-iommu.c2
-rw-r--r--drivers/macintosh/mac_hid.c24
-rw-r--r--drivers/media/cec/core/cec-core.c2
-rw-r--r--drivers/media/mc/mc-devnode.c2
-rw-r--r--drivers/message/fusion/mptbase.c149
-rw-r--r--drivers/message/fusion/mptctl.c82
-rw-r--r--drivers/message/fusion/mptlan.c90
-rw-r--r--drivers/message/fusion/mptsas.c94
-rw-r--r--drivers/misc/cxl/sysfs.c3
-rw-r--r--drivers/misc/eeprom/at24.c68
-rw-r--r--drivers/misc/eeprom/at25.c213
-rw-r--r--drivers/misc/habanalabs/common/command_buffer.c46
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c389
-rw-r--r--drivers/misc/habanalabs/common/context.c39
-rw-r--r--drivers/misc/habanalabs/common/debugfs.c97
-rw-r--r--drivers/misc/habanalabs/common/device.c387
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c253
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h301
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c150
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c195
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c5
-rw-r--r--drivers/misc/habanalabs/common/hwmon.c209
-rw-r--r--drivers/misc/habanalabs/common/irq.c14
-rw-r--r--drivers/misc/habanalabs/common/memory.c78
-rw-r--r--drivers/misc/habanalabs/common/mmu/mmu.c25
-rw-r--r--drivers/misc/habanalabs/common/mmu/mmu_v1.c18
-rw-r--r--drivers/misc/habanalabs/common/sysfs.c56
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c313
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudiP.h4
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi_coresight.c4
-rw-r--r--drivers/misc/habanalabs/goya/goya.c165
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h14
-rw-r--r--drivers/misc/habanalabs/goya/goya_coresight.c4
-rw-r--r--drivers/misc/habanalabs/goya/goya_hwmgr.c31
-rw-r--r--drivers/misc/habanalabs/include/common/cpucp_if.h62
-rw-r--r--drivers/misc/habanalabs/include/common/hl_boot_if.h4
-rw-r--r--drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h19
-rw-r--r--drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h18
-rw-r--r--drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_1.h20
-rw-r--r--drivers/misc/lattice-ecp3-config.c12
-rw-r--r--drivers/misc/lkdtm/Makefile2
-rw-r--r--drivers/misc/lkdtm/bugs.c16
-rw-r--r--drivers/misc/lkdtm/core.c6
-rw-r--r--drivers/misc/mei/client.c4
-rw-r--r--drivers/misc/mei/hbm.c20
-rw-r--r--drivers/misc/mei/hw-txe.c6
-rw-r--r--drivers/misc/mei/init.c1
-rw-r--r--drivers/misc/pci_endpoint_test.c2
-rw-r--r--drivers/misc/sram.c1
-rw-r--r--drivers/misc/uacce/uacce.c12
-rw-r--r--drivers/misc/vmw_vmci/vmci_context.c6
-rw-r--r--drivers/misc/vmw_vmci/vmci_event.c3
-rw-r--r--drivers/mmc/host/jz4740_mmc.c4
-rw-r--r--drivers/mmc/host/mxcmmc.c2
-rw-r--r--drivers/mmc/host/renesas_sdhi_core.c2
-rw-r--r--drivers/most/most_usb.c4
-rw-r--r--drivers/net/bonding/bond_main.c34
-rw-r--r--drivers/net/bonding/bond_procfs.c8
-rw-r--r--drivers/net/caif/caif_virtio.c2
-rw-r--r--drivers/net/ethernet/allwinner/sun4i-emac.c31
-rw-r--r--drivers/net/ethernet/allwinner/sun4i-emac.h18
-rw-r--r--drivers/net/ethernet/apple/bmac.c5
-rw-r--r--drivers/net/ethernet/apple/mace.c16
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c10
-rw-r--r--drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c3
-rw-r--r--drivers/net/ethernet/freescale/xgmac_mdio.c28
-rw-r--r--drivers/net/ethernet/i825xx/sni_82596.c3
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera.h1
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_hw.c4
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c1
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router.c24
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router_hw.c40
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router_hw.h3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c5
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c5
-rw-r--r--drivers/net/ethernet/mscc/ocelot_flower.c44
-rw-r--r--drivers/net/ethernet/mscc/ocelot_net.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c101
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c3
-rw-r--r--drivers/net/ethernet/ti/cpsw.c6
-rw-r--r--drivers/net/ethernet/ti/cpsw_new.c6
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.c2
-rw-r--r--drivers/net/ethernet/vertexcom/Kconfig2
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c135
-rw-r--r--drivers/net/hyperv/hyperv_net.h5
-rw-r--r--drivers/net/hyperv/netvsc.c136
-rw-r--r--drivers/net/hyperv/netvsc_drv.c1
-rw-r--r--drivers/net/hyperv/rndis_filter.c2
-rw-r--r--drivers/net/ipa/ipa_endpoint.c28
-rw-r--r--drivers/net/ipa/ipa_endpoint.h17
-rw-r--r--drivers/net/phy/at803x.c2
-rw-r--r--drivers/net/phy/marvell.c56
-rw-r--r--drivers/net/phy/micrel.c36
-rw-r--r--drivers/net/phy/sfp.c25
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/usb/smsc95xx.c3
-rw-r--r--drivers/net/virtio_net.c6
-rw-r--r--drivers/net/wireguard/noise.c45
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c2
-rw-r--r--drivers/net/wireless/cisco/airo.c22
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_ap.c16
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_download.c2
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_proc.c24
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c2
-rw-r--r--drivers/net/wireless/ray_cs.c2
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_coex.c2
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_main.c2
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_sdio_ops.c2
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_usb_ops.c2
-rw-r--r--drivers/net/wwan/mhi_wwan_mbim.c4
-rw-r--r--drivers/nfc/pn544/i2c.c2
-rw-r--r--drivers/nfc/st21nfca/se.c10
-rw-r--r--drivers/ntb/hw/amd/ntb_hw_amd.c2
-rw-r--r--drivers/ntb/hw/mscc/ntb_hw_switchtec.c26
-rw-r--r--drivers/ntb/msi.c3
-rw-r--r--drivers/nubus/proc.c36
-rw-r--r--drivers/nvdimm/virtio_pmem.c2
-rw-r--r--drivers/nvmem/core.c2
-rw-r--r--drivers/nvmem/mtk-efuse.c13
-rw-r--r--drivers/of/base.c131
-rw-r--r--drivers/of/device.c2
-rw-r--r--drivers/of/fdt.c6
-rw-r--r--drivers/parisc/led.c4
-rw-r--r--drivers/parisc/pdc_stable.c4
-rw-r--r--drivers/pci/Kconfig2
-rw-r--r--drivers/pci/access.c36
-rw-r--r--drivers/pci/controller/Kconfig8
-rw-r--r--drivers/pci/controller/cadence/pci-j721e.c18
-rw-r--r--drivers/pci/controller/cadence/pcie-cadence-plat.c6
-rw-r--r--drivers/pci/controller/cadence/pcie-cadence.h2
-rw-r--r--drivers/pci/controller/dwc/pci-dra7xx.c8
-rw-r--r--drivers/pci/controller/dwc/pci-exynos.c4
-rw-r--r--drivers/pci/controller/dwc/pci-imx6.c81
-rw-r--r--drivers/pci/controller/dwc/pci-keystone.c37
-rw-r--r--drivers/pci/controller/dwc/pci-layerscape.c152
-rw-r--r--drivers/pci/controller/dwc/pcie-artpec6.c6
-rw-r--r--drivers/pci/controller/dwc/pcie-designware-plat.c6
-rw-r--r--drivers/pci/controller/dwc/pcie-designware.c7
-rw-r--r--drivers/pci/controller/dwc/pcie-hisi.c32
-rw-r--r--drivers/pci/controller/dwc/pcie-histb.c4
-rw-r--r--drivers/pci/controller/dwc/pcie-intel-gw.c204
-rw-r--r--drivers/pci/controller/dwc/pcie-kirin.c10
-rw-r--r--drivers/pci/controller/dwc/pcie-qcom-ep.c6
-rw-r--r--drivers/pci/controller/dwc/pcie-qcom.c14
-rw-r--r--drivers/pci/controller/dwc/pcie-spear13xx.c8
-rw-r--r--drivers/pci/controller/dwc/pcie-tegra194.c222
-rw-r--r--drivers/pci/controller/dwc/pcie-uniphier.c147
-rw-r--r--drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c84
-rw-r--r--drivers/pci/controller/pci-aardvark.c79
-rw-r--r--drivers/pci/controller/pci-hyperv.c314
-rw-r--r--drivers/pci/controller/pci-mvebu.c542
-rw-r--r--drivers/pci/controller/pci-rcar-gen2.c14
-rw-r--r--drivers/pci/controller/pci-thunder-ecam.c46
-rw-r--r--drivers/pci/controller/pci-thunder-pem.c4
-rw-r--r--drivers/pci/controller/pci-xgene-msi.c6
-rw-r--r--drivers/pci/controller/pci-xgene.c58
-rw-r--r--drivers/pci/controller/pcie-altera.c12
-rw-r--r--drivers/pci/controller/pcie-apple.c10
-rw-r--r--drivers/pci/controller/pcie-brcmstb.c304
-rw-r--r--drivers/pci/controller/pcie-iproc-bcma.c22
-rw-r--r--drivers/pci/controller/pcie-iproc-platform.c16
-rw-r--r--drivers/pci/controller/pcie-iproc.c4
-rw-r--r--drivers/pci/controller/pcie-mediatek-gen3.c382
-rw-r--r--drivers/pci/controller/pcie-mediatek.c18
-rw-r--r--drivers/pci/controller/pcie-microchip-host.c42
-rw-r--r--drivers/pci/controller/pcie-mt7621.c75
-rw-r--r--drivers/pci/controller/pcie-rcar-host.c14
-rw-r--r--drivers/pci/controller/pcie-rockchip-host.c4
-rw-r--r--drivers/pci/controller/pcie-xilinx-cpm.c44
-rw-r--r--drivers/pci/controller/pcie-xilinx-nwl.c30
-rw-r--r--drivers/pci/controller/pcie-xilinx.c158
-rw-r--r--drivers/pci/controller/vmd.c61
-rw-r--r--drivers/pci/endpoint/functions/pci-epf-ntb.c2
-rw-r--r--drivers/pci/endpoint/pci-epc-core.c2
-rw-r--r--drivers/pci/hotplug/TODO5
-rw-r--r--drivers/pci/hotplug/cpqphp_ctrl.c4
-rw-r--r--drivers/pci/hotplug/ibmphp_core.c74
-rw-r--r--drivers/pci/hotplug/pciehp.h3
-rw-r--r--drivers/pci/hotplug/pciehp_core.c2
-rw-r--r--drivers/pci/hotplug/pciehp_hpc.c38
-rw-r--r--drivers/pci/msi/irqdomain.c4
-rw-r--r--drivers/pci/msi/legacy.c1
-rw-r--r--drivers/pci/of.c2
-rw-r--r--drivers/pci/p2pdma.c2
-rw-r--r--drivers/pci/pci-bridge-emul.c119
-rw-r--r--drivers/pci/pci.c26
-rw-r--r--drivers/pci/pcie/aspm.c94
-rw-r--r--drivers/pci/pcie/dpc.c4
-rw-r--r--drivers/pci/pcie/pme.c4
-rw-r--r--drivers/pci/probe.c32
-rw-r--r--drivers/pci/proc.c10
-rw-r--r--drivers/pci/quirks.c26
-rw-r--r--drivers/pci/setup-res.c8
-rw-r--r--drivers/pci/slot.c3
-rw-r--r--drivers/pci/switch/switchtec.c11
-rw-r--r--drivers/pcmcia/at91_cf.c6
-rw-r--r--drivers/phy/amlogic/Kconfig10
-rw-r--r--drivers/phy/amlogic/Makefile1
-rw-r--r--drivers/phy/amlogic/phy-meson8-hdmi-tx.c160
-rw-r--r--drivers/phy/broadcom/phy-bcm-ns-usb2.c54
-rw-r--r--drivers/phy/cadence/phy-cadence-sierra.c1312
-rw-r--r--drivers/phy/cadence/phy-cadence-torrent.c6
-rw-r--r--drivers/phy/freescale/Kconfig8
-rw-r--r--drivers/phy/freescale/Makefile1
-rw-r--r--drivers/phy/freescale/phy-fsl-imx8m-pcie.c237
-rw-r--r--drivers/phy/intel/Kconfig10
-rw-r--r--drivers/phy/intel/Makefile1
-rw-r--r--drivers/phy/intel/phy-intel-thunderbay-emmc.c509
-rw-r--r--drivers/phy/mediatek/phy-mtk-io.h38
-rw-r--r--drivers/phy/mediatek/phy-mtk-mipi-dsi.c2
-rw-r--r--drivers/phy/mediatek/phy-mtk-tphy.c608
-rw-r--r--drivers/phy/mediatek/phy-mtk-xsphy.c140
-rw-r--r--drivers/phy/microchip/Kconfig8
-rw-r--r--drivers/phy/microchip/Makefile1
-rw-r--r--drivers/phy/microchip/lan966x_serdes.c545
-rw-r--r--drivers/phy/microchip/lan966x_serdes_regs.h209
-rw-r--r--drivers/phy/phy-can-transceiver.c4
-rw-r--r--drivers/phy/qualcomm/Kconfig10
-rw-r--r--drivers/phy/qualcomm/Makefile1
-rw-r--r--drivers/phy/qualcomm/phy-qcom-edp.c674
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp.c313
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp.h104
-rw-r--r--drivers/phy/rockchip/phy-rockchip-inno-usb2.c260
-rw-r--r--drivers/phy/socionext/Kconfig2
-rw-r--r--drivers/phy/socionext/phy-uniphier-ahci.c201
-rw-r--r--drivers/phy/socionext/phy-uniphier-pcie.c70
-rw-r--r--drivers/phy/socionext/phy-uniphier-usb3hs.c4
-rw-r--r--drivers/phy/socionext/phy-uniphier-usb3ss.c14
-rw-r--r--drivers/phy/st/phy-stm32-usbphyc.c10
-rw-r--r--drivers/phy/tegra/xusb.c2
-rw-r--r--drivers/phy/ti/phy-omap-control.c6
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c4
-rw-r--r--drivers/platform/x86/toshiba_acpi.c16
-rw-r--r--drivers/pnp/isapnp/proc.c2
-rw-r--r--drivers/pnp/pnpbios/core.c6
-rw-r--r--drivers/pnp/pnpbios/proc.c4
-rw-r--r--drivers/pwm/core.c139
-rw-r--r--drivers/pwm/pwm-img.c35
-rw-r--r--drivers/pwm/pwm-twl.c62
-rw-r--r--drivers/pwm/pwm-vt8500.c57
-rw-r--r--drivers/rapidio/switches/Kconfig11
-rw-r--r--drivers/rapidio/switches/Makefile2
-rw-r--r--drivers/rapidio/switches/tsi568.c195
-rw-r--r--drivers/rapidio/switches/tsi57x.c365
-rw-r--r--drivers/remoteproc/Kconfig11
-rw-r--r--drivers/remoteproc/Makefile1
-rw-r--r--drivers/remoteproc/imx_rproc.c9
-rw-r--r--drivers/remoteproc/ingenic_rproc.c5
-rw-r--r--drivers/remoteproc/mtk_scp_ipi.c4
-rw-r--r--drivers/remoteproc/qcom_pil_info.c2
-rw-r--r--drivers/remoteproc/qcom_q6v5_pas.c38
-rw-r--r--drivers/remoteproc/rcar_rproc.c224
-rw-r--r--drivers/remoteproc/remoteproc_core.c4
-rw-r--r--drivers/remoteproc/remoteproc_coredump.c2
-rw-r--r--drivers/remoteproc/st_slim_rproc.c2
-rw-r--r--drivers/remoteproc/stm32_rproc.c2
-rw-r--r--drivers/remoteproc/ti_k3_dsp_remoteproc.c1
-rw-r--r--drivers/remoteproc/ti_k3_r5_remoteproc.c5
-rw-r--r--drivers/rpmsg/qcom_glink_native.c2
-rw-r--r--drivers/rpmsg/qcom_smd.c2
-rw-r--r--drivers/rpmsg/rpmsg_char.c7
-rw-r--r--drivers/rpmsg/rpmsg_core.c44
-rw-r--r--drivers/rpmsg/virtio_rpmsg_bus.c4
-rw-r--r--drivers/rtc/Kconfig24
-rw-r--r--drivers/rtc/Makefile2
-rw-r--r--drivers/rtc/dev.c6
-rw-r--r--drivers/rtc/rtc-cmos.c201
-rw-r--r--drivers/rtc/rtc-da9063.c16
-rw-r--r--drivers/rtc/rtc-ftrtc010.c8
-rw-r--r--drivers/rtc/rtc-gamecube.c377
-rw-r--r--drivers/rtc/rtc-mc146818-lib.c182
-rw-r--r--drivers/rtc/rtc-pcf2127.c2
-rw-r--r--drivers/rtc/rtc-pcf85063.c97
-rw-r--r--drivers/rtc/rtc-pxa.c4
-rw-r--r--drivers/rtc/rtc-rs5c372.c185
-rw-r--r--drivers/rtc/rtc-rv8803.c6
-rw-r--r--drivers/rtc/rtc-sunplus.c362
-rw-r--r--drivers/scsi/aacraid/aachba.c2
-rw-r--r--drivers/scsi/aic7xxx/aic79xx_osm.c6
-rw-r--r--drivers/scsi/elx/efct/efct_driver.c11
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c5
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c10
-rw-r--r--drivers/scsi/megaraid.c84
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_fw.c11
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.h4
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_ctl.c87
-rw-r--r--drivers/scsi/pcmcia/nsp_cs.c3
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.c7
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.h3
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.c7
-rw-r--r--drivers/scsi/qedf/qedf_main.c2
-rw-r--r--drivers/scsi/scsi_lib.c8
-rw-r--r--drivers/scsi/scsi_proc.c4
-rw-r--r--drivers/scsi/sd.c2
-rw-r--r--drivers/scsi/sg.c35
-rw-r--r--drivers/scsi/storvsc_drv.c54
-rw-r--r--drivers/scsi/ufs/ufs-mediatek.c2
-rw-r--r--drivers/scsi/ufs/ufshcd.c2
-rw-r--r--drivers/scsi/virtio_scsi.c2
-rw-r--r--drivers/soc/canaan/Kconfig1
-rw-r--r--drivers/soc/fsl/qbman/bman_portal.c2
-rw-r--r--drivers/soc/fsl/qbman/qman_portal.c2
-rw-r--r--drivers/soc/ti/k3-ringacc.c4
-rw-r--r--drivers/soc/xilinx/Kconfig10
-rw-r--r--drivers/soc/xilinx/Makefile1
-rw-r--r--drivers/soc/xilinx/xlnx_event_manager.c600
-rw-r--r--drivers/soc/xilinx/zynqmp_power.c54
-rw-r--r--drivers/soundwire/qcom.c6
-rw-r--r--drivers/spmi/Kconfig11
-rw-r--r--drivers/spmi/Makefile1
-rw-r--r--drivers/spmi/spmi-mtk-pmif.c542
-rw-r--r--drivers/spmi/spmi-pmic-arb.c193
-rw-r--r--drivers/staging/rts5208/rtsx.c16
-rw-r--r--drivers/thermal/intel/int340x_thermal/int3400_thermal.c1
-rw-r--r--drivers/thermal/intel/int340x_thermal/int3403_thermal.c1
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device.h1
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c1
-rw-r--r--drivers/tty/n_tty.c2
-rw-r--r--drivers/uio/uio.c8
-rw-r--r--drivers/uio/uio_dmem_genirq.c6
-rw-r--r--drivers/usb/atm/usbatm.c2
-rw-r--r--drivers/usb/gadget/function/f_mass_storage.c2
-rw-r--r--drivers/usb/gadget/function/rndis.c4
-rw-r--r--drivers/vdpa/alibaba/eni_vdpa.c28
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_base.c41
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_base.h9
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_main.c40
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c156
-rw-r--r--drivers/vdpa/vdpa.c163
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.c21
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim_net.c2
-rw-r--r--drivers/vdpa/vdpa_user/vduse_dev.c19
-rw-r--r--drivers/vdpa/virtio_pci/vp_vdpa.c16
-rw-r--r--drivers/vfio/pci/vfio_pci_igd.c15
-rw-r--r--drivers/vfio/vfio_iommu_type1.c2
-rw-r--r--drivers/vhost/test.c1
-rw-r--r--drivers/vhost/vdpa.c12
-rw-r--r--drivers/video/fbdev/vga16fb.c24
-rw-r--r--drivers/virt/acrn/ioreq.c3
-rw-r--r--drivers/virt/nitro_enclaves/Kconfig9
-rw-r--r--drivers/virt/nitro_enclaves/ne_misc_dev.c174
-rw-r--r--drivers/virt/nitro_enclaves/ne_misc_dev_test.c157
-rw-r--r--drivers/virt/nitro_enclaves/ne_pci_dev.c1
-rw-r--r--drivers/virtio/virtio.c6
-rw-r--r--drivers/virtio/virtio_balloon.c2
-rw-r--r--drivers/virtio/virtio_input.c2
-rw-r--r--drivers/virtio/virtio_mem.c114
-rw-r--r--drivers/virtio/virtio_pci_legacy.c2
-rw-r--r--drivers/virtio/virtio_pci_legacy_dev.c4
-rw-r--r--drivers/virtio/virtio_pci_modern_dev.c2
-rw-r--r--drivers/virtio/virtio_ring.c4
-rw-r--r--drivers/virtio/virtio_vdpa.c7
-rw-r--r--drivers/w1/slaves/w1_ds28e04.c26
-rw-r--r--drivers/w1/slaves/w1_therm.c7
-rw-r--r--drivers/watchdog/Kconfig98
-rw-r--r--drivers/watchdog/Makefile4
-rw-r--r--drivers/watchdog/apple_wdt.c226
-rw-r--r--drivers/watchdog/bcm63xx_wdt.c317
-rw-r--r--drivers/watchdog/bcm7038_wdt.c15
-rw-r--r--drivers/watchdog/da9063_wdt.c12
-rw-r--r--drivers/watchdog/davinci_wdt.c2
-rw-r--r--drivers/watchdog/f71808e_wdt.c10
-rw-r--r--drivers/watchdog/meson_gxbb_wdt.c1
-rw-r--r--drivers/watchdog/msc313e_wdt.c4
-rw-r--r--drivers/watchdog/mtk_wdt.c2
-rw-r--r--drivers/watchdog/realtek_otto_wdt.c384
-rw-r--r--drivers/watchdog/rzg2l_wdt.c263
-rw-r--r--drivers/watchdog/s3c2410_wdt.c338
-rw-r--r--drivers/zorro/proc.c2
-rw-r--r--fs/9p/vfs_addr.c5
-rw-r--r--fs/9p/vfs_file.c6
-rw-r--r--fs/9p/vfs_inode_dotl.c29
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/inode.c4
-rw-r--r--fs/afs/proc.c6
-rw-r--r--fs/aio.c31
-rw-r--r--fs/binfmt_elf.c6
-rw-r--r--fs/binfmt_misc.c6
-rw-r--r--fs/btrfs/Kconfig3
-rw-r--r--fs/btrfs/extent_io.c10
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/cachefiles/cache.c17
-rw-r--r--fs/cachefiles/daemon.c11
-rw-r--r--fs/cachefiles/internal.h2
-rw-r--r--fs/cachefiles/io.c2
-rw-r--r--fs/cachefiles/namei.c12
-rw-r--r--fs/ceph/addr.c5
-rw-r--r--fs/ceph/caps.c3
-rw-r--r--fs/ceph/file.c24
-rw-r--r--fs/ceph/metric.c2
-rw-r--r--fs/ceph/quota.c17
-rw-r--r--fs/ceph/super.c169
-rw-r--r--fs/ceph/super.h28
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/cache.c105
-rw-r--r--fs/cifs/cifs_debug.c8
-rw-r--r--fs/cifs/cifs_spnego.c4
-rw-r--r--fs/cifs/cifs_spnego.h3
-rw-r--r--fs/cifs/cifs_swn.c9
-rw-r--r--fs/cifs/cifsencrypt.c6
-rw-r--r--fs/cifs/cifsfs.c24
-rw-r--r--fs/cifs/cifsfs.h3
-rw-r--r--fs/cifs/cifsglob.h86
-rw-r--r--fs/cifs/cifspdu.h2
-rw-r--r--fs/cifs/cifsproto.h39
-rw-r--r--fs/cifs/cifssmb.c94
-rw-r--r--fs/cifs/connect.c326
-rw-r--r--fs/cifs/dfs_cache.c2
-rw-r--r--fs/cifs/dir.c5
-rw-r--r--fs/cifs/file.c66
-rw-r--r--fs/cifs/fs_context.c8
-rw-r--r--fs/cifs/fscache.c333
-rw-r--r--fs/cifs/fscache.h128
-rw-r--r--fs/cifs/inode.c25
-rw-r--r--fs/cifs/misc.c49
-rw-r--r--fs/cifs/netmisc.c5
-rw-r--r--fs/cifs/ntlmssp.h32
-rw-r--r--fs/cifs/sess.c275
-rw-r--r--fs/cifs/smb1ops.c24
-rw-r--r--fs/cifs/smb2glob.h2
-rw-r--r--fs/cifs/smb2misc.c5
-rw-r--r--fs/cifs/smb2ops.c34
-rw-r--r--fs/cifs/smb2pdu.c263
-rw-r--r--fs/cifs/smb2proto.h6
-rw-r--r--fs/cifs/smb2transport.c67
-rw-r--r--fs/cifs/transport.c89
-rw-r--r--fs/coredump.c80
-rw-r--r--fs/dcache.c37
-rw-r--r--fs/eventpoll.c10
-rw-r--r--fs/exec.c52
-rw-r--r--fs/exfat/balloc.c2
-rw-r--r--fs/exfat/dir.c42
-rw-r--r--fs/exfat/exfat_fs.h6
-rw-r--r--fs/exfat/fatent.c4
-rw-r--r--fs/exfat/file.c18
-rw-r--r--fs/exfat/inode.c15
-rw-r--r--fs/exfat/misc.c3
-rw-r--r--fs/exfat/namei.c48
-rw-r--r--fs/exfat/nls.c2
-rw-r--r--fs/exfat/super.c11
-rw-r--r--fs/ext4/extents.c8
-rw-r--r--fs/ext4/inline.c5
-rw-r--r--fs/ext4/mballoc.c14
-rw-r--r--fs/ext4/page-io.c9
-rw-r--r--fs/ext4/readpage.c6
-rw-r--r--fs/ext4/super.c42
-rw-r--r--fs/f2fs/Kconfig1
-rw-r--r--fs/f2fs/checkpoint.c6
-rw-r--r--fs/f2fs/compress.c84
-rw-r--r--fs/f2fs/data.c368
-rw-r--r--fs/f2fs/f2fs.h43
-rw-r--r--fs/f2fs/file.c509
-rw-r--r--fs/f2fs/gc.c31
-rw-r--r--fs/f2fs/inline.c4
-rw-r--r--fs/f2fs/inode.c22
-rw-r--r--fs/f2fs/iostat.c40
-rw-r--r--fs/f2fs/node.c27
-rw-r--r--fs/f2fs/recovery.c8
-rw-r--r--fs/f2fs/segment.c19
-rw-r--r--fs/f2fs/segment.h3
-rw-r--r--fs/f2fs/super.c89
-rw-r--r--fs/f2fs/sysfs.c29
-rw-r--r--fs/f2fs/xattr.c40
-rw-r--r--fs/fat/file.c5
-rw-r--r--fs/file_table.c47
-rw-r--r--fs/fs_context.c2
-rw-r--r--fs/fscache/volume.c4
-rw-r--r--fs/fuse/virtio_fs.c4
-rw-r--r--fs/hfsplus/hfsplus_raw.h12
-rw-r--r--fs/hfsplus/xattr.c4
-rw-r--r--fs/hugetlbfs/inode.c7
-rw-r--r--fs/inode.c88
-rw-r--r--fs/io-wq.c97
-rw-r--r--fs/io-wq.h2
-rw-r--r--fs/io_uring.c79
-rw-r--r--fs/ioctl.c2
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jffs2/background.c2
-rw-r--r--fs/ksmbd/asn1.c142
-rw-r--r--fs/ksmbd/auth.c27
-rw-r--r--fs/ksmbd/auth.h10
-rw-r--r--fs/ksmbd/connection.c10
-rw-r--r--fs/ksmbd/connection.h12
-rw-r--r--fs/ksmbd/ksmbd_netlink.h12
-rw-r--r--fs/ksmbd/mgmt/user_config.c10
-rw-r--r--fs/ksmbd/mgmt/user_config.h1
-rw-r--r--fs/ksmbd/mgmt/user_session.h1
-rw-r--r--fs/ksmbd/smb2misc.c18
-rw-r--r--fs/ksmbd/smb2ops.c16
-rw-r--r--fs/ksmbd/smb2pdu.c222
-rw-r--r--fs/ksmbd/smb2pdu.h1
-rw-r--r--fs/ksmbd/smb_common.h1
-rw-r--r--fs/ksmbd/transport_ipc.c2
-rw-r--r--fs/ksmbd/transport_rdma.c261
-rw-r--r--fs/ksmbd/transport_rdma.h4
-rw-r--r--fs/ksmbd/transport_tcp.c3
-rw-r--r--fs/ksmbd/vfs_cache.h10
-rw-r--r--fs/lockd/svc.c200
-rw-r--r--fs/lockd/svclock.c6
-rw-r--r--fs/locks.c34
-rw-r--r--fs/mpage.c7
-rw-r--r--fs/namei.c58
-rw-r--r--fs/namespace.c24
-rw-r--r--fs/netfs/read_helper.c3
-rw-r--r--fs/nfs/callback.c36
-rw-r--r--fs/nfs/export.c2
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfsd/filecache.c79
-rw-r--r--fs/nfsd/filecache.h1
-rw-r--r--fs/nfsd/netns.h27
-rw-r--r--fs/nfsd/nfs3proc.c6
-rw-r--r--fs/nfsd/nfs3xdr.c65
-rw-r--r--fs/nfsd/nfs4proc.c24
-rw-r--r--fs/nfsd/nfs4state.c63
-rw-r--r--fs/nfsd/nfs4xdr.c21
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsctl.c27
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfsfh.c66
-rw-r--r--fs/nfsd/nfsfh.h40
-rw-r--r--fs/nfsd/nfsproc.c8
-rw-r--r--fs/nfsd/nfssvc.c222
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/stats.c2
-rw-r--r--fs/nfsd/stats.h4
-rw-r--r--fs/nfsd/trace.h106
-rw-r--r--fs/nfsd/vfs.c122
-rw-r--r--fs/nfsd/vfs.h3
-rw-r--r--fs/nilfs2/page.c4
-rw-r--r--fs/notify/dnotify/dnotify.c21
-rw-r--r--fs/notify/fanotify/fanotify_user.c10
-rw-r--r--fs/notify/inotify/inotify_user.c11
-rw-r--r--fs/ntfs/attrib.c2
-rw-r--r--fs/ntfs3/ntfs_fs.h1
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.c26
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/ocfs2/cluster/masklog.c11
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c4
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c18
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/dlm/dlmthread.c2
-rw-r--r--fs/ocfs2/filecheck.c3
-rw-r--r--fs/ocfs2/journal.c6
-rw-r--r--fs/ocfs2/stackglue.c25
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/orangefs/orangefs-bufmap.c7
-rw-r--r--fs/orangefs/orangefs-sysfs.c21
-rw-r--r--fs/pipe.c64
-rw-r--r--fs/proc/array.c9
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/generic.c6
-rw-r--r--fs/proc/inode.c1
-rw-r--r--fs/proc/internal.h5
-rw-r--r--fs/proc/proc_net.c8
-rw-r--r--fs/proc/proc_sysctl.c72
-rw-r--r--fs/proc/task_mmu.c13
-rw-r--r--fs/proc/vmcore.c10
-rw-r--r--fs/signalfd.c5
-rw-r--r--fs/smbfs_common/smb2pdu.h2
-rw-r--r--fs/smbfs_common/smbfsctl.h2
-rw-r--r--fs/squashfs/super.c33
-rw-r--r--fs/super.c7
-rw-r--r--fs/sysctls.c39
-rw-r--r--fs/tracefs/inode.c24
-rw-r--r--fs/unicode/.gitignore2
-rw-r--r--fs/unicode/Kconfig13
-rw-r--r--fs/unicode/Makefile13
-rw-r--r--fs/unicode/mkutf8data.c24
-rw-r--r--fs/unicode/utf8-core.c109
-rw-r--r--fs/unicode/utf8-norm.c262
-rw-r--r--fs/unicode/utf8-selftest.c94
-rw-r--r--fs/unicode/utf8data.c_shipped (renamed from fs/unicode/utf8data.h_shipped)22
-rw-r--r--fs/unicode/utf8n.h81
-rw-r--r--fs/userfaultfd.c8
-rw-r--r--fs/xfs/kmem.c3
-rw-r--r--fs/xfs/libxfs/xfs_fs.h37
-rw-r--r--fs/xfs/scrub/agheader.c53
-rw-r--r--fs/xfs/scrub/agheader_repair.c12
-rw-r--r--fs/xfs/xfs_bmap_util.c7
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_dir2_readdir.c53
-rw-r--r--fs/xfs/xfs_file.c3
-rw-r--r--fs/xfs/xfs_icache.c22
-rw-r--r--fs/xfs/xfs_ioctl.c102
-rw-r--r--fs/xfs/xfs_ioctl.h6
-rw-r--r--fs/xfs/xfs_ioctl32.c27
-rw-r--r--fs/xfs/xfs_ioctl32.h22
-rw-r--r--include/asm-generic/barrier.h2
-rw-r--r--include/asm-generic/bitops.h1
-rw-r--r--include/asm-generic/bitops/le.h64
-rw-r--r--include/asm-generic/hyperv-tlfs.h33
-rw-r--r--include/asm-generic/mshyperv.h6
-rw-r--r--include/asm-generic/pgalloc.h24
-rw-r--r--include/crypto/blake2s.h3
-rw-r--r--include/dt-bindings/iio/addac/adi,ad74413r.h21
-rw-r--r--include/dt-bindings/interconnect/qcom,msm8996.h163
-rw-r--r--include/dt-bindings/interconnect/qcom,qcm2290.h94
-rw-r--r--include/dt-bindings/interconnect/qcom,sm8450.h171
-rw-r--r--include/dt-bindings/mux/ti-serdes.h22
-rw-r--r--include/dt-bindings/phy/phy-cadence.h9
-rw-r--r--include/dt-bindings/phy/phy-imx8-pcie.h14
-rw-r--r--include/dt-bindings/phy/phy-lan966x-serdes.h14
-rw-r--r--include/kunit/assert.h2
-rw-r--r--include/kvm/arm_vgic.h4
-rw-r--r--include/linux/aio.h4
-rw-r--r--include/linux/bitmap.h34
-rw-r--r--include/linux/bitops.h34
-rw-r--r--include/linux/bpf.h9
-rw-r--r--include/linux/bpf_verifier.h4
-rw-r--r--include/linux/byteorder/generic.h4
-rw-r--r--include/linux/ceph/libceph.h4
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/cleancache.h124
-rw-r--r--include/linux/comedi/comedi_8254.h (renamed from drivers/comedi/drivers/comedi_8254.h)0
-rw-r--r--include/linux/comedi/comedi_8255.h (renamed from drivers/comedi/drivers/8255.h)8
-rw-r--r--include/linux/comedi/comedi_isadma.h (renamed from drivers/comedi/drivers/comedi_isadma.h)0
-rw-r--r--include/linux/comedi/comedi_pci.h (renamed from drivers/comedi/comedi_pci.h)3
-rw-r--r--include/linux/comedi/comedi_pcmcia.h (renamed from drivers/comedi/comedi_pcmcia.h)3
-rw-r--r--include/linux/comedi/comedi_usb.h (renamed from drivers/comedi/comedi_usb.h)3
-rw-r--r--include/linux/comedi/comedidev.h (renamed from drivers/comedi/comedidev.h)3
-rw-r--r--include/linux/comedi/comedilib.h (renamed from drivers/comedi/comedilib.h)0
-rw-r--r--include/linux/coredump.h10
-rw-r--r--include/linux/counter.h55
-rw-r--r--include/linux/cpumask.h46
-rw-r--r--include/linux/damon.h89
-rw-r--r--include/linux/dcache.h10
-rw-r--r--include/linux/delayacct.h107
-rw-r--r--include/linux/dma/xilinx_dpdma.h2
-rw-r--r--include/linux/dmaengine.h20
-rw-r--r--include/linux/dnotify.h1
-rw-r--r--include/linux/efi.h46
-rw-r--r--include/linux/elfcore-compat.h5
-rw-r--r--include/linux/elfcore.h5
-rw-r--r--include/linux/exportfs.h2
-rw-r--r--include/linux/fanotify.h2
-rw-r--r--include/linux/find.h372
-rw-r--r--include/linux/firmware/xlnx-event-manager.h36
-rw-r--r--include/linux/firmware/xlnx-zynqmp.h33
-rw-r--r--include/linux/fpga/fpga-bridge.h30
-rw-r--r--include/linux/fpga/fpga-mgr.h62
-rw-r--r--include/linux/fpga/fpga-region.h36
-rw-r--r--include/linux/frontswap.h35
-rw-r--r--include/linux/fs.h23
-rw-r--r--include/linux/fs_context.h2
-rw-r--r--include/linux/fscache.h5
-rw-r--r--include/linux/gfp.h12
-rw-r--r--include/linux/hash.h5
-rw-r--r--include/linux/hugetlb.h4
-rw-r--r--include/linux/hugetlb_cgroup.h7
-rw-r--r--include/linux/hyperv.h6
-rw-r--r--include/linux/iio/buffer-dma.h5
-rw-r--r--include/linux/iio/iio.h5
-rw-r--r--include/linux/iio/trigger.h2
-rw-r--r--include/linux/iio/types.h1
-rw-r--r--include/linux/inotify.h3
-rw-r--r--include/linux/kasan.h4
-rw-r--r--include/linux/kernel.h10
-rw-r--r--include/linux/kprobes.h6
-rw-r--r--include/linux/kthread.h30
-rw-r--r--include/linux/kvm_dirty_ring.h14
-rw-r--r--include/linux/kvm_host.h435
-rw-r--r--include/linux/kvm_types.h19
-rw-r--r--include/linux/libata.h138
-rw-r--r--include/linux/list.h36
-rw-r--r--include/linux/lockd/lockd.h9
-rw-r--r--include/linux/mc146818rtc.h6
-rw-r--r--include/linux/memblock.h2
-rw-r--r--include/linux/memcontrol.h22
-rw-r--r--include/linux/mempolicy.h1
-rw-r--r--include/linux/memremap.h11
-rw-r--r--include/linux/mhi.h21
-rw-r--r--include/linux/migrate.h2
-rw-r--r--include/linux/mm.h96
-rw-r--r--include/linux/mm_inline.h136
-rw-r--r--include/linux/mm_types.h182
-rw-r--r--include/linux/mmzone.h9
-rw-r--r--include/linux/module.h9
-rw-r--r--include/linux/mount.h3
-rw-r--r--include/linux/of.h422
-rw-r--r--include/linux/page-flags.h43
-rw-r--r--include/linux/page_idle.h1
-rw-r--r--include/linux/page_table_check.h147
-rw-r--r--include/linux/pagevec.h1
-rw-r--r--include/linux/pci.h15
-rw-r--r--include/linux/pci_ids.h50
-rw-r--r--include/linux/percpu.h13
-rw-r--r--include/linux/perf_event.h15
-rw-r--r--include/linux/pgtable.h8
-rw-r--r--include/linux/pipe_fs_i.h4
-rw-r--r--include/linux/platform_data/ad5755.h102
-rw-r--r--include/linux/platform_data/bcm7038_wdt.h8
-rw-r--r--include/linux/pm.h55
-rw-r--r--include/linux/pm_runtime.h24
-rw-r--r--include/linux/poll.h2
-rw-r--r--include/linux/printk.h4
-rw-r--r--include/linux/proc_fs.h25
-rw-r--r--include/linux/profile.h45
-rw-r--r--include/linux/property.h2
-rw-r--r--include/linux/psi.h2
-rw-r--r--include/linux/psi_types.h3
-rw-r--r--include/linux/ref_tracker.h2
-rw-r--r--include/linux/rio_ids.h13
-rw-r--r--include/linux/rwlock.h6
-rw-r--r--include/linux/rwlock_api_smp.h8
-rw-r--r--include/linux/rwlock_rt.h10
-rw-r--r--include/linux/sbitmap.h11
-rw-r--r--include/linux/sched.h17
-rw-r--r--include/linux/sched/mm.h26
-rw-r--r--include/linux/sched/signal.h25
-rw-r--r--include/linux/sched/sysctl.h14
-rw-r--r--include/linux/sched/task.h1
-rw-r--r--include/linux/seq_file.h2
-rw-r--r--include/linux/shmem_fs.h3
-rw-r--r--include/linux/slab.h3
-rw-r--r--include/linux/spinlock_api_up.h1
-rw-r--r--include/linux/stackdepot.h25
-rw-r--r--include/linux/stackleak.h5
-rw-r--r--include/linux/sunrpc/svc.h79
-rw-r--r--include/linux/swap.h6
-rw-r--r--include/linux/swapfile.h3
-rw-r--r--include/linux/swiotlb.h6
-rw-r--r--include/linux/switchtec.h2
-rw-r--r--include/linux/syscalls.h3
-rw-r--r--include/linux/sysctl.h67
-rw-r--r--include/linux/trace_events.h2
-rw-r--r--include/linux/tracehook.h7
-rw-r--r--include/linux/unaligned/packed_struct.h2
-rw-r--r--include/linux/unicode.h49
-rw-r--r--include/linux/vdpa.h39
-rw-r--r--include/linux/virtio.h1
-rw-r--r--include/linux/vm_event_item.h3
-rw-r--r--include/linux/vmalloc.h7
-rw-r--r--include/net/9p/9p.h2
-rw-r--r--include/net/9p/transport.h2
-rw-r--r--include/net/inet_frag.h11
-rw-r--r--include/net/ipv6_frag.h3
-rw-r--r--include/net/pkt_cls.h4
-rw-r--r--include/net/sch_generic.h5
-rw-r--r--include/ras/ras_event.h2
-rw-r--r--include/scsi/scsi_device.h5
-rw-r--r--include/scsi/sg.h4
-rw-r--r--include/trace/bpf_probe.h16
-rw-r--r--include/trace/events/cachefiles.h103
-rw-r--r--include/trace/events/compaction.h24
-rw-r--r--include/trace/events/damon.h15
-rw-r--r--include/trace/events/error_report.h8
-rw-r--r--include/trace/events/f2fs.h27
-rw-r--r--include/trace/events/libata.h416
-rw-r--r--include/trace/events/random.h56
-rw-r--r--include/trace/events/sunrpc.h37
-rw-r--r--include/trace/events/thp.h35
-rw-r--r--include/trace/perf.h16
-rw-r--r--include/trace/trace_events.h120
-rw-r--r--include/uapi/asm-generic/unistd.h5
-rw-r--r--include/uapi/linux/comedi.h (renamed from drivers/comedi/comedi.h)2
-rw-r--r--include/uapi/linux/idxd.h1
-rw-r--r--include/uapi/linux/kfd_sysfs.h2
-rw-r--r--include/uapi/linux/kvm.h16
-rw-r--r--include/uapi/linux/magic.h6
-rw-r--r--include/uapi/linux/module.h1
-rw-r--r--include/uapi/linux/pci_regs.h138
-rw-r--r--include/uapi/linux/pfrut.h262
-rw-r--r--include/uapi/linux/prctl.h3
-rw-r--r--include/uapi/linux/soundcard.h2
-rw-r--r--include/uapi/linux/taskstats.h6
-rw-r--r--include/uapi/linux/uuid.h10
-rw-r--r--include/uapi/linux/vdpa.h6
-rw-r--r--include/uapi/misc/habanalabs.h166
-rw-r--r--init/Kconfig14
-rw-r--r--init/Makefile2
-rw-r--r--init/main.c9
-rw-r--r--ipc/util.c2
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/bpf/btf.c2
-rw-r--r--kernel/bpf/inode.c14
-rw-r--r--kernel/bpf/verifier.c81
-rw-r--r--kernel/cgroup/cgroup.c11
-rw-r--r--kernel/configs/debug.config105
-rw-r--r--kernel/delayacct.c49
-rw-r--r--kernel/dma/pool.c4
-rw-r--r--kernel/dma/swiotlb.c50
-rw-r--r--kernel/events/core.c246
-rw-r--r--kernel/exit.c97
-rw-r--r--kernel/fork.c23
-rw-r--r--kernel/futex/core.c2
-rw-r--r--kernel/gcov/Kconfig1
-rw-r--r--kernel/hung_task.c81
-rw-r--r--kernel/irq/proc.c8
-rw-r--r--kernel/kallsyms.c1
-rw-r--r--kernel/kexec_core.c2
-rw-r--r--kernel/kprobes.c30
-rw-r--r--kernel/kthread.c121
-rw-r--r--kernel/livepatch/core.c29
-rw-r--r--kernel/livepatch/shadow.c6
-rw-r--r--kernel/locking/spinlock.c10
-rw-r--r--kernel/locking/spinlock_rt.c12
-rw-r--r--kernel/module-internal.h19
-rw-r--r--kernel/module.c53
-rw-r--r--kernel/module_decompress.c271
-rw-r--r--kernel/panic.c21
-rw-r--r--kernel/printk/Makefile5
-rw-r--r--kernel/printk/internal.h8
-rw-r--r--kernel/printk/printk.c4
-rw-r--r--kernel/printk/sysctl.c85
-rw-r--r--kernel/profile.c73
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/rcu/rcutorture.c7
-rw-r--r--kernel/resource.c4
-rw-r--r--kernel/sched/core.c31
-rw-r--r--kernel/sched/core_sched.c2
-rw-r--r--kernel/sched/fair.c118
-rw-r--r--kernel/sched/pelt.h4
-rw-r--r--kernel/sched/psi.c66
-rw-r--r--kernel/signal.c61
-rw-r--r--kernel/stackleak.c26
-rw-r--r--kernel/sys.c79
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl.c723
-rw-r--r--kernel/time/clocksource.c4
-rw-r--r--kernel/trace/Kconfig21
-rw-r--r--kernel/trace/ftrace.c34
-rw-r--r--kernel/trace/ring_buffer.c7
-rw-r--r--kernel/trace/trace.c88
-rw-r--r--kernel/trace/trace.h83
-rw-r--r--kernel/trace/trace_eprobe.c38
-rw-r--r--kernel/trace/trace_events.c12
-rw-r--r--kernel/trace/trace_events_filter.c139
-rw-r--r--kernel/trace/trace_events_hist.c69
-rw-r--r--kernel/trace/trace_events_inject.c11
-rw-r--r--kernel/trace/trace_events_synth.c15
-rw-r--r--kernel/trace/trace_events_trigger.c424
-rw-r--r--kernel/trace/trace_hwlat.c6
-rw-r--r--kernel/trace/trace_kprobe.c43
-rw-r--r--kernel/trace/trace_osnoise.c26
-rw-r--r--kernel/trace/trace_output.c4
-rw-r--r--kernel/trace/trace_probe.c5
-rw-r--r--kernel/trace/trace_syscalls.c6
-rw-r--r--kernel/trace/trace_uprobe.c39
-rw-r--r--kernel/tsacct.c7
-rw-r--r--kernel/watchdog.c101
-rw-r--r--lib/Kconfig9
-rw-r--r--lib/Kconfig.debug31
-rw-r--r--lib/Kconfig.kasan2
-rw-r--r--lib/Kconfig.ubsan13
-rw-r--r--lib/Makefile3
-rw-r--r--lib/crypto/Kconfig17
-rw-r--r--lib/crypto/blake2s-selftest.c31
-rw-r--r--lib/crypto/blake2s.c37
-rw-r--r--lib/find_bit.c21
-rw-r--r--lib/find_bit_benchmark.c21
-rw-r--r--lib/genalloc.c2
-rw-r--r--lib/kstrtox.c12
-rw-r--r--lib/kunit/try-catch.c4
-rw-r--r--lib/list_debug.c8
-rw-r--r--lib/lz4/lz4defs.h2
-rw-r--r--lib/ref_tracker.c5
-rw-r--r--lib/sbitmap.c25
-rw-r--r--lib/sha1.c95
-rw-r--r--lib/stackdepot.c46
-rw-r--r--lib/test_bitmap.c37
-rw-r--r--lib/test_hash.c259
-rw-r--r--lib/test_hmm.c24
-rw-r--r--lib/test_kasan.c30
-rw-r--r--lib/test_meminit.c1
-rw-r--r--lib/test_sysctl.c22
-rw-r--r--lib/test_ubsan.c22
-rw-r--r--lib/vsprintf.c24
-rw-r--r--mm/Kconfig64
-rw-r--r--mm/Kconfig.debug24
-rw-r--r--mm/Makefile2
-rw-r--r--mm/cleancache.c315
-rw-r--r--mm/compaction.c7
-rw-r--r--mm/damon/core.c45
-rw-r--r--mm/damon/dbgfs.c18
-rw-r--r--mm/damon/paddr.c22
-rw-r--r--mm/damon/prmtv-common.h4
-rw-r--r--mm/damon/reclaim.c46
-rw-r--r--mm/damon/vaddr.c182
-rw-r--r--mm/debug.c52
-rw-r--r--mm/debug_vm_pgtable.c6
-rw-r--r--mm/dmapool.c2
-rw-r--r--mm/filemap.c112
-rw-r--r--mm/frontswap.c259
-rw-r--r--mm/gup.c31
-rw-r--r--mm/hmm.c5
-rw-r--r--mm/huge_memory.c32
-rw-r--r--mm/hugetlb.c6
-rw-r--r--mm/hugetlb_cgroup.c133
-rw-r--r--mm/internal.h7
-rw-r--r--mm/kasan/common.c1
-rw-r--r--mm/kasan/quarantine.c11
-rw-r--r--mm/kasan/shadow.c9
-rw-r--r--mm/khugepaged.c23
-rw-r--r--mm/kmemleak.c21
-rw-r--r--mm/ksm.c5
-rw-r--r--mm/madvise.c494
-rw-r--r--mm/mapping_dirty_helpers.c1
-rw-r--r--mm/memcontrol.c44
-rw-r--r--mm/memory-failure.c187
-rw-r--r--mm/memory.c16
-rw-r--r--mm/mempolicy.c95
-rw-r--r--mm/memremap.c18
-rw-r--r--mm/migrate.c415
-rw-r--r--mm/mlock.c2
-rw-r--r--mm/mmap.c56
-rw-r--r--mm/mmu_gather.c1
-rw-r--r--mm/mprotect.c2
-rw-r--r--mm/oom_kill.c32
-rw-r--r--mm/page_alloc.c197
-rw-r--r--mm/page_counter.c1
-rw-r--r--mm/page_ext.c8
-rw-r--r--mm/page_io.c3
-rw-r--r--mm/page_isolation.c2
-rw-r--r--mm/page_owner.c6
-rw-r--r--mm/page_table_check.c270
-rw-r--r--mm/percpu-internal.h18
-rw-r--r--mm/percpu.c199
-rw-r--r--mm/pgtable-generic.c1
-rw-r--r--mm/rmap.c43
-rw-r--r--mm/shmem.c124
-rw-r--r--mm/slab.h11
-rw-r--r--mm/slab_common.c34
-rw-r--r--mm/swap.c2
-rw-r--r--mm/swapfile.c134
-rw-r--r--mm/truncate.c20
-rw-r--r--mm/userfaultfd.c5
-rw-r--r--mm/util.c15
-rw-r--r--mm/vmalloc.c73
-rw-r--r--mm/vmscan.c2
-rw-r--r--mm/vmstat.c3
-rw-r--r--mm/zpool.c12
-rw-r--r--mm/zsmalloc.c529
-rw-r--r--mm/zswap.c8
-rw-r--r--net/9p/Kconfig7
-rw-r--r--net/9p/Makefile5
-rw-r--r--net/9p/client.c7
-rw-r--r--net/9p/mod.c15
-rw-r--r--net/9p/trans_fd.c14
-rw-r--r--net/9p/trans_virtio.c4
-rw-r--r--net/9p/trans_xen.c1
-rw-r--r--net/atm/proc.c4
-rw-r--r--net/bluetooth/af_bluetooth.c8
-rw-r--r--net/bluetooth/bnep/core.c2
-rw-r--r--net/bluetooth/cmtp/core.c2
-rw-r--r--net/bluetooth/hidp/core.c2
-rw-r--r--net/bridge/br_if.c3
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/can/proc.c2
-rw-r--r--net/ceph/buffer.c4
-rw-r--r--net/ceph/ceph_common.c45
-rw-r--r--net/ceph/crypto.c2
-rw-r--r--net/ceph/messenger.c17
-rw-r--r--net/ceph/messenger_v2.c2
-rw-r--r--net/ceph/osdmap.c12
-rw-r--r--net/core/dev.c6
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/of_net.c33
-rw-r--r--net/core/pktgen.c6
-rw-r--r--net/core/sock.c5
-rw-r--r--net/ipv4/fib_semantics.c76
-rw-r--r--net/ipv4/inet_fragment.c8
-rw-r--r--net/ipv4/ip_fragment.c3
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c6
-rw-r--r--net/ipv4/raw.c8
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/mctp/test/route-test.c2
-rw-r--r--net/ncsi/ncsi-manage.c4
-rw-r--r--net/netfilter/nft_connlimit.c2
-rw-r--r--net/netfilter/nft_last.c2
-rw-r--r--net/netfilter/nft_limit.c2
-rw-r--r--net/netfilter/nft_quota.c2
-rw-r--r--net/netfilter/x_tables.c10
-rw-r--r--net/netfilter/xt_hashlimit.c18
-rw-r--r--net/netfilter/xt_recent.c4
-rw-r--r--net/nfc/llcp_sock.c5
-rw-r--r--net/qrtr/mhi.c2
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/smc/af_smc.c6
-rw-r--r--net/smc/smc.h1
-rw-r--r--net/smc/smc_cdc.c3
-rw-r--r--net/smc/smc_clc.c2
-rw-r--r--net/smc/smc_core.c137
-rw-r--r--net/smc/smc_core.h12
-rw-r--r--net/smc/smc_diag.c6
-rw-r--r--net/smc/smc_pnet.c3
-rw-r--r--net/smc/smc_wr.h4
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/cache.c24
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/svc.c175
-rw-r--r--net/sunrpc/svc_xprt.c11
-rw-r--r--net/tls/tls_sw.c1
-rw-r--r--net/unix/garbage.c14
-rw-r--r--net/unix/scm.c6
-rw-r--r--net/vmw_vsock/virtio_transport.c4
-rw-r--r--net/wireless/Makefile4
-rw-r--r--net/xfrm/xfrm_policy.c3
-rw-r--r--samples/Kconfig9
-rw-r--r--samples/Makefile1
-rw-r--r--samples/bpf/offwaketime_kern.c4
-rw-r--r--samples/bpf/test_overhead_kprobe_kern.c11
-rw-r--r--samples/bpf/test_overhead_tp_kern.c5
-rw-r--r--samples/coresight/Makefile4
-rw-r--r--samples/coresight/coresight-cfg-sample.c73
-rw-r--r--samples/trace_events/trace-events-sample.c3
-rw-r--r--samples/trace_events/trace-events-sample.h35
-rw-r--r--scripts/.gitignore1
-rw-r--r--scripts/Kbuild.include47
-rw-r--r--scripts/Makefile17
-rw-r--r--scripts/Makefile.lib35
-rw-r--r--scripts/Makefile.modinst4
-rw-r--r--scripts/Makefile.ubsan1
-rwxr-xr-xscripts/checkpatch.pl54
-rw-r--r--scripts/coccinelle/iterators/fen.cocci124
-rw-r--r--scripts/coccinelle/misc/bugon.cocci63
-rw-r--r--scripts/const_structs.checkpatch23
-rwxr-xr-xscripts/dtc/dtx_diff8
-rwxr-xr-xscripts/gen_autoksyms.sh11
-rwxr-xr-xscripts/get_maintainer.pl2
-rw-r--r--scripts/kconfig/Makefile3
-rw-r--r--scripts/kconfig/conf.c17
-rw-r--r--scripts/kconfig/confdata.c24
-rwxr-xr-xscripts/kconfig/streamline_config.pl2
-rwxr-xr-xscripts/link-vmlinux.sh55
-rwxr-xr-xscripts/min-tool-version.sh2
-rw-r--r--scripts/mod/modpost.c15
-rwxr-xr-xscripts/remove-stale-files2
-rwxr-xr-xscripts/setlocalversion9
-rw-r--r--scripts/sorttable.c40
-rw-r--r--scripts/sorttable.h125
-rw-r--r--scripts/spelling.txt1
-rwxr-xr-xscripts/tags.sh126
-rw-r--r--sound/core/info.c4
-rw-r--r--sound/core/init.c25
-rw-r--r--sound/core/misc.c2
-rw-r--r--sound/pci/hda/cs35l41_hda.c134
-rw-r--r--sound/pci/hda/cs35l41_hda.h4
-rw-r--r--sound/pci/hda/cs35l41_hda_i2c.c6
-rw-r--r--sound/pci/hda/cs35l41_hda_spi.c6
-rw-r--r--sound/pci/hda/patch_cs8409-tables.c2
-rw-r--r--sound/pci/hda/patch_realtek.c12
-rw-r--r--sound/usb/mixer_maps.c12
-rw-r--r--sound/virtio/virtio_card.c4
-rw-r--r--tools/accounting/getdelays.c8
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h1
-rw-r--r--tools/arch/x86/include/asm/msr-index.h17
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h16
-rw-r--r--tools/arch/x86/include/uapi/asm/prctl.h26
-rw-r--r--tools/arch/x86/lib/memcpy_64.S12
-rw-r--r--tools/arch/x86/lib/memset_64.S6
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.bpf.c4
-rw-r--r--tools/build/Build.include2
-rw-r--r--tools/iio/iio_event_monitor.c1
-rw-r--r--tools/include/asm-generic/bitops.h1
-rw-r--r--tools/include/asm-generic/bitops/find.h145
-rw-r--r--tools/include/linux/bitmap.h7
-rw-r--r--tools/include/linux/find.h (renamed from include/asm-generic/bitops/find.h)54
-rw-r--r--tools/include/linux/hash.h5
-rw-r--r--tools/include/uapi/asm-generic/unistd.h5
-rw-r--r--tools/include/uapi/drm/drm.h18
-rw-r--r--tools/include/uapi/linux/kvm.h16
-rw-r--r--tools/include/uapi/linux/perf_event.h5
-rw-r--r--tools/lib/find_bit.c20
-rw-r--r--tools/lib/perf/Documentation/libperf.txt11
-rw-r--r--tools/lib/perf/cpumap.c113
-rw-r--r--tools/lib/perf/evlist.c19
-rw-r--r--tools/lib/perf/evsel.c115
-rw-r--r--tools/lib/perf/include/internal/cpumap.h18
-rw-r--r--tools/lib/perf/include/internal/evlist.h5
-rw-r--r--tools/lib/perf/include/internal/evsel.h4
-rw-r--r--tools/lib/perf/include/internal/mmap.h5
-rw-r--r--tools/lib/perf/include/perf/cpumap.h8
-rw-r--r--tools/lib/perf/include/perf/evsel.h14
-rw-r--r--tools/lib/perf/libperf.map2
-rw-r--r--tools/lib/perf/mmap.c4
-rw-r--r--tools/lib/perf/tests/test-evlist.c162
-rw-r--r--tools/lib/traceevent/event-parse.c59
-rw-r--r--tools/lib/traceevent/event-parse.h5
-rw-r--r--tools/lib/traceevent/parse-filter.c5
-rw-r--r--tools/objtool/check.c8
-rw-r--r--tools/perf/Documentation/perf-buildid-cache.txt5
-rw-r--r--tools/perf/Documentation/perf-config.txt9
-rw-r--r--tools/perf/Documentation/perf-list.txt48
-rw-r--r--tools/perf/Documentation/perf-record.txt15
-rw-r--r--tools/perf/Documentation/perf-stat.txt10
-rw-r--r--tools/perf/Documentation/perf-top.txt7
-rw-r--r--tools/perf/Makefile.config10
-rw-r--r--tools/perf/Makefile.perf4
-rw-r--r--tools/perf/arch/arm/include/perf_regs.h42
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c54
-rw-r--r--tools/perf/arch/arm64/include/perf_regs.h78
-rw-r--r--tools/perf/arch/arm64/util/machine.c7
-rw-r--r--tools/perf/arch/arm64/util/pmu.c2
-rw-r--r--tools/perf/arch/csky/include/perf_regs.h82
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl1
-rw-r--r--tools/perf/arch/mips/include/perf_regs.h69
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/powerpc/include/perf_regs.h66
-rw-r--r--tools/perf/arch/powerpc/util/event.c8
-rw-r--r--tools/perf/arch/riscv/include/perf_regs.h74
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/s390/include/perf_regs.h78
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--tools/perf/arch/x86/include/perf_regs.h82
-rw-r--r--tools/perf/arch/x86/util/evlist.c17
-rw-r--r--tools/perf/bench/epoll-ctl.c4
-rw-r--r--tools/perf/bench/epoll-wait.c4
-rw-r--r--tools/perf/bench/evlist-open-close.c4
-rw-r--r--tools/perf/bench/futex-hash.c4
-rw-r--r--tools/perf/bench/futex-lock-pi.c4
-rw-r--r--tools/perf/bench/futex-requeue.c4
-rw-r--r--tools/perf/bench/futex-wake-parallel.c4
-rw-r--r--tools/perf/bench/futex-wake.c4
-rw-r--r--tools/perf/builtin-bench.c5
-rw-r--r--tools/perf/builtin-buildid-cache.c25
-rw-r--r--tools/perf/builtin-c2c.c15
-rw-r--r--tools/perf/builtin-ftrace.c447
-rw-r--r--tools/perf/builtin-inject.c5
-rw-r--r--tools/perf/builtin-kmem.c2
-rw-r--r--tools/perf/builtin-record.c23
-rw-r--r--tools/perf/builtin-report.c4
-rw-r--r--tools/perf/builtin-sched.c71
-rw-r--r--tools/perf/builtin-script.c43
-rw-r--r--tools/perf/builtin-stat.c557
-rw-r--r--tools/perf/builtin-trace.c5
-rw-r--r--tools/perf/dlfilters/dlfilter-test-api-v0.c2
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json8
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json20
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json155
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json47
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json143
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json38
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json5
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json23
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json14
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json29
-rw-r--r--tools/perf/pmu-events/arch/arm64/common-and-microarch.json (renamed from tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json)198
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv1
-rw-r--r--tools/perf/pmu-events/arch/arm64/recommended.json (renamed from tools/perf/pmu-events/arch/arm64/armv8-recommended.json)202
-rw-r--r--tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json16
-rw-r--r--tools/perf/pmu-events/jevents.c2
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/attr.c6
-rw-r--r--tools/perf/tests/bitmap.c4
-rw-r--r--tools/perf/tests/builtin-test.c16
-rw-r--r--tools/perf/tests/cpumap.c6
-rw-r--r--tools/perf/tests/event_update.c8
-rw-r--r--tools/perf/tests/mem2node.c9
-rw-r--r--tools/perf/tests/mmap-basic.c5
-rw-r--r--tools/perf/tests/openat-syscall-all-cpus.c39
-rw-r--r--tools/perf/tests/parse-events.c49
-rw-r--r--tools/perf/tests/pmu-events.c32
-rwxr-xr-xtools/perf/tests/shell/stat_all_metricgroups.sh2
-rw-r--r--tools/perf/tests/sigtrap.c177
-rw-r--r--tools/perf/tests/stat.c3
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/tests/topology.c58
-rw-r--r--tools/perf/ui/browsers/annotate.c23
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/affinity.c10
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c2
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h1
-rw-r--r--tools/perf/util/arm-spe.c67
-rw-r--r--tools/perf/util/arm64-frame-pointer-unwind-support.c63
-rw-r--r--tools/perf/util/arm64-frame-pointer-unwind-support.h10
-rw-r--r--tools/perf/util/auxtrace.c14
-rw-r--r--tools/perf/util/auxtrace.h5
-rw-r--r--tools/perf/util/bpf-loader.c15
-rw-r--r--tools/perf/util/bpf_counter.c29
-rw-r--r--tools/perf/util/bpf_counter.h4
-rw-r--r--tools/perf/util/bpf_counter_cgroup.c10
-rw-r--r--tools/perf/util/bpf_ftrace.c152
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c114
-rw-r--r--tools/perf/util/callchain.c14
-rw-r--r--tools/perf/util/callchain.h4
-rw-r--r--tools/perf/util/counts.c8
-rw-r--r--tools/perf/util/counts.h14
-rw-r--r--tools/perf/util/cpumap.c253
-rw-r--r--tools/perf/util/cpumap.h125
-rw-r--r--tools/perf/util/cputopo.c11
-rw-r--r--tools/perf/util/data-convert-bt.c2
-rw-r--r--tools/perf/util/debug.c2
-rw-r--r--tools/perf/util/env.c29
-rw-r--r--tools/perf/util/env.h3
-rw-r--r--tools/perf/util/evlist-hybrid.c11
-rw-r--r--tools/perf/util/evlist.c174
-rw-r--r--tools/perf/util/evlist.h52
-rw-r--r--tools/perf/util/evsel.c205
-rw-r--r--tools/perf/util/evsel.h33
-rw-r--r--tools/perf/util/expr.c37
-rw-r--r--tools/perf/util/ftrace.h81
-rw-r--r--tools/perf/util/header.c6
-rw-r--r--tools/perf/util/hist.c4
-rw-r--r--tools/perf/util/hist.h3
-rw-r--r--tools/perf/util/libunwind/arm64.c2
-rw-r--r--tools/perf/util/machine.c53
-rw-r--r--tools/perf/util/machine.h1
-rw-r--r--tools/perf/util/mem-events.c29
-rw-r--r--tools/perf/util/metricgroup.c46
-rw-r--r--tools/perf/util/mmap.c19
-rw-r--r--tools/perf/util/mmap.h3
-rw-r--r--tools/perf/util/namespaces.c76
-rw-r--r--tools/perf/util/namespaces.h2
-rw-r--r--tools/perf/util/parse-events-hybrid.c9
-rw-r--r--tools/perf/util/parse-events.c77
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/parse-events.l2
-rw-r--r--tools/perf/util/parse-events.y17
-rw-r--r--tools/perf/util/perf_api_probe.c19
-rw-r--r--tools/perf/util/perf_regs.c666
-rw-r--r--tools/perf/util/perf_regs.h17
-rw-r--r--tools/perf/util/probe-event.c3
-rw-r--r--tools/perf/util/python.c12
-rw-r--r--tools/perf/util/record.c13
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c22
-rw-r--r--tools/perf/util/session.c37
-rw-r--r--tools/perf/util/smt.c73
-rw-r--r--tools/perf/util/sort.c36
-rw-r--r--tools/perf/util/sort.h3
-rw-r--r--tools/perf/util/stat-display.c138
-rw-r--r--tools/perf/util/stat-shadow.c308
-rw-r--r--tools/perf/util/stat.c47
-rw-r--r--tools/perf/util/stat.h9
-rw-r--r--tools/perf/util/svghelper.c10
-rw-r--r--tools/perf/util/synthetic-events.c24
-rw-r--r--tools/perf/util/synthetic-events.h3
-rw-r--r--tools/perf/util/top.c6
-rw-r--r--tools/perf/util/util.c15
-rw-r--r--tools/perf/util/util.h11
-rw-r--r--tools/power/acpi/.gitignore1
-rw-r--r--tools/power/acpi/Makefile16
-rw-r--r--tools/power/acpi/Makefile.rules2
-rw-r--r--tools/power/acpi/man/pfrut.8137
-rw-r--r--tools/power/acpi/tools/pfrut/Makefile23
-rw-r--r--tools/power/acpi/tools/pfrut/pfrut.c424
-rw-r--r--tools/testing/selftests/bpf/prog_tests/d_path.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path_check_types.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_stacktrace_map.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_tracepoint.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_link.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/ringbuf.c95
-rw-r--r--tools/testing/selftests/bpf/verifier/spill_fill.c2
-rw-r--r--tools/testing/selftests/kvm/.gitignore6
-rw-r--r--tools/testing/selftests/kvm/Makefile22
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c50
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c853
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic.h26
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic_v3.h (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3.h)12
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h3
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/vgic.h18
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h409
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h399
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h135
-rw-r--r--tools/testing/selftests/kvm/include/ucall_common.h59
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h51
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic.c66
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_private.h11
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3.c206
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c82
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c103
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c59
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c136
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c362
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/ucall.c87
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c229
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c448
-rw-r--r--tools/testing/selftests/kvm/x86_64/cpuid_test.c (renamed from tools/testing/selftests/kvm/x86_64/get_cpuid_test.c)30
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c434
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c59
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c139
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c218
-rwxr-xr-xtools/testing/selftests/lkdtm/stack-entropy.sh16
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh3
-rw-r--r--tools/testing/selftests/net/settings2
-rw-r--r--tools/testing/selftests/vm/charge_reserved_hugetlb.sh34
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c42
-rw-r--r--tools/testing/selftests/vm/hugepage-mremap.c46
-rw-r--r--tools/testing/selftests/vm/hugetlb_reparenting_test.sh21
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh2
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c31
-rw-r--r--tools/testing/selftests/vm/write_hugetlb_memory.sh2
-rw-r--r--tools/tracing/rtla/Makefile102
-rw-r--r--tools/tracing/rtla/README.txt36
-rw-r--r--tools/tracing/rtla/src/osnoise.c875
-rw-r--r--tools/tracing/rtla/src/osnoise.h91
-rw-r--r--tools/tracing/rtla/src/osnoise_hist.c801
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c579
-rw-r--r--tools/tracing/rtla/src/rtla.c87
-rw-r--r--tools/tracing/rtla/src/timerlat.c72
-rw-r--r--tools/tracing/rtla/src/timerlat.h4
-rw-r--r--tools/tracing/rtla/src/timerlat_hist.c822
-rw-r--r--tools/tracing/rtla/src/timerlat_top.c618
-rw-r--r--tools/tracing/rtla/src/trace.c192
-rw-r--r--tools/tracing/rtla/src/trace.h27
-rw-r--r--tools/tracing/rtla/src/utils.c433
-rw-r--r--tools/tracing/rtla/src/utils.h56
-rw-r--r--usr/Makefile7
-rw-r--r--usr/include/Makefile7
-rwxr-xr-xusr/include/headers_check.pl (renamed from scripts/headers_check.pl)0
-rw-r--r--virt/kvm/Kconfig6
-rw-r--r--virt/kvm/Makefile.kvm14
-rw-r--r--virt/kvm/async_pf.c2
-rw-r--r--virt/kvm/dirty_ring.c11
-rw-r--r--virt/kvm/kvm_main.c1071
-rw-r--r--virt/kvm/kvm_mm.h44
-rw-r--r--virt/kvm/mmu_lock.h23
-rw-r--r--virt/kvm/pfncache.c337
2578 files changed, 80167 insertions, 30969 deletions
diff --git a/.mailmap b/.mailmap
index a83599921b1a..b157f88ce26a 100644
--- a/.mailmap
+++ b/.mailmap
@@ -49,10 +49,12 @@ Andy Adamson <andros@citi.umich.edu>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
+Anup Patel <anup@brainfault.org> <anup.patel@wdc.com>
Archit Taneja <archit@ti.com>
Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
Arnaud Patard <arnaud.patard@rtp-net.org>
Arnd Bergmann <arnd@arndb.de>
+Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
Axel Dyks <xl@xlsigned.net>
Axel Lin <axel.lin@gmail.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
diff --git a/CREDITS b/CREDITS
index d8f63e8329e8..b97256d5bc24 100644
--- a/CREDITS
+++ b/CREDITS
@@ -315,6 +315,11 @@ S: Via Delle Palme, 9
S: Terni 05100
S: Italy
+N: Ohad Ben Cohen
+E: ohad@wizery.com
+D: Remote Processor (remoteproc) subsystem
+D: Remote Processor Messaging (rpmsg) subsystem
+
N: Krzysztof Benedyczak
E: golbi@mat.uni.torun.pl
W: http://www.mat.uni.torun.pl/~golbi
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index df4afbccf037..0c2b613f2373 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -41,14 +41,14 @@ KernelVersion: 5.6.0
Contact: dmaengine@vger.kernel.org
Description: The maximum number of groups can be created under this device.
-What: /sys/bus/dsa/devices/dsa<m>/max_tokens
-Date: Oct 25, 2019
-KernelVersion: 5.6.0
+What: /sys/bus/dsa/devices/dsa<m>/max_read_buffers
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
-Description: The total number of bandwidth tokens supported by this device.
- The bandwidth tokens represent resources within the DSA
+Description: The total number of read buffers supported by this device.
+ The read buffers represent resources within the DSA
implementation, and these resources are allocated by engines to
- support operations.
+ support operations. See DSA spec v1.2 9.2.4 Total Read Buffers.
What: /sys/bus/dsa/devices/dsa<m>/max_transfer_size
Date: Oct 25, 2019
@@ -115,13 +115,13 @@ KernelVersion: 5.6.0
Contact: dmaengine@vger.kernel.org
Description: To indicate if this device is configurable or not.
-What: /sys/bus/dsa/devices/dsa<m>/token_limit
-Date: Oct 25, 2019
-KernelVersion: 5.6.0
+What: /sys/bus/dsa/devices/dsa<m>/read_buffer_limit
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
-Description: The maximum number of bandwidth tokens that may be in use at
+Description: The maximum number of read buffers that may be in use at
one time by operations that access low bandwidth memory in the
- device.
+ device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer Limit.
What: /sys/bus/dsa/devices/dsa<m>/cmd_status
Date: Aug 28, 2020
@@ -220,8 +220,38 @@ Contact: dmaengine@vger.kernel.org
Description: Show the current number of entries in this WQ if WQ Occupancy
Support bit WQ capabilities is 1.
+What: /sys/bus/dsa/devices/wq<m>.<n>/enqcmds_retries
+Date Oct 29, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: Indicate the number of retires for an enqcmds submission on a sharedwq.
+ A max value to set attribute is capped at 64.
+
What: /sys/bus/dsa/devices/engine<m>.<n>/group_id
Date: Oct 25, 2019
KernelVersion: 5.6.0
Contact: dmaengine@vger.kernel.org
Description: The group that this engine belongs to.
+
+What: /sys/bus/dsa/devices/group<m>.<n>/use_read_buffer_limit
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: Enable the use of global read buffer limit for the group. See DSA
+ spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit.
+
+What: /sys/bus/dsa/devices/group<m>.<n>/read_buffers_allowed
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: Indicates max number of read buffers that may be in use at one time
+ by all engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read
+ Buffers Allowed.
+
+What: /sys/bus/dsa/devices/group<m>.<n>/read_buffers_reserved
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: Indicates the number of Read Buffers reserved for the use of
+ engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers
+ Reserved.
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 63c46d9d538f..2667cbf940f3 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -21,11 +21,11 @@ Description: Allow the root user to disable/enable in runtime the clock
a different engine to disable/enable its clock gating feature.
The bitmask is composed of 20 bits:
- ======= ============
+ ======= ============
0 - 7 DMA channels
8 - 11 MME engines
12 - 19 TPC engines
- ======= ============
+ ======= ============
The bit's location of a specific engine can be determined
using (1 << GAUDI_ENGINE_ID_*). GAUDI_ENGINE_ID_* values
@@ -155,6 +155,13 @@ Description: Triggers an I2C transaction that is generated by the device's
CPU. Writing to this file generates a write transaction while
reading from the file generates a read transaction
+What: /sys/kernel/debug/habanalabs/hl<n>/i2c_len
+Date: Dec 2021
+KernelVersion: 5.17
+Contact: obitton@habana.ai
+Description: Sets I2C length in bytes for I2C transaction that is generated by
+ the device's CPU
+
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
Date: Jan 2019
KernelVersion: 5.1
@@ -226,12 +233,6 @@ Description: Gets the state dump occurring on a CS timeout or failure.
Writing an integer X discards X state dumps, so that the
next read would return X+1-st newest state dump.
-What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
-Date: Sep 2021
-KernelVersion: 5.16
-Contact: obitton@habana.ai
-Description: Sets the command submission timeout value in seconds.
-
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6
@@ -239,6 +240,12 @@ Contact: ogabbay@kernel.org
Description: Sets the stop-on_error option for the device engines. Value of
"0" is for disable, otherwise enable.
+What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
+Date: Sep 2021
+KernelVersion: 5.16
+Contact: obitton@habana.ai
+Description: Sets the command submission timeout value in seconds.
+
What: /sys/kernel/debug/habanalabs/hl<n>/userptr
Date: Jan 2019
KernelVersion: 5.1
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818 b/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818
new file mode 100644
index 000000000000..f6c035752639
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818
@@ -0,0 +1,16 @@
+What: /sys/bus/iio/devices/iio:deviceX/filter_mode_available
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading this returns the valid values that can be written to the
+ on_altvoltage0_mode attribute:
+
+ - auto -> Adjust bandpass filter to track changes in input clock rate.
+ - manual -> disable/unregister the clock rate notifier / input clock tracking.
+
+What: /sys/bus/iio/devices/iio:deviceX/filter_mode
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute configures the filter mode.
+ Reading returns the actual mode.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013 b/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013
new file mode 100644
index 000000000000..de1e323e5d47
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013
@@ -0,0 +1,38 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-1_i_calibphase
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write unscaled value for the Local Oscillatior path quadrature I phase shift.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-1_q_calibphase
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write unscaled value for the Local Oscillatior path quadrature Q phase shift.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibbias
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write value for the Local Oscillatior Feedthrough Offset Calibration I Positive
+ side.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibbias
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write value for the Local Oscillatior Feedthrough Offset Calibration Q Positive side.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage1_i_calibbias
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write raw value for the Local Oscillatior Feedthrough Offset Calibration I Negative
+ side.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage1_q_calibbias
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write raw value for the Local Oscillatior Feedthrough Offset Calibration Q Negative
+ side.
diff --git a/Documentation/ABI/testing/sysfs-bus-vdpa b/Documentation/ABI/testing/sysfs-bus-vdpa
new file mode 100644
index 000000000000..28a6111202ba
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-vdpa
@@ -0,0 +1,57 @@
+What: /sys/bus/vdpa/driver_autoprobe
+Date: March 2020
+Contact: virtualization@lists.linux-foundation.org
+Description:
+ This file determines whether new devices are immediately bound
+ to a driver after the creation. It initially contains 1, which
+ means the kernel automatically binds devices to a compatible
+ driver immediately after they are created.
+
+ Writing "0" to this file disable this feature, any other string
+ enable it.
+
+What: /sys/bus/vdpa/driver_probe
+Date: March 2020
+Contact: virtualization@lists.linux-foundation.org
+Description:
+ Writing a device name to this file will cause the kernel binds
+ devices to a compatible driver.
+
+ This can be useful when /sys/bus/vdpa/driver_autoprobe is
+ disabled.
+
+What: /sys/bus/vdpa/drivers/.../bind
+Date: March 2020
+Contact: virtualization@lists.linux-foundation.org
+Description:
+ Writing a device name to this file will cause the driver to
+ attempt to bind to the device. This is useful for overriding
+ default bindings.
+
+What: /sys/bus/vdpa/drivers/.../unbind
+Date: March 2020
+Contact: virtualization@lists.linux-foundation.org
+Description:
+ Writing a device name to this file will cause the driver to
+ attempt to unbind from the device. This may be useful when
+ overriding default bindings.
+
+What: /sys/bus/vdpa/devices/.../driver_override
+Date: November 2021
+Contact: virtualization@lists.linux-foundation.org
+Description:
+ This file allows the driver for a device to be specified.
+ When specified, only a driver with a name matching the value
+ written to driver_override will have an opportunity to bind to
+ the device. The override is specified by writing a string to the
+ driver_override file (echo vhost-vdpa > driver_override) and may
+ be cleared with an empty string (echo > driver_override).
+ This returns the device to standard matching rules binding.
+ Writing to driver_override does not automatically unbind the
+ device from its current driver or make any attempt to
+ automatically load the specified driver. If no driver with a
+ matching name is currently loaded in the kernel, the device will
+ not bind to any driver. This also allows devices to opt-out of
+ driver binding using a driver_override name such as "none".
+ Only a single driver may be specified in the override, there is
+ no support for parsing delimiters.
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index b268e3e18b4a..2416b03ff283 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -112,6 +112,11 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Set timeout to issue discard commands during umount.
Default: 5 secs
+What: /sys/fs/f2fs/<disk>/pending_discard
+Date: November 2021
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Shows the number of pending discard commands in the queue.
+
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@@ -528,3 +533,10 @@ Description: With "mode=fragment:block" mount options, we can scatter block allo
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.
+
+What: /sys/fs/f2fs/<disk>/gc_urgent_high_remaining
+Date: December 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: You can set the trial count limit for GC urgent high mode with this value.
+ If GC thread gets to the limit, the mode will turn back to GC normal mode.
+ By default, the value is zero, which means there is no limit like before.
diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index 1b8b46deeb29..197fe319cbec 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -13,6 +13,8 @@ a) waiting for a CPU (while being runnable)
b) completion of synchronous block I/O initiated by the task
c) swapping in pages
d) memory reclaim
+e) thrashing page cache
+f) direct compact
and makes these statistics available to userspace through
the taskstats interface.
@@ -41,11 +43,12 @@ generic data structure to userspace corresponding to per-pid and per-tgid
statistics. The delay accounting functionality populates specific fields of
this structure. See
- include/linux/taskstats.h
+ include/uapi/linux/taskstats.h
for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
+delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
+cache, direct compact etc.
Taking the difference of two successive readings of a given
counter (say cpu_delay_total) for a task will give the delay
@@ -88,41 +91,37 @@ seen.
General format of the getdelays command::
- getdelays [-t tgid] [-p pid] [-c cmd...]
-
+ getdelays [-dilv] [-t tgid] [-p pid]
Get delays, since system boot, for pid 10::
- # ./getdelays -p 10
+ # ./getdelays -d -p 10
(output similar to next case)
Get sum of delays, since system boot, for all pids with tgid 5::
- # ./getdelays -t 5
-
-
- CPU count real total virtual total delay total
- 7876 92005750 100000000 24001500
- IO count delay total
- 0 0
- SWAP count delay total
- 0 0
- RECLAIM count delay total
- 0 0
+ # ./getdelays -d -t 5
+ print delayacct stats ON
+ TGID 5
-Get delays seen in executing a given simple command::
- # ./getdelays -c ls /
+ CPU count real total virtual total delay total delay average
+ 8 7000000 6872122 3382277 0.423ms
+ IO count delay total delay average
+ 0 0 0ms
+ SWAP count delay total delay average
+ 0 0 0ms
+ RECLAIM count delay total delay average
+ 0 0 0ms
+ THRASHING count delay total delay average
+ 0 0 0ms
+ COMPACT count delay total delay average
+ 0 0 0ms
- bin data1 data3 data5 dev home media opt root srv sys usr
- boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var
+Get IO accounting for pid 1, it works only with -p::
+ # ./getdelays -i -p 1
+ printing IO accounting
+ linuxrc: read=65536, write=0, cancelled_write=0
- CPU count real total virtual total delay total
- 6 4000250 4000000 0
- IO count delay total
- 0 0
- SWAP count delay total
- 0 0
- RECLAIM count delay total
- 0 0
+The above command can be used with -v to get more debug information.
diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
index f2b3439edcc2..860fe651d645 100644
--- a/Documentation/accounting/psi.rst
+++ b/Documentation/accounting/psi.rst
@@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger
for the same psi metric can be specified. However for each trigger a separate
file descriptor is required to be able to poll it separately from others,
therefore for each trigger a separate open() syscall should be made even
-when opening the same psi interface file.
+when opening the same psi interface file. Write operations to a file descriptor
+with an already existing psi trigger will fail with EBUSY.
Monitors activate only when system enters stall state for the monitored
psi metric and deactivates upon exit from the stall state. While system is
diff --git a/Documentation/admin-guide/cgroup-v1/hugetlb.rst b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
index 338f2c7d7a1c..0fa724d82abb 100644
--- a/Documentation/admin-guide/cgroup-v1/hugetlb.rst
+++ b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
@@ -29,12 +29,14 @@ Brief summary of control files::
hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb
hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB usage limit
+ hugetlb.<hugepagesize>.numa_stat # show the numa information of the hugetlb memory charged to this cgroup
For a system supporting three hugepage sizes (64k, 32M and 1G), the control
files include::
hugetlb.1GB.limit_in_bytes
hugetlb.1GB.max_usage_in_bytes
+ hugetlb.1GB.numa_stat
hugetlb.1GB.usage_in_bytes
hugetlb.1GB.failcnt
hugetlb.1GB.rsvd.limit_in_bytes
@@ -43,6 +45,7 @@ files include::
hugetlb.1GB.rsvd.failcnt
hugetlb.64KB.limit_in_bytes
hugetlb.64KB.max_usage_in_bytes
+ hugetlb.64KB.numa_stat
hugetlb.64KB.usage_in_bytes
hugetlb.64KB.failcnt
hugetlb.64KB.rsvd.limit_in_bytes
@@ -51,6 +54,7 @@ files include::
hugetlb.64KB.rsvd.failcnt
hugetlb.32MB.limit_in_bytes
hugetlb.32MB.max_usage_in_bytes
+ hugetlb.32MB.numa_stat
hugetlb.32MB.usage_in_bytes
hugetlb.32MB.failcnt
hugetlb.32MB.rsvd.limit_in_bytes
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 2aeb7ae8b393..5aa368d165da 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1268,6 +1268,9 @@ PAGE_SIZE multiple when read back.
The number of processes belonging to this cgroup
killed by any kind of OOM killer.
+ oom_group_kill
+ The number of times a group OOM has occurred.
+
memory.events.local
Similar to memory.events but the fields in the file are local
to the cgroup i.e. not hierarchical. The file modified event
@@ -1311,6 +1314,9 @@ PAGE_SIZE multiple when read back.
sock (npn)
Amount of memory used in network transmission buffers
+ vmalloc (npn)
+ Amount of memory used for vmap backed memory.
+
shmem
Amount of cached filesystem data that is swap-backed,
such as tmpfs, shm segments, shared anonymous mmap()s
@@ -2260,6 +2266,11 @@ HugeTLB Interface Files
are local to the cgroup i.e. not hierarchical. The file modified event
generated on this file reflects only the local events.
+ hugetlb.<hugepagesize>.numa_stat
+ Similar to memory.numa_stat, it shows the numa information of the
+ hugetlb pages of <hugepagesize> in this cgroup. Only active in
+ use hugetlb pages are included. The per-node values are in bytes.
+
Misc
----
diff --git a/Documentation/admin-guide/mm/damon/reclaim.rst b/Documentation/admin-guide/mm/damon/reclaim.rst
index fb9def3a7355..0af51a9705b1 100644
--- a/Documentation/admin-guide/mm/damon/reclaim.rst
+++ b/Documentation/admin-guide/mm/damon/reclaim.rst
@@ -208,6 +208,31 @@ PID of the DAMON thread.
If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. Else,
-1.
+nr_reclaim_tried_regions
+------------------------
+
+Number of memory regions that tried to be reclaimed by DAMON_RECLAIM.
+
+bytes_reclaim_tried_regions
+---------------------------
+
+Total bytes of memory regions that tried to be reclaimed by DAMON_RECLAIM.
+
+nr_reclaimed_regions
+--------------------
+
+Number of memory regions that successfully be reclaimed by DAMON_RECLAIM.
+
+bytes_reclaimed_regions
+-----------------------
+
+Total bytes of memory regions that successfully be reclaimed by DAMON_RECLAIM.
+
+nr_quota_exceeds
+----------------
+
+Number of times that the time/space quota limits have exceeded.
+
Example
=======
diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
index ed96bbf0daff..59b84904a854 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -7,37 +7,40 @@ Detailed Usages
DAMON provides below three interfaces for different users.
- *DAMON user space tool.*
- This is for privileged people such as system administrators who want a
- just-working human-friendly interface. Using this, users can use the DAMON’s
- major features in a human-friendly way. It may not be highly tuned for
- special cases, though. It supports both virtual and physical address spaces
- monitoring.
+ `This <https://github.com/awslabs/damo>`_ is for privileged people such as
+ system administrators who want a just-working human-friendly interface.
+ Using this, users can use the DAMON’s major features in a human-friendly way.
+ It may not be highly tuned for special cases, though. It supports both
+ virtual and physical address spaces monitoring. For more detail, please
+ refer to its `usage document
+ <https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
- *debugfs interface.*
- This is for privileged user space programmers who want more optimized use of
- DAMON. Using this, users can use DAMON’s major features by reading
- from and writing to special debugfs files. Therefore, you can write and use
- your personalized DAMON debugfs wrapper programs that reads/writes the
- debugfs files instead of you. The DAMON user space tool is also a reference
- implementation of such programs. It supports both virtual and physical
- address spaces monitoring.
+ :ref:`This <debugfs_interface>` is for privileged user space programmers who
+ want more optimized use of DAMON. Using this, users can use DAMON’s major
+ features by reading from and writing to special debugfs files. Therefore,
+ you can write and use your personalized DAMON debugfs wrapper programs that
+ reads/writes the debugfs files instead of you. The `DAMON user space tool
+ <https://github.com/awslabs/damo>`_ is one example of such programs. It
+ supports both virtual and physical address spaces monitoring. Note that this
+ interface provides only simple :ref:`statistics <damos_stats>` for the
+ monitoring results. For detailed monitoring results, DAMON provides a
+ :ref:`tracepoint <tracepoint>`.
- *Kernel Space Programming Interface.*
- This is for kernel space programmers. Using this, users can utilize every
- feature of DAMON most flexibly and efficiently by writing kernel space
- DAMON application programs for you. You can even extend DAMON for various
- address spaces.
+ :doc:`This </vm/damon/api>` is for kernel space programmers. Using this,
+ users can utilize every feature of DAMON most flexibly and efficiently by
+ writing kernel space DAMON application programs for you. You can even extend
+ DAMON for various address spaces. For detail, please refer to the interface
+ :doc:`document </vm/damon/api>`.
-Nevertheless, you could write your own user space tool using the debugfs
-interface. A reference implementation is available at
-https://github.com/awslabs/damo. If you are a kernel programmer, you could
-refer to :doc:`/vm/damon/api` for the kernel space programming interface. For
-the reason, this document describes only the debugfs interface
+
+.. _debugfs_interface:
debugfs Interface
=================
-DAMON exports five files, ``attrs``, ``target_ids``, ``init_regions``,
-``schemes`` and ``monitor_on`` under its debugfs directory,
-``<debugfs>/damon/``.
+DAMON exports eight files, ``attrs``, ``target_ids``, ``init_regions``,
+``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` and
+``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
Attributes
@@ -131,24 +134,38 @@ Schemes
For usual DAMON-based data access aware memory management optimizations, users
would simply want the system to apply a memory management action to a memory
-region of a specific size having a specific access frequency for a specific
-time. DAMON receives such formalized operation schemes from the user and
-applies those to the target processes. It also counts the total number and
-size of regions that each scheme is applied. This statistics can be used for
-online analysis or tuning of the schemes.
+region of a specific access pattern. DAMON receives such formalized operation
+schemes from the user and applies those to the target processes.
Users can get and set the schemes by reading from and writing to ``schemes``
debugfs file. Reading the file also shows the statistics of each scheme. To
-the file, each of the schemes should be represented in each line in below form:
+the file, each of the schemes should be represented in each line in below
+form::
+
+ <target access pattern> <action> <quota> <watermarks>
+
+You can disable schemes by simply writing an empty string to the file.
+
+Target Access Pattern
+~~~~~~~~~~~~~~~~~~~~~
+
+The ``<target access pattern>`` is constructed with three ranges in below
+form::
+
+ min-size max-size min-acc max-acc min-age max-age
- min-size max-size min-acc max-acc min-age max-age action
+Specifically, bytes for the size of regions (``min-size`` and ``max-size``),
+number of monitored accesses per aggregate interval for access frequency
+(``min-acc`` and ``max-acc``), number of aggregate intervals for the age of
+regions (``min-age`` and ``max-age``) are specified. Note that the ranges are
+closed interval.
-Note that the ranges are closed interval. Bytes for the size of regions
-(``min-size`` and ``max-size``), number of monitored accesses per aggregate
-interval for access frequency (``min-acc`` and ``max-acc``), number of
-aggregate intervals for the age of regions (``min-age`` and ``max-age``), and a
-predefined integer for memory management actions should be used. The supported
-numbers and their meanings are as below.
+Action
+~~~~~~
+
+The ``<action>`` is a predefined integer for memory management actions, which
+DAMON will apply to the regions having the target access pattern. The
+supported numbers and their meanings are as below.
- 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``
- 1: Call ``madvise()`` for the region with ``MADV_COLD``
@@ -157,20 +174,82 @@ numbers and their meanings are as below.
- 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
- 5: Do nothing but count the statistics
-You can disable schemes by simply writing an empty string to the file. For
-example, below commands applies a scheme saying "If a memory region of size in
-[4KiB, 8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
-interval in [10, 20], page out the region", check the entered scheme again, and
-finally remove the scheme. ::
+Quota
+~~~~~
- # cd <debugfs>/damon
- # echo "4096 8192 0 5 10 20 2" > schemes
- # cat schemes
- 4096 8192 0 5 10 20 2 0 0
- # echo > schemes
+Optimal ``target access pattern`` for each ``action`` is workload dependent, so
+not easy to find. Worse yet, setting a scheme of some action too aggressive
+can cause severe overhead. To avoid such overhead, users can limit time and
+size quota for the scheme via the ``<quota>`` in below form::
+
+ <ms> <sz> <reset interval> <priority weights>
+
+This makes DAMON to try to use only up to ``<ms>`` milliseconds for applying
+the action to memory regions of the ``target access pattern`` within the
+``<reset interval>`` milliseconds, and to apply the action to only up to
+``<sz>`` bytes of memory regions within the ``<reset interval>``. Setting both
+``<ms>`` and ``<sz>`` zero disables the quota limits.
+
+When the quota limit is expected to be exceeded, DAMON prioritizes found memory
+regions of the ``target access pattern`` based on their size, access frequency,
+and age. For personalized prioritization, users can set the weights for the
+three properties in ``<priority weights>`` in below form::
+
+ <size weight> <access frequency weight> <age weight>
+
+Watermarks
+~~~~~~~~~~
+
+Some schemes would need to run based on current value of the system's specific
+metrics like free memory ratio. For such cases, users can specify watermarks
+for the condition.::
+
+ <metric> <check interval> <high mark> <middle mark> <low mark>
+
+``<metric>`` is a predefined integer for the metric to be checked. The
+supported numbers and their meanings are as below.
+
+ - 0: Ignore the watermarks
+ - 1: System's free memory rate (per thousand)
+
+The value of the metric is checked every ``<check interval>`` microseconds.
+
+If the value is higher than ``<high mark>`` or lower than ``<low mark>``, the
+scheme is deactivated. If the value is lower than ``<mid mark>``, the scheme
+is activated.
+
+.. _damos_stats:
+
+Statistics
+~~~~~~~~~~
+
+It also counts the total number and bytes of regions that each scheme is tried
+to be applied, the two numbers for the regions that each scheme is successfully
+applied, and the total number of the quota limit exceeds. This statistics can
+be used for online analysis or tuning of the schemes.
+
+The statistics can be shown by reading the ``schemes`` file. Reading the file
+will show each scheme you entered in each line, and the five numbers for the
+statistics will be added at the end of each line.
-The last two integers in the 4th line of above example is the total number and
-the total size of the regions that the scheme is applied.
+Example
+~~~~~~~
+
+Below commands applies a scheme saying "If a memory region of size in [4KiB,
+8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
+interval in [10, 20], page out the region. For the paging out, use only up to
+10ms per second, and also don't page out more than 1GiB per second. Under the
+limitation, page out memory regions having longer age first. Also, check the
+free memory rate of the system every 5 seconds, start the monitoring and paging
+out when the free memory rate becomes lower than 50%, but stop it if the free
+memory rate becomes larger than 60%, or lower than 30%".::
+
+ # cd <debugfs>/damon
+ # scheme="4096 8192 0 5 10 20 2" # target access pattern and action
+ # scheme+=" 10 $((1024*1024*1024)) 1000" # quotas
+ # scheme+=" 0 0 100" # prioritization weights
+ # scheme+=" 1 5000000 600 500 300" # watermarks
+ # echo "$scheme" > schemes
Turning On/Off
@@ -195,6 +274,54 @@ the monitoring is turned on. If you write to the files while DAMON is running,
an error code such as ``-EBUSY`` will be returned.
+Monitoring Thread PID
+---------------------
+
+DAMON does requested monitoring with a kernel thread called ``kdamond``. You
+can get the pid of the thread by reading the ``kdamond_pid`` file. When the
+monitoring is turned off, reading the file returns ``none``. ::
+
+ # cd <debugfs>/damon
+ # cat monitor_on
+ off
+ # cat kdamond_pid
+ none
+ # echo on > monitor_on
+ # cat kdamond_pid
+ 18594
+
+
+Using Multiple Monitoring Threads
+---------------------------------
+
+One ``kdamond`` thread is created for each monitoring context. You can create
+and remove monitoring contexts for multiple ``kdamond`` required use case using
+the ``mk_contexts`` and ``rm_contexts`` files.
+
+Writing the name of the new context to the ``mk_contexts`` file creates a
+directory of the name on the DAMON debugfs directory. The directory will have
+DAMON debugfs files for the context. ::
+
+ # cd <debugfs>/damon
+ # ls foo
+ # ls: cannot access 'foo': No such file or directory
+ # echo foo > mk_contexts
+ # ls foo
+ # attrs init_regions kdamond_pid schemes target_ids
+
+If the context is not needed anymore, you can remove it and the corresponding
+directory by putting the name of the context to the ``rm_contexts`` file. ::
+
+ # echo foo > rm_contexts
+ # ls foo
+ # ls: cannot access 'foo': No such file or directory
+
+Note that ``mk_contexts``, ``rm_contexts``, and ``monitor_on`` files are in the
+root directory only.
+
+
+.. _tracepoint:
+
Tracepoint for Monitoring Results
=================================
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index 64fd0ba0d057..5a6afecbb0d0 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -408,7 +408,7 @@ follows:
Memory Policy APIs
==================
-Linux supports 3 system calls for controlling memory policy. These APIS
+Linux supports 4 system calls for controlling memory policy. These APIS
always affect only the calling task, the calling task's address space, or
some shared object mapped into the calling task's address space.
@@ -460,6 +460,20 @@ requested via the 'flags' argument.
See the mbind(2) man page for more details.
+Set home node for a Range of Task's Address Spacec::
+
+ long sys_set_mempolicy_home_node(unsigned long start, unsigned long len,
+ unsigned long home_node,
+ unsigned long flags);
+
+sys_set_mempolicy_home_node set the home node for a VMA policy present in the
+task's address range. The system call updates the home node only for the existing
+mempolicy range. Other address ranges are ignored. A home node is the NUMA node
+closest to which page allocation will come from. Specifying the home node override
+the default allocation policy to allocate memory close to the local node for an
+executing CPU.
+
+
Memory Policy Command Line Interface
====================================
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 5e795202111f..f4804ce37c58 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -948,7 +948,7 @@ how much memory needs to be free before kswapd goes back to sleep.
The unit is in fractions of 10,000. The default value of 10 means the
distances between watermarks are 0.1% of the available memory in the
-node/system. The maximum value is 1000, or 10% of memory.
+node/system. The maximum value is 3000, or 30% of memory.
A high rate of threads entering direct reclaim (allocstall) or kswapd
going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
index cf5a208f2f10..343598c9f473 100644
--- a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
@@ -10,6 +10,9 @@ title: Amlogic specific extensions to the Synopsys Designware HDMI Controller
maintainers:
- Neil Armstrong <narmstrong@baylibre.com>
+allOf:
+ - $ref: /schemas/sound/name-prefix.yaml#
+
description: |
The Amlogic Meson Synopsys Designware Integration is composed of
- A Synopsys DesignWare HDMI Controller IP
@@ -99,6 +102,8 @@ properties:
"#sound-dai-cells":
const: 0
+ sound-name-prefix: true
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
index 851cb0781217..047fd69e0377 100644
--- a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
+++ b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
@@ -78,6 +78,10 @@ properties:
interrupts:
maxItems: 1
+ amlogic,canvas:
+ description: should point to a canvas provider node
+ $ref: /schemas/types.yaml#/definitions/phandle
+
power-domains:
maxItems: 1
description: phandle to the associated power domain
@@ -106,6 +110,7 @@ required:
- port@1
- "#address-cells"
- "#size-cells"
+ - amlogic,canvas
additionalProperties: false
@@ -118,6 +123,7 @@ examples:
interrupts = <3>;
#address-cells = <1>;
#size-cells = <0>;
+ amlogic,canvas = <&canvas>;
/* CVBS VDAC output port */
port@0 {
diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
index 8e13f27b28ed..bce96b5b0db0 100644
--- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
@@ -7,7 +7,9 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Analogix ANX7814 SlimPort (Full-HD Transmitter)
maintainers:
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
+ - Andrzej Hajda <andrzej.hajda@intel.com>
+ - Neil Armstrong <narmstrong@baylibre.com>
+ - Robert Foss <robert.foss@linaro.org>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml b/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml
index 9f7cc6b757cb..a88a5d8c7ba5 100644
--- a/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml
@@ -8,7 +8,6 @@ title: ChromeOS EC ANX7688 HDMI to DP Converter through Type-C Port
maintainers:
- Nicolas Boichat <drinkcat@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
ChromeOS EC ANX7688 is a display bridge that converts HDMI 2.0 to
diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
index cdaf7a7a8f88..186e17be51fb 100644
--- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
@@ -8,7 +8,6 @@ title: MIPI DSI to eDP Video Format Converter Device Tree Bindings
maintainers:
- Nicolas Boichat <drinkcat@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
The PS8640 is a low power MIPI-to-eDP video format converter supporting
diff --git a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml
index a108029ecfab..acd2f3faa6b9 100644
--- a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml
+++ b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Asia Better Technology 3.0" (320x480 pixels) 24-bit IPS LCD panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Paul Cercueil <paul@crapouillou.net>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
index e89c1ea62ffa..7d221ef35443 100644
--- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
+++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
@@ -15,11 +15,9 @@ description: |
960 TFT source driver pins and 240 TFT gate driver pins, VCOM, VCOML and
VCOMH outputs.
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml b/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml
index cda36c04e85c..72788e3e6c59 100644
--- a/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml
+++ b/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Innolux EJ030NA 3.0" (320x480 pixels) 24-bit TFT LCD panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Paul Cercueil <paul@crapouillou.net>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml
index c45c92a3d41f..2a2756d19681 100644
--- a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml
+++ b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: King Display KD035G6-54NT 3.5" (320x240 pixels) 24-bit TFT LCD panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Paul Cercueil <paul@crapouillou.net>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml b/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml
index 830e335ddb53..5e4e0e552c2f 100644
--- a/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml
+++ b/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: LG.Philips LB035Q02 Panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Tomi Valkeinen <tomi.valkeinen@ti.com>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml b/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml
index 060ee27a4749..d525165d6d63 100644
--- a/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml
+++ b/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Samsung LD9040 AMOLED LCD parallel RGB panel with SPI control bus
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Andrzej Hajda <a.hajda@samsung.com>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
@@ -63,8 +60,6 @@ examples:
lcd@0 {
compatible = "samsung,ld9040";
- #address-cells = <1>;
- #size-cells = <0>;
reg = <0>;
vdd3-supply = <&ldo7_reg>;
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml
index ea58df49263a..940f7f88526f 100644
--- a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml
+++ b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml
@@ -12,6 +12,7 @@ maintainers:
allOf:
- $ref: panel-common.yaml#
- $ref: /schemas/leds/backlight/common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
index fa46d151e7b3..9e1d707c2ace 100644
--- a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
+++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Sitronix ST7789V RGB panel with SPI control bus
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Maxime Ripard <mripard@kernel.org>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml b/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml
index 95d053c548ab..98abdf4ddeac 100644
--- a/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml
+++ b/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml
@@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Sony ACX565AKM SDI Panel
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Tomi Valkeinen <tomi.valkeinen@ti.com>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/panel/tpo,td.yaml b/Documentation/devicetree/bindings/display/panel/tpo,td.yaml
index 4aa605613445..f902a9d74141 100644
--- a/Documentation/devicetree/bindings/display/panel/tpo,td.yaml
+++ b/Documentation/devicetree/bindings/display/panel/tpo,td.yaml
@@ -6,16 +6,13 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Toppoly TD Panels
-description: |
- The panel must obey the rules for a SPI slave device as specified in
- spi/spi-controller.yaml
-
maintainers:
- Marek Belisko <marek@goldelico.com>
- H. Nikolaus Schaller <hns@goldelico.com>
allOf:
- $ref: panel-common.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml
index 008c144257cb..1a68a940d165 100644
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml
@@ -26,14 +26,6 @@ properties:
clock-names:
const: hclk
- pinctrl-0:
- maxItems: 2
-
- pinctrl-names:
- const: default
- description:
- Switch the iomux for the HPD/I2C pins to HDMI function.
-
power-domains:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/dma/arm,pl330.yaml b/Documentation/devicetree/bindings/dma/arm,pl330.yaml
new file mode 100644
index 000000000000..decab185cf4d
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/arm,pl330.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/arm,pl330.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM PrimeCell PL330 DMA Controller
+
+maintainers:
+ - Vinod Koul <vkoul@kernel.org>
+
+description:
+ The ARM PrimeCell PL330 DMA controller can move blocks of memory contents
+ between memory and peripherals or memory to memory.
+
+# We need a select here so we don't match all nodes with 'arm,primecell'
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,pl330
+ required:
+ - compatible
+
+allOf:
+ - $ref: dma-controller.yaml#
+ - $ref: /schemas/arm/primecell.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - arm,pl330
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 1
+ maxItems: 32
+ description: A single combined interrupt or an interrupt per event
+
+ '#dma-cells':
+ const: 1
+ description: Contains the DMA request number for the consumer
+
+ arm,pl330-broken-no-flushp:
+ type: boolean
+ description: quirk for avoiding to execute DMAFLUSHP
+
+ arm,pl330-periph-burst:
+ type: boolean
+ description: quirk for performing burst transfer only
+
+ dma-coherent: true
+
+ resets:
+ minItems: 1
+ maxItems: 2
+
+ reset-names:
+ minItems: 1
+ items:
+ - const: dma
+ - const: dma-ocp
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ dma-controller@12680000 {
+ compatible = "arm,pl330", "arm,primecell";
+ reg = <0x12680000 0x1000>;
+ interrupts = <99>;
+ #dma-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/dma/arm-pl08x.yaml b/Documentation/devicetree/bindings/dma/arm-pl08x.yaml
index 3bd9eea543ca..9193b18fb75f 100644
--- a/Documentation/devicetree/bindings/dma/arm-pl08x.yaml
+++ b/Documentation/devicetree/bindings/dma/arm-pl08x.yaml
@@ -10,6 +10,7 @@ maintainers:
- Vinod Koul <vkoul@kernel.org>
allOf:
+ - $ref: /schemas/arm/primecell.yaml#
- $ref: "dma-controller.yaml#"
# We need a select here so we don't match all nodes with 'arm,primecell'
@@ -89,6 +90,9 @@ properties:
- 64
description: bus width used for memcpy in bits. FTDMAC020 also accept 64 bits
+ resets:
+ maxItems: 1
+
required:
- reg
- interrupts
diff --git a/Documentation/devicetree/bindings/dma/arm-pl330.txt b/Documentation/devicetree/bindings/dma/arm-pl330.txt
deleted file mode 100644
index 315e90122afa..000000000000
--- a/Documentation/devicetree/bindings/dma/arm-pl330.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-* ARM PrimeCell PL330 DMA Controller
-
-The ARM PrimeCell PL330 DMA controller can move blocks of memory contents
-between memory and peripherals or memory to memory.
-
-Required properties:
- - compatible: should include both "arm,pl330" and "arm,primecell".
- - reg: physical base address of the controller and length of memory mapped
- region.
- - interrupts: interrupt number to the cpu.
-
-Optional properties:
- - dma-coherent : Present if dma operations are coherent
- - #dma-cells: must be <1>. used to represent the number of integer
- cells in the dmas property of client device.
- - dma-channels: contains the total number of DMA channels supported by the DMAC
- - dma-requests: contains the total number of DMA requests supported by the DMAC
- - arm,pl330-broken-no-flushp: quirk for avoiding to execute DMAFLUSHP
- - arm,pl330-periph-burst: quirk for performing burst transfer only
- - resets: contains an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
- - reset-names: must contain at least "dma", and optional is "dma-ocp".
-
-Example:
-
- pdma0: pdma@12680000 {
- compatible = "arm,pl330", "arm,primecell";
- reg = <0x12680000 0x1000>;
- interrupts = <99>;
- #dma-cells = <1>;
- #dma-channels = <8>;
- #dma-requests = <32>;
- };
-
-Client drivers (device nodes requiring dma transfers from dev-to-mem or
-mem-to-dev) should specify the DMA channel numbers and dma channel names
-as shown below.
-
- [property name] = <[phandle of the dma controller] [dma request id]>;
- [property name] = <[dma channel name]>
-
- where 'dma request id' is the dma request number which is connected
- to the client controller. The 'property name' 'dmas' and 'dma-names'
- as required by the generic dma device tree binding helpers. The dma
- names correspond 1:1 with the dma request ids in the dmas property.
-
- Example: dmas = <&pdma0 12
- &pdma1 11>;
- dma-names = "tx", "rx";
diff --git a/Documentation/devicetree/bindings/dma/dma-controller.yaml b/Documentation/devicetree/bindings/dma/dma-controller.yaml
index 0043b91da95e..6d3727267fa8 100644
--- a/Documentation/devicetree/bindings/dma/dma-controller.yaml
+++ b/Documentation/devicetree/bindings/dma/dma-controller.yaml
@@ -24,10 +24,10 @@ examples:
dma: dma-controller@48000000 {
compatible = "ti,omap-sdma";
reg = <0x48000000 0x1000>;
- interrupts = <0 12 0x4
- 0 13 0x4
- 0 14 0x4
- 0 15 0x4>;
+ interrupts = <0 12 0x4>,
+ <0 13 0x4>,
+ <0 14 0x4>,
+ <0 15 0x4>;
#dma-cells = <1>;
dma-channels = <32>;
dma-requests = <127>;
diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
index dc059d6fd037..3b0b3b919af8 100644
--- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
+++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
@@ -14,15 +14,23 @@ allOf:
properties:
compatible:
- enum:
- - ingenic,jz4740-dma
- - ingenic,jz4725b-dma
- - ingenic,jz4760-dma
- - ingenic,jz4760b-dma
- - ingenic,jz4770-dma
- - ingenic,jz4780-dma
- - ingenic,x1000-dma
- - ingenic,x1830-dma
+ oneOf:
+ - enum:
+ - ingenic,jz4740-dma
+ - ingenic,jz4725b-dma
+ - ingenic,jz4760-dma
+ - ingenic,jz4760-bdma
+ - ingenic,jz4760-mdma
+ - ingenic,jz4760b-dma
+ - ingenic,jz4760b-bdma
+ - ingenic,jz4760b-mdma
+ - ingenic,jz4770-dma
+ - ingenic,jz4780-dma
+ - ingenic,x1000-dma
+ - ingenic,x1830-dma
+ - items:
+ - const: ingenic,jz4770-bdma
+ - const: ingenic,jz4760b-bdma
reg:
items:
@@ -36,13 +44,19 @@ properties:
maxItems: 1
"#dma-cells":
- const: 2
+ enum: [2, 3]
description: >
DMA clients must use the format described in dma.txt, giving a phandle
- to the DMA controller plus the following 2 integer cells:
-
- - Request type: The DMA request type for transfers to/from the
- device on the allocated channel, as defined in the SoC documentation.
+ to the DMA controller plus the following integer cells:
+
+ - Request type: The DMA request type specifies the device endpoint that
+ will be the source or destination of the DMA transfer.
+ If "#dma-cells" is 2, the request type is a single cell, and the
+ direction will be unidirectional (either RX or TX but not both).
+ If "#dma-cells" is 3, the request type has two cells; the first
+ one corresponds to the host to device direction (TX), the second one
+ corresponds to the device to host direction (RX). The DMA channel is
+ then bidirectional.
- Channel: If set to 0xffffffff, any available channel will be allocated
for the client. Otherwise, the exact channel specified will be used.
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
index d8142cbd13d3..7c6badf39921 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
@@ -44,6 +44,10 @@ properties:
- items:
- const: renesas,dmac-r8a779a0 # R-Car V3U
+ - items:
+ - const: renesas,dmac-r8a779f0 # R-Car S4-8
+ - const: renesas,rcar-gen4-dmac
+
reg: true
interrupts:
@@ -118,6 +122,7 @@ if:
contains:
enum:
- renesas,dmac-r8a779a0
+ - renesas,rcar-gen4-dmac
then:
properties:
reg:
diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml
index 79e241498e25..4324a94b26b2 100644
--- a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml
+++ b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml
@@ -53,6 +53,9 @@ properties:
minimum: 1
maximum: 8
+ resets:
+ maxItems: 1
+
snps,dma-masters:
description: |
Number of AXI masters supported by the hardware.
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
index df29d59d13a8..08627d91e607 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
@@ -30,6 +30,7 @@ description: |
allOf:
- $ref: /schemas/dma/dma-controller.yaml#
+ - $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
index ea19d12a9337..507d16d84ade 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
@@ -25,6 +25,7 @@ description: |
allOf:
- $ref: /schemas/dma/dma-controller.yaml#
+ - $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml
index 4c5396a9744f..6b61a8cf6137 100644
--- a/Documentation/devicetree/bindings/eeprom/at24.yaml
+++ b/Documentation/devicetree/bindings/eeprom/at24.yaml
@@ -87,6 +87,10 @@ properties:
- items:
pattern: cs1024$
- items:
+ pattern: c1025$
+ - items:
+ pattern: cs1025$
+ - items:
pattern: c2048$
- items:
pattern: cs2048$
@@ -95,18 +99,21 @@ properties:
# These are special cases that don't conform to the above pattern.
# Each requires a standard at24 model as fallback.
- items:
- - const: nxp,se97b
- - const: atmel,24c02
+ - enum:
+ - rohm,br24g01
+ - rohm,br24t01
+ - const: atmel,24c01
- items:
- - const: onnn,cat24c04
- - const: atmel,24c04
+ - enum:
+ - nxp,se97b
+ - renesas,r1ex24002
+ - const: atmel,24c02
- items:
- - const: onnn,cat24c05
+ - enum:
+ - onnn,cat24c04
+ - onnn,cat24c05
- const: atmel,24c04
- items:
- - const: renesas,r1ex24002
- - const: atmel,24c02
- - items:
- const: renesas,r1ex24016
- const: atmel,24c16
- items:
@@ -115,12 +122,6 @@ properties:
- items:
- const: renesas,r1ex24128
- const: atmel,24c128
- - items:
- - const: rohm,br24g01
- - const: atmel,24c01
- - items:
- - const: rohm,br24t01
- - const: atmel,24c01
label:
description: Descriptive name of the EEPROM.
diff --git a/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml b/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml
index 20e1ccfc8630..2d82b44268db 100644
--- a/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml
+++ b/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml
@@ -8,7 +8,6 @@ title: ChromeOS EC USB Type-C cable and accessories detection
maintainers:
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
On ChromeOS systems with USB Type C ports, the ChromeOS Embedded Controller is
diff --git a/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt b/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt
deleted file mode 100644
index a8a35df41951..000000000000
--- a/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Broadcom BCM2835 I2C controller
-
-Required properties:
-- compatible : Should be one of:
- "brcm,bcm2711-i2c"
- "brcm,bcm2835-i2c"
-- reg: Should contain register location and length.
-- interrupts: Should contain interrupt.
-- clocks : The clock feeding the I2C controller.
-
-Recommended properties:
-- clock-frequency : desired I2C bus clock frequency in Hz.
-
-Example:
-
-i2c@7e205000 {
- compatible = "brcm,bcm2835-i2c";
- reg = <0x7e205000 0x1000>;
- interrupts = <2 21>;
- clocks = <&clk_i2c>;
- clock-frequency = <100000>;
-};
diff --git a/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.yaml b/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.yaml
new file mode 100644
index 000000000000..8256490a7af2
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/brcm,bcm2835-i2c.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM2835 I2C controller
+
+maintainers:
+ - Stephen Warren <swarren@wwwdotorg.org>
+
+allOf:
+ - $ref: /schemas/i2c/i2c-controller.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - brcm,bcm2835-i2c
+ - items:
+ - const: brcm,bcm2711-i2c
+ - const: brcm,bcm2835-i2c
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clock-names:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-frequency: true
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ i2c@7e205000 {
+ compatible = "brcm,bcm2835-i2c";
+ reg = <0x7e205000 0x1000>;
+ interrupts = <2 21>;
+ clocks = <&clk_i2c>;
+ clock-frequency = <100000>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml
index b386e4128a79..6e1c70e9275e 100644
--- a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml
+++ b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml
@@ -10,7 +10,6 @@ title: I2C bus that tunnels through the ChromeOS EC (cros-ec)
maintainers:
- Doug Anderson <dianders@chromium.org>
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
On some ChromeOS board designs we've got a connection to the EC
diff --git a/Documentation/devicetree/bindings/i2c/i2c-exynos5.txt b/Documentation/devicetree/bindings/i2c/i2c-exynos5.txt
deleted file mode 100644
index 2dbc0b62daa6..000000000000
--- a/Documentation/devicetree/bindings/i2c/i2c-exynos5.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-* Samsung's High Speed I2C controller
-
-The Samsung's High Speed I2C controller is used to interface with I2C devices
-at various speeds ranging from 100khz to 3.4Mhz.
-
-Required properties:
- - compatible: value should be.
- -> "samsung,exynos5-hsi2c", (DEPRECATED)
- for i2c compatible with HSI2C available
- on Exynos5250 and Exynos5420 SoCs.
- -> "samsung,exynos5250-hsi2c", for i2c compatible with HSI2C available
- on Exynos5250 and Exynos5420 SoCs.
- -> "samsung,exynos5260-hsi2c", for i2c compatible with HSI2C available
- on Exynos5260 SoCs.
- -> "samsung,exynos7-hsi2c", for i2c compatible with HSI2C available
- on Exynos7 SoCs.
-
- - reg: physical base address of the controller and length of memory mapped
- region.
- - interrupts: interrupt number to the cpu.
- - #address-cells: always 1 (for i2c addresses)
- - #size-cells: always 0
-
- - Pinctrl:
- - pinctrl-0: Pin control group to be used for this controller.
- - pinctrl-names: Should contain only one value - "default".
-
-Optional properties:
- - clock-frequency: Desired operating frequency in Hz of the bus.
- -> If not specified, the bus operates in fast-speed mode at
- at 100khz.
- -> If specified, the bus operates in high-speed mode only if the
- clock-frequency is >= 1Mhz.
-
-Example:
-
-hsi2c@12ca0000 {
- compatible = "samsung,exynos5250-hsi2c";
- reg = <0x12ca0000 0x100>;
- interrupts = <56>;
- clock-frequency = <100000>;
-
- pinctrl-0 = <&i2c4_bus>;
- pinctrl-names = "default";
-
- #address-cells = <1>;
- #size-cells = <0>;
-
- s2mps11_pmic@66 {
- compatible = "samsung,s2mps11-pmic";
- reg = <0x66>;
- };
-};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml b/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml
new file mode 100644
index 000000000000..19874e8b73b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/i2c-exynos5.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung's High Speed I2C controller
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ The Samsung's High Speed I2C controller is used to interface with I2C devices
+ at various speeds ranging from 100kHz to 3.4MHz.
+
+ In case the HSI2C controller is encapsulated within USI block (it's the case
+ e.g. for Exynos850 and Exynos Auto V9 SoCs), it might be also necessary to
+ define USI node in device tree file, choosing "i2c" configuration. Please see
+ Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - samsung,exynos5250-hsi2c # Exynos5250 and Exynos5420
+ - samsung,exynos5260-hsi2c # Exynos5260
+ - samsung,exynos7-hsi2c # Exynos7
+ - samsung,exynosautov9-hsi2c # ExynosAutoV9 and Exynos850
+ - const: samsung,exynos5-hsi2c # Exynos5250 and Exynos5420
+ deprecated: true
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clock-frequency:
+ default: 100000
+ description:
+ Desired operating frequency in Hz of the bus.
+
+ If not specified, the bus operates in fast-speed mode at 100kHz.
+
+ If specified, the bus operates in high-speed mode only if the
+ clock-frequency is >= 1MHz.
+
+ clocks:
+ minItems: 1
+ items:
+ - description: I2C operating clock
+ - description: Bus clock (APB)
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: hsi2c
+ - const: hsi2c_pclk
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+allOf:
+ - $ref: /schemas/i2c/i2c-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynosautov9-hsi2c
+
+ then:
+ properties:
+ clocks:
+ minItems: 2
+
+ clock-names:
+ minItems: 2
+
+ required:
+ - clock-names
+
+ else:
+ properties:
+ clocks:
+ maxItems: 1
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos5420.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ hsi2c_8: i2c@12e00000 {
+ compatible = "samsung,exynos5250-hsi2c";
+ reg = <0x12e00000 0x1000>;
+ interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-frequency = <100000>;
+ clocks = <&clock CLK_USI4>;
+ clock-names = "hsi2c";
+
+ pmic@66 {
+ /* compatible = "samsung,s2mps11-pmic"; */
+ reg = <0x66>;
+ };
+ };
+
+ - |
+ #include <dt-bindings/clock/exynos850.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ hsi2c_2: i2c@138c0000 {
+ compatible = "samsung,exynosautov9-hsi2c";
+ reg = <0x138c0000 0xc0>;
+ interrupts = <GIC_SPI 195 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&cmu_peri CLK_GOUT_HSI2C2_IPCLK>,
+ <&cmu_peri CLK_GOUT_HSI2C2_PCLK>;
+ clock-names = "hsi2c", "hsi2c_pclk";
+
+ pmic@66 {
+ /* compatible = "samsung,s2mps11-pmic"; */
+ reg = <0x66>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
index fe0c89edf7c1..529bea56d324 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
@@ -20,7 +20,9 @@ properties:
- items:
- enum:
- fsl,imx8qxp-lpi2c
+ - fsl,imx8dxl-lpi2c
- fsl,imx8qm-lpi2c
+ - fsl,imx8ulp-lpi2c
- const: fsl,imx7ulp-lpi2c
reg:
diff --git a/Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml b/Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml
index 1ca571056ea9..925f355cc21f 100644
--- a/Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: ADC found on Freescale vf610 and similar SoCs
maintainers:
- - Fugang Duan <fugang.duan@nxp.com>
+ - Haibo Chen <haibo.chen@nxp.com>
description:
ADCs found on vf610/i.MX6slx and upward SoCs from Freescale.
diff --git a/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml b/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml
index e759a5da708d..d6d3d8590171 100644
--- a/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml
@@ -27,6 +27,7 @@ description: |
8 | batt_v
9 | batt_chrg_i
10 | batt_dischrg_i
+ 11 | ts_v
AXP22x
------
@@ -34,6 +35,7 @@ description: |
1 | batt_v
2 | batt_chrg_i
3 | batt_dischrg_i
+ 4 | ts_v
AXP813
------
@@ -42,6 +44,7 @@ description: |
2 | batt_v
3 | batt_chrg_i
4 | batt_dischrg_i
+ 5 | ts_v
properties:
diff --git a/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml b/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml
new file mode 100644
index 000000000000..87992db389b2
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml
@@ -0,0 +1,227 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/adc/xlnx,zynqmp-ams.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx Zynq Ultrascale AMS controller
+
+maintainers:
+ - Anand Ashok Dumbre <anand.ashok.dumbre@xilinx.com>
+
+description: |
+ The AMS (Analog Monitoring System) includes an ADC as well as on-chip sensors
+ that can be used to sample external voltages and monitor on-die operating
+ conditions, such as temperature and supply voltage levels.
+ The AMS has two SYSMON blocks which are PL (Programmable Logic) SYSMON and
+ PS (Processing System) SYSMON.
+ All designs should have AMS registers, but PS and PL are optional. The
+ AMS controller can work with only PS, only PL and both PS and PL
+ configurations. Please specify registers according to your design. Devicetree
+ should always have AMS module property. Providing PS & PL module is optional.
+
+ AMS Channel Details
+ ```````````````````
+ Sysmon Block |Channel| Details |Measurement
+ |Number | |Type
+ ---------------------------------------------------------------------------------------------------------
+ AMS CTRL |0 |System PLLs voltage measurement, VCC_PSPLL. |Voltage
+ |1 |Battery voltage measurement, VCC_PSBATT. |Voltage
+ |2 |PL Internal voltage measurement, VCCINT. |Voltage
+ |3 |Block RAM voltage measurement, VCCBRAM. |Voltage
+ |4 |PL Aux voltage measurement, VCCAUX. |Voltage
+ |5 |Voltage measurement for six DDR I/O PLLs, VCC_PSDDR_PLL. |Voltage
+ |6 |VCC_PSINTFP_DDR voltage measurement. |Voltage
+ ---------------------------------------------------------------------------------------------------------
+ PS Sysmon |7 |LPD temperature measurement. |Temperature
+ |8 |FPD temperature measurement (REMOTE). |Temperature
+ |9 |VCC PS LPD voltage measurement (supply1). |Voltage
+ |10 |VCC PS FPD voltage measurement (supply2). |Voltage
+ |11 |PS Aux voltage reference (supply3). |Voltage
+ |12 |DDR I/O VCC voltage measurement. |Voltage
+ |13 |PS IO Bank 503 voltage measurement (supply5). |Voltage
+ |14 |PS IO Bank 500 voltage measurement (supply6). |Voltage
+ |15 |VCCO_PSIO1 voltage measurement. |Voltage
+ |16 |VCCO_PSIO2 voltage measurement. |Voltage
+ |17 |VCC_PS_GTR voltage measurement (VPS_MGTRAVCC). |Voltage
+ |18 |VTT_PS_GTR voltage measurement (VPS_MGTRAVTT). |Voltage
+ |19 |VCC_PSADC voltage measurement. |Voltage
+ ---------------------------------------------------------------------------------------------------------
+ PL Sysmon |20 |PL temperature measurement. |Temperature
+ |21 |PL Internal voltage measurement, VCCINT. |Voltage
+ |22 |PL Auxiliary voltage measurement, VCCAUX. |Voltage
+ |23 |ADC Reference P+ voltage measurement. |Voltage
+ |24 |ADC Reference N- voltage measurement. |Voltage
+ |25 |PL Block RAM voltage measurement, VCCBRAM. |Voltage
+ |26 |LPD Internal voltage measurement, VCC_PSINTLP (supply4). |Voltage
+ |27 |FPD Internal voltage measurement, VCC_PSINTFP (supply5). |Voltage
+ |28 |PS Auxiliary voltage measurement (supply6). |Voltage
+ |29 |PL VCCADC voltage measurement (vccams). |Voltage
+ |30 |Differential analog input signal voltage measurment. |Voltage
+ |31 |VUser0 voltage measurement (supply7). |Voltage
+ |32 |VUser1 voltage measurement (supply8). |Voltage
+ |33 |VUser2 voltage measurement (supply9). |Voltage
+ |34 |VUser3 voltage measurement (supply10). |Voltage
+ |35 |Auxiliary ch 0 voltage measurement (VAux0). |Voltage
+ |36 |Auxiliary ch 1 voltage measurement (VAux1). |Voltage
+ |37 |Auxiliary ch 2 voltage measurement (VAux2). |Voltage
+ |38 |Auxiliary ch 3 voltage measurement (VAux3). |Voltage
+ |39 |Auxiliary ch 4 voltage measurement (VAux4). |Voltage
+ |40 |Auxiliary ch 5 voltage measurement (VAux5). |Voltage
+ |41 |Auxiliary ch 6 voltage measurement (VAux6). |Voltage
+ |42 |Auxiliary ch 7 voltage measurement (VAux7). |Voltage
+ |43 |Auxiliary ch 8 voltage measurement (VAux8). |Voltage
+ |44 |Auxiliary ch 9 voltage measurement (VAux9). |Voltage
+ |45 |Auxiliary ch 10 voltage measurement (VAux10). |Voltage
+ |46 |Auxiliary ch 11 voltage measurement (VAux11). |Voltage
+ |47 |Auxiliary ch 12 voltage measurement (VAux12). |Voltage
+ |48 |Auxiliary ch 13 voltage measurement (VAux13). |Voltage
+ |49 |Auxiliary ch 14 voltage measurement (VAux14). |Voltage
+ |50 |Auxiliary ch 15 voltage measurement (VAux15). |Voltage
+ --------------------------------------------------------------------------------------------------------
+
+properties:
+ compatible:
+ enum:
+ - xlnx,zynqmp-ams
+
+ interrupts:
+ maxItems: 1
+
+ reg:
+ description: AMS Controller register space
+ maxItems: 1
+
+ ranges:
+ description:
+ Maps the child address space for PS and/or PL.
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 1
+
+ '#io-channel-cells':
+ const: 1
+
+ ams-ps@0:
+ type: object
+ description: |
+ PS (Processing System) SYSMON is memory mapped to PS. This block has
+ built-in alarm generation logic that is used to interrupt the processor
+ based on condition set.
+
+ properties:
+ compatible:
+ enum:
+ - xlnx,zynqmp-ams-ps
+
+ reg:
+ description: Register Space for PS-SYSMON
+ maxItems: 1
+
+ required:
+ - compatible
+ - reg
+
+ additionalProperties: false
+
+ ams-pl@400:
+ type: object
+ description:
+ PL-SYSMON is capable of monitoring off chip voltage and temperature.
+ PL-SYSMON block has DRP, JTAG and I2C interface to enable monitoring
+ from external master. Out of this interface currently only DRP is
+ supported. This block has alarm generation logic that is used to
+ interrupt the processor based on condition set.
+
+ properties:
+ compatible:
+ items:
+ - enum:
+ - xlnx,zynqmp-ams-pl
+
+ reg:
+ description: Register Space for PL-SYSMON.
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ patternProperties:
+ "^channel@([2-4][0-9]|50)$":
+ type: object
+ description:
+ Describes the external channels connected.
+
+ properties:
+ reg:
+ description:
+ Pair of pins the channel is connected to. This value is
+ same as Channel Number for a particular channel.
+ minimum: 20
+ maximum: 50
+
+ xlnx,bipolar:
+ $ref: /schemas/types.yaml#/definitions/flag
+ type: boolean
+ description:
+ If the set channel is used in bipolar mode.
+
+ required:
+ - reg
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ xilinx_ams: ams@ffa50000 {
+ compatible = "xlnx,zynqmp-ams";
+ interrupt-parent = <&gic>;
+ interrupts = <0 56 4>;
+ reg = <0x0 0xffa50000 0x0 0x800>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #io-channel-cells = <1>;
+ ranges = <0 0 0xffa50800 0x800>;
+
+ ams_ps: ams-ps@0 {
+ compatible = "xlnx,zynqmp-ams-ps";
+ reg = <0 0x400>;
+ };
+
+ ams_pl: ams-pl@400 {
+ compatible = "xlnx,zynqmp-ams-pl";
+ reg = <0x400 0x400>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ channel@30 {
+ reg = <30>;
+ xlnx,bipolar;
+ };
+ channel@31 {
+ reg = <31>;
+ };
+ channel@38 {
+ reg = <38>;
+ xlnx,bipolar;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
new file mode 100644
index 000000000000..baa65a521bad
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/addac/adi,ad74413r.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AD74412R/AD74413R device
+
+maintainers:
+ - Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+description: |
+ The AD74412R and AD74413R are quad-channel software configurable input/output
+ solutions for building and process control applications. They contain
+ functionality for analog output, analog input, digital input, resistance
+ temperature detector, and thermocouple measurements integrated
+ into a single chip solution with an SPI interface.
+ The devices feature a 16-bit ADC and four configurable 13-bit DACs to provide
+ four configurable input/output channels and a suite of diagnostic functions.
+ The AD74413R differentiates itself from the AD74412R by being HART-compatible.
+ https://www.analog.com/en/products/ad74412r.html
+ https://www.analog.com/en/products/ad74413r.html
+
+properties:
+ compatible:
+ enum:
+ - adi,ad74412r
+ - adi,ad74413r
+
+ reg:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ spi-max-frequency:
+ maximum: 1000000
+
+ spi-cpol: true
+
+ interrupts:
+ maxItems: 1
+
+ refin-supply: true
+
+ shunt-resistor-micro-ohms:
+ description:
+ Shunt (sense) resistor value in micro-Ohms.
+ default: 100000000
+
+required:
+ - compatible
+ - reg
+ - spi-max-frequency
+ - spi-cpol
+ - refin-supply
+
+additionalProperties: false
+
+patternProperties:
+ "^channel@[0-3]$":
+ type: object
+ description: Represents the external channels which are connected to the device.
+
+ properties:
+ reg:
+ description: |
+ The channel number. It can have up to 4 channels numbered from 0 to 3.
+ minimum: 0
+ maximum: 3
+
+ adi,ch-func:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Channel function.
+ HART functions are not supported on AD74412R.
+ 0 - CH_FUNC_HIGH_IMPEDANCE
+ 1 - CH_FUNC_VOLTAGE_OUTPUT
+ 2 - CH_FUNC_CURRENT_OUTPUT
+ 3 - CH_FUNC_VOLTAGE_INPUT
+ 4 - CH_FUNC_CURRENT_INPUT_EXT_POWER
+ 5 - CH_FUNC_CURRENT_INPUT_LOOP_POWER
+ 6 - CH_FUNC_RESISTANCE_INPUT
+ 7 - CH_FUNC_DIGITAL_INPUT_LOGIC
+ 8 - CH_FUNC_DIGITAL_INPUT_LOOP_POWER
+ 9 - CH_FUNC_CURRENT_INPUT_EXT_POWER_HART
+ 10 - CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART
+ minimum: 0
+ maximum: 10
+ default: 0
+
+ adi,gpo-comparator:
+ type: boolean
+ description: |
+ Whether to configure GPO as a comparator or not.
+ When not configured as a comparator, the GPO will be treated as an
+ output-only GPIO.
+
+ required:
+ - reg
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/iio/addac/adi,ad74413r.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cs-gpios = <&gpio 17 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ ad74413r@0 {
+ compatible = "adi,ad74413r";
+ reg = <0>;
+ spi-max-frequency = <1000000>;
+ spi-cpol;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <26 IRQ_TYPE_EDGE_FALLING>;
+
+ refin-supply = <&ad74413r_refin>;
+
+ channel@0 {
+ reg = <0>;
+
+ adi,ch-func = <CH_FUNC_VOLTAGE_OUTPUT>;
+ };
+
+ channel@1 {
+ reg = <1>;
+
+ adi,ch-func = <CH_FUNC_CURRENT_OUTPUT>;
+ };
+
+ channel@2 {
+ reg = <2>;
+
+ adi,ch-func = <CH_FUNC_DIGITAL_INPUT_LOGIC>;
+ adi,gpo-comparator;
+ };
+
+ channel@3 {
+ reg = <3>;
+
+ adi,ch-func = <CH_FUNC_CURRENT_INPUT_EXT_POWER>;
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml
new file mode 100644
index 000000000000..501a463e5d88
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml
@@ -0,0 +1,217 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2020 Analog Devices Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/dac/adi,ad3552r.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AD2552R DAC device driver
+
+maintainers:
+ - Mihail Chindris <mihail.chindris@analog.com>
+
+description: |
+ Bindings for the Analog Devices AD3552R DAC device and similar.
+ Datasheet can be found here:
+ https://www.analog.com/media/en/technical-documentation/data-sheets/ad3542r.pdf
+ https://www.analog.com/media/en/technical-documentation/data-sheets/ad3552r.pdf
+
+properties:
+ compatible:
+ enum:
+ - adi,ad3542r
+ - adi,ad3552r
+
+ reg:
+ maxItems: 1
+
+ spi-max-frequency:
+ maximum: 30000000
+
+ reset-gpios:
+ maxItems: 1
+
+ ldac-gpios:
+ description: |
+ LDAC pin to be used as a hardware trigger to update the DAC channels.
+ maxItems: 1
+
+ vref-supply:
+ description:
+ The regulator to use as an external reference. If it does not exists the
+ internal reference will be used. External reference must be 2.5V
+
+ adi,vref-out-en:
+ description: Vref I/O driven by internal vref to 2.5V. If not set, Vref pin
+ will be floating.
+ type: boolean
+
+ adi,sdo-drive-strength:
+ description: |
+ Configure SDIO0 and SDIO1 strength levels:
+ - 0: low SDO drive strength.
+ - 1: medium low SDO drive strength.
+ - 2: medium high SDO drive strength.
+ - 3: high SDO drive strength
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+patternProperties:
+ "^channel@([0-1])$":
+ type: object
+ description: Configurations of the DAC Channels
+
+ additionalProperties: false
+
+ properties:
+ reg:
+ description: Channel number
+ enum: [0, 1]
+
+ adi,output-range-microvolt: true
+
+ custom-output-range-config:
+ type: object
+ description: Configuration of custom range when
+ adi,output-range-microvolt is not present.
+ The formulas for calculation the output voltages are
+ Vout_fs = 2.5 + [(GainN + Offset/1024) * 2.5 * Rfbx * 1.03]
+ Vout_zs = 2.5 - [(GainP + Offset/1024) * 2.5 * Rfbx * 1.03]
+
+ properties:
+ adi,gain-offset:
+ description: Gain offset used in the above formula
+ $ref: /schemas/types.yaml#/definitions/int32
+ maximum: 511
+ minimum: -511
+
+ adi,gain-scaling-p-inv-log2:
+ description: GainP = 1 / ( 2 ^ adi,gain-scaling-p-inv-log2)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+
+ adi,gain-scaling-n-inv-log2:
+ description: GainN = 1 / ( 2 ^ adi,gain-scaling-n-inv-log2)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+
+ adi,rfb-ohms:
+ description: Feedback Resistor
+
+ required:
+ - adi,gain-offset
+ - adi,gain-scaling-p-inv-log2
+ - adi,gain-scaling-n-inv-log2
+ - adi,rfb-ohms
+
+ required:
+ - reg
+
+ oneOf:
+ # If adi,output-range-microvolt is missing,
+ # custom-output-range-config must be used
+ - required:
+ - adi,output-range-microvolt
+
+ - required:
+ - custom-output-range-config
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: adi,ad3542r
+ then:
+ patternProperties:
+ "^channel@([0-1])$":
+ type: object
+ properties:
+ adi,output-range-microvolt:
+ description: |
+ Voltage output range of the channel as <minimum, maximum>
+ Required connections:
+ Rfb1x for: 0 to 2.5 V; 0 to 3V; 0 to 5 V;
+ Rfb2x for: 0 to 10 V; 2.5 to 7.5V; -5 to 5 V;
+ oneOf:
+ - items:
+ - const: 0
+ - enum: [2500000, 3000000, 5000000, 10000000]
+ - items:
+ - const: -2500000
+ - const: 7500000
+ - items:
+ - const: -5000000
+ - const: 5000000
+
+ required:
+ - adi,output-range-microvolt
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: adi,ad3552r
+ then:
+ patternProperties:
+ "^channel@([0-1])$":
+ type: object
+ properties:
+ adi,output-range-microvolt:
+ description: |
+ Voltage output range of the channel as <minimum, maximum>
+ Required connections:
+ Rfb1x for: 0 to 2.5 V; 0 to 5 V;
+ Rfb2x for: 0 to 10 V; -5 to 5 V;
+ Rfb4x for: -10 to 10V
+ oneOf:
+ - items:
+ - const: 0
+ - enum: [2500000, 5000000, 10000000]
+ - items:
+ - const: -5000000
+ - const: 5000000
+ - items:
+ - const: -10000000
+ - const: 10000000
+
+required:
+ - compatible
+ - reg
+ - spi-max-frequency
+
+additionalProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ ad3552r@0 {
+ compatible = "adi,ad3552r";
+ reg = <0>;
+ spi-max-frequency = <20000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ channel@0 {
+ reg = <0>;
+ adi,output-range-microvolt = <0 10000000>;
+ };
+ channel@1 {
+ reg = <1>;
+ custom-output-range-config {
+ adi,gain-offset = <5>;
+ adi,gain-scaling-p-inv-log2 = <1>;
+ adi,gain-scaling-n-inv-log2 = <2>;
+ adi,rfb-ohms = <1>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5755.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5755.yaml
index be419ac46caa..f866b88e1440 100644
--- a/Documentation/devicetree/bindings/iio/dac/adi,ad5755.yaml
+++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5755.yaml
@@ -125,7 +125,6 @@ oneOf:
examples:
- |
- #include <dt-bindings/iio/adi,ad5592r.h>
spi {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad7293.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad7293.yaml
new file mode 100644
index 000000000000..5ee80bf6aa11
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/dac/adi,ad7293.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/dac/adi,ad7293.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AD7293 12-Bit Power Amplifier Current Controller with ADC,
+ DACs, Temperature and Current Sensors
+
+maintainers:
+ - Antoniu Miclaus <antoniu.miclaus@analog.com>
+
+description: |
+ Power Amplifier drain current controller containing functionality
+ for general-purpose monitoring and control of current, voltage,
+ and temperature, integrated into a single chip solution with an
+ SPI-compatible interface.
+
+ https://www.analog.com/en/products/ad7293.html
+
+properties:
+ compatible:
+ enum:
+ - adi,ad7293
+
+ avdd-supply: true
+
+ vdrive-supply: true
+
+ reset-gpios:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ spi-max-frequency:
+ maximum: 1000000
+
+required:
+ - compatible
+ - reg
+ - avdd-supply
+ - vdrive-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ ad7293@0 {
+ compatible = "adi,ad7293";
+ reg = <0>;
+ spi-max-frequency = <1000000>;
+ avdd-supply = <&avdd>;
+ vdrive-supply = <&vdrive>;
+ reset-gpios = <&gpio 10 0>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/iio/filter/adi,admv8818.yaml b/Documentation/devicetree/bindings/iio/filter/adi,admv8818.yaml
new file mode 100644
index 000000000000..b77e855bd594
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/filter/adi,admv8818.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/filter/adi,admv8818.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ADMV8818 Digitally Tunable, High-Pass and Low-Pass Filter
+
+maintainers:
+ - Antoniu Miclaus <antoniu.miclaus@analog.com>
+
+description: |
+ Fully monolithic microwave integrated circuit (MMIC) that
+ features a digitally selectable frequency of operation.
+ The device features four independently controlled high-pass
+ filters (HPFs) and four independently controlled low-pass filters
+ (LPFs) that span the 2 GHz to 18 GHz frequency range.
+
+ https://www.analog.com/en/products/admv8818.html
+
+properties:
+ compatible:
+ enum:
+ - adi,admv8818
+
+ reg:
+ maxItems: 1
+
+ spi-max-frequency:
+ maximum: 10000000
+
+ clocks:
+ description:
+ Definition of the external clock.
+ minItems: 1
+
+ clock-names:
+ items:
+ - const: rf_in
+
+ clock-output-names:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ admv8818@0 {
+ compatible = "adi,admv8818";
+ reg = <0>;
+ spi-max-frequency = <10000000>;
+ clocks = <&admv8818_rfin>;
+ clock-names = "rf_in";
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml
new file mode 100644
index 000000000000..23f1f3b55abb
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/frequency/adi,admv1013.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ADMV1013 Microwave Upconverter
+
+maintainers:
+ - Antoniu Miclaus <antoniu.miclaus@analog.com>
+
+description: |
+ Wideband, microwave upconverter optimized for point to point microwave
+ radio designs operating in the 24 GHz to 44 GHz frequency range.
+
+ https://www.analog.com/en/products/admv1013.html
+
+properties:
+ compatible:
+ enum:
+ - adi,admv1013
+
+ reg:
+ maxItems: 1
+
+ spi-max-frequency:
+ maximum: 1000000
+
+ clocks:
+ description:
+ Definition of the external clock.
+ minItems: 1
+
+ clock-names:
+ items:
+ - const: lo_in
+
+ vcm-supply:
+ description:
+ Analog voltage regulator.
+
+ adi,detector-enable:
+ description:
+ Enable the Envelope Detector available at output pins VENV_P and
+ VENV_N. Disable to reduce power consumption.
+ type: boolean
+
+ adi,input-mode:
+ description:
+ Select the input mode.
+ iq - in-phase quadrature (I/Q) input
+ if - complex intermediate frequency (IF) input
+ enum: [iq, if]
+
+ adi,quad-se-mode:
+ description:
+ Switch the LO path from differential to single-ended operation.
+ se-neg - Single-Ended Mode, Negative Side Disabled.
+ se-pos - Single-Ended Mode, Positive Side Disabled.
+ diff - Differential Mode.
+ enum: [se-neg, se-pos, diff]
+
+ '#clock-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - vcm-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ admv1013@0{
+ compatible = "adi,admv1013";
+ reg = <0>;
+ spi-max-frequency = <1000000>;
+ clocks = <&admv1013_lo>;
+ clock-names = "lo_in";
+ vcm-supply = <&vcm>;
+ adi,quad-se-mode = "diff";
+ adi,detector-enable;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
index d9b3213318fb..0750f700a143 100644
--- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
+++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
@@ -61,6 +61,13 @@ properties:
type: boolean
description: enable/disable internal i2c controller pullup resistors.
+ st,disable-sensor-hub:
+ type: boolean
+ description:
+ Enable/disable internal i2c controller slave autoprobing at bootstrap.
+ Disable sensor-hub is useful if i2c controller clock/data lines are
+ connected through a pull-up with other chip lines (e.g. SDO/SA0).
+
drive-open-drain:
type: boolean
description:
diff --git a/Documentation/devicetree/bindings/iio/light/liteon,ltr501.yaml b/Documentation/devicetree/bindings/iio/light/liteon,ltr501.yaml
index db0407bc9209..c8074f180a79 100644
--- a/Documentation/devicetree/bindings/iio/light/liteon,ltr501.yaml
+++ b/Documentation/devicetree/bindings/iio/light/liteon,ltr501.yaml
@@ -9,6 +9,9 @@ title: LiteON LTR501 I2C Proximity and Light sensor
maintainers:
- Nikita Travkin <nikita@trvn.ru>
+allOf:
+ - $ref: ../common.yaml#
+
properties:
compatible:
enum:
@@ -25,6 +28,8 @@ properties:
interrupts:
maxItems: 1
+ proximity-near-level: true
+
additionalProperties: false
required:
@@ -42,6 +47,8 @@ examples:
light-sensor@23 {
compatible = "liteon,ltr559";
reg = <0x23>;
+ proximity-near-level = <75>;
+
vdd-supply = <&pm8916_l17>;
vddio-supply = <&pm8916_l6>;
diff --git a/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml b/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml
index 099b4be927d4..00e3b59641d2 100644
--- a/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml
+++ b/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml
@@ -10,7 +10,6 @@ title: ChromeOS EC MKBP Proximity Sensor
maintainers:
- Stephen Boyd <swboyd@chromium.org>
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
Google's ChromeOS EC sometimes has the ability to detect user proximity.
diff --git a/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml b/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml
index 5377b232fa10..e8f137abb03c 100644
--- a/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml
+++ b/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml
@@ -10,7 +10,6 @@ title: ChromeOS EC Keyboard
maintainers:
- Simon Glass <sjg@chromium.org>
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
description: |
Google's ChromeOS EC Keyboard is a simple matrix keyboard
diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml
index dbe7ecc19ccb..7fe1966ea28a 100644
--- a/Documentation/devicetree/bindings/input/gpio-keys.yaml
+++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml
@@ -88,12 +88,6 @@ patternProperties:
which can be disabled to suppress events from the button.
type: boolean
- pinctrl-0:
- maxItems: 1
-
- pinctrl-names:
- maxItems: 1
-
required:
- linux,code
diff --git a/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml
new file mode 100644
index 000000000000..b4e5ba7c0b49
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/zinitix,bt400.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Zinitix BT4xx and BT5xx series touchscreen controller bindings
+
+description: The Zinitix BT4xx and BT5xx series of touchscreen controllers
+ are Korea-produced touchscreens with embedded microcontrollers. The
+ BT4xx series was produced 2010-2013 and the BT5xx series 2013-2014.
+
+maintainers:
+ - Michael Srba <Michael.Srba@seznam.cz>
+ - Linus Walleij <linus.walleij@linaro.org>
+
+allOf:
+ - $ref: touchscreen.yaml#
+
+properties:
+ $nodename:
+ pattern: "^touchscreen(@.*)?$"
+
+ compatible:
+ enum:
+ - zinitix,bt402
+ - zinitix,bt403
+ - zinitix,bt404
+ - zinitix,bt412
+ - zinitix,bt413
+ - zinitix,bt431
+ - zinitix,bt432
+ - zinitix,bt531
+ - zinitix,bt532
+ - zinitix,bt538
+ - zinitix,bt541
+ - zinitix,bt548
+ - zinitix,bt554
+ - zinitix,at100
+
+ reg:
+ description: I2C address on the I2C bus
+
+ clock-frequency:
+ description: I2C client clock frequency, defined for host when using
+ the device on the I2C bus
+ minimum: 0
+ maximum: 400000
+
+ interrupts:
+ description: Interrupt to host
+ maxItems: 1
+
+ vcca-supply:
+ description: Analog power supply regulator on the VCCA pin
+
+ vdd-supply:
+ description: Digital power supply regulator on the VDD pin.
+ In older device trees this can be the accidental name for the analog
+ supply on the VCCA pin, and in that case the deprecated vddo-supply is
+ used for the digital power supply.
+
+ vddo-supply:
+ description: Deprecated name for the digital power supply, use vdd-supply
+ as this reflects the real name of the pin. If this supply is present,
+ the vdd-supply represents VCCA instead of VDD. Implementers should first
+ check for this property, and if it is present assume that the vdd-supply
+ represents the analog supply.
+ deprecated: true
+
+ reset-gpios:
+ description: Reset line for the touchscreen, should be tagged
+ as GPIO_ACTIVE_LOW
+
+ zinitix,mode:
+ description: Mode of reporting touch points. Some modes may not work
+ with a particular ts firmware for unknown reasons. Available modes are
+ 1 and 2. Mode 2 is the default and preferred.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2]
+
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+ touchscreen-fuzz-x: true
+ touchscreen-fuzz-y: true
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - touchscreen-size-x
+ - touchscreen-size-y
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/gpio/gpio.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ touchscreen@20 {
+ compatible = "zinitix,bt541";
+ reg = <0x20>;
+ interrupt-parent = <&gpio>;
+ interrupts = <13 IRQ_TYPE_EDGE_FALLING>;
+ vcca-supply = <&reg_vcca_tsp>;
+ vdd-supply = <&reg_vdd_tsp>;
+ touchscreen-size-x = <540>;
+ touchscreen-size-y = <960>;
+ zinitix,mode = <2>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/zinitix.txt b/Documentation/devicetree/bindings/input/touchscreen/zinitix.txt
deleted file mode 100644
index 446efb9f5f55..000000000000
--- a/Documentation/devicetree/bindings/input/touchscreen/zinitix.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-Device tree bindings for Zinitx BT541 touchscreen controller
-
-Required properties:
-
- - compatible : Should be "zinitix,bt541"
- - reg : I2C address of the chip. Should be 0x20
- - interrupts : Interrupt to which the chip is connected
-
-Optional properties:
-
- - vdd-supply : Analog power supply regulator on VCCA pin
- - vddo-supply : Digital power supply regulator on VDD pin
- - zinitix,mode : Mode of reporting touch points. Some modes may not work
- with a particular ts firmware for unknown reasons. Available
- modes are 1 and 2. Mode 2 is the default and preferred.
-
-The touchscreen-* properties are documented in touchscreen.txt in this
-directory.
-
-Example:
-
- i2c@00000000 {
- /* ... */
-
- bt541@20 {
- compatible = "zinitix,bt541";
- reg = <0x20>;
- interrupt-parent = <&msmgpio>;
- interrupts = <13 IRQ_TYPE_EDGE_FALLING>;
- pinctrl-names = "default";
- pinctrl-0 = <&tsp_default>;
- vdd-supply = <&reg_vdd_tsp>;
- vddo-supply = <&pm8916_l6>;
- touchscreen-size-x = <540>;
- touchscreen-size-y = <960>;
- zinitix,mode = <2>;
- };
-
- /* ... */
- };
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml
index e701524ee811..116e434d0daa 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml
@@ -18,6 +18,7 @@ properties:
compatible:
enum:
- qcom,sc7180-osm-l3
+ - qcom,sc7280-epss-l3
- qcom,sc8180x-osm-l3
- qcom,sdm845-osm-l3
- qcom,sm8150-osm-l3
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,qcm2290.yaml b/Documentation/devicetree/bindings/interconnect/qcom,qcm2290.yaml
new file mode 100644
index 000000000000..f65a2fe846de
--- /dev/null
+++ b/Documentation/devicetree/bindings/interconnect/qcom,qcm2290.yaml
@@ -0,0 +1,137 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interconnect/qcom,qcm2290.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm QCM2290 Network-On-Chip interconnect
+
+maintainers:
+ - Shawn Guo <shawn.guo@linaro.org>
+
+description: |
+ The Qualcomm QCM2290 interconnect providers support adjusting the
+ bandwidth requirements between the various NoC fabrics.
+
+properties:
+ reg:
+ maxItems: 1
+
+ compatible:
+ enum:
+ - qcom,qcm2290-bimc
+ - qcom,qcm2290-cnoc
+ - qcom,qcm2290-snoc
+
+ '#interconnect-cells':
+ const: 1
+
+ clock-names:
+ items:
+ - const: bus
+ - const: bus_a
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: Bus A Clock
+
+# Child node's properties
+patternProperties:
+ '^interconnect-[a-z0-9]+$':
+ type: object
+ description:
+ The interconnect providers do not have a separate QoS register space,
+ but share parent's space.
+
+ properties:
+ compatible:
+ enum:
+ - qcom,qcm2290-qup-virt
+ - qcom,qcm2290-mmrt-virt
+ - qcom,qcm2290-mmnrt-virt
+
+ '#interconnect-cells':
+ const: 1
+
+ clock-names:
+ items:
+ - const: bus
+ - const: bus_a
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: Bus A Clock
+
+ required:
+ - compatible
+ - '#interconnect-cells'
+ - clock-names
+ - clocks
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - '#interconnect-cells'
+ - clock-names
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+
+ snoc: interconnect@1880000 {
+ compatible = "qcom,qcm2290-snoc";
+ reg = <0x01880000 0x60200>;
+ #interconnect-cells = <1>;
+ clock-names = "bus", "bus_a";
+ clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
+ <&rpmcc RPM_SMD_SNOC_A_CLK>;
+
+ qup_virt: interconnect-qup {
+ compatible = "qcom,qcm2290-qup-virt";
+ #interconnect-cells = <1>;
+ clock-names = "bus", "bus_a";
+ clocks = <&rpmcc RPM_SMD_QUP_CLK>,
+ <&rpmcc RPM_SMD_QUP_A_CLK>;
+ };
+
+ mmnrt_virt: interconnect-mmnrt {
+ compatible = "qcom,qcm2290-mmnrt-virt";
+ #interconnect-cells = <1>;
+ clock-names = "bus", "bus_a";
+ clocks = <&rpmcc RPM_SMD_MMNRT_CLK>,
+ <&rpmcc RPM_SMD_MMNRT_A_CLK>;
+ };
+
+ mmrt_virt: interconnect-mmrt {
+ compatible = "qcom,qcm2290-mmrt-virt";
+ #interconnect-cells = <1>;
+ clock-names = "bus", "bus_a";
+ clocks = <&rpmcc RPM_SMD_MMRT_CLK>,
+ <&rpmcc RPM_SMD_MMRT_A_CLK>;
+ };
+ };
+
+ cnoc: interconnect@1900000 {
+ compatible = "qcom,qcm2290-cnoc";
+ reg = <0x01900000 0x8200>;
+ #interconnect-cells = <1>;
+ clock-names = "bus", "bus_a";
+ clocks = <&rpmcc RPM_SMD_CNOC_CLK>,
+ <&rpmcc RPM_SMD_CNOC_A_CLK>;
+ };
+
+ bimc: interconnect@4480000 {
+ compatible = "qcom,qcm2290-bimc";
+ reg = <0x04480000 0x80000>;
+ #interconnect-cells = <1>;
+ clock-names = "bus", "bus_a";
+ clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
+ <&rpmcc RPM_SMD_BIMC_A_CLK>;
+ };
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml
index 983d71fb5399..e4c3c2818119 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml
@@ -27,22 +27,37 @@ properties:
- qcom,msm8939-pcnoc
- qcom,msm8939-snoc
- qcom,msm8939-snoc-mm
+ - qcom,msm8996-a0noc
+ - qcom,msm8996-a1noc
+ - qcom,msm8996-a2noc
+ - qcom,msm8996-bimc
+ - qcom,msm8996-cnoc
+ - qcom,msm8996-mnoc
+ - qcom,msm8996-pnoc
+ - qcom,msm8996-snoc
- qcom,qcs404-bimc
- qcom,qcs404-pcnoc
- qcom,qcs404-snoc
+ - qcom,sdm660-a2noc
+ - qcom,sdm660-bimc
+ - qcom,sdm660-cnoc
+ - qcom,sdm660-gnoc
+ - qcom,sdm660-mnoc
+ - qcom,sdm660-snoc
'#interconnect-cells':
const: 1
+ clocks:
+ minItems: 2
+ maxItems: 7
+
clock-names:
- items:
- - const: bus
- - const: bus_a
+ minItems: 2
+ maxItems: 7
- clocks:
- items:
- - description: Bus Clock
- - description: Bus A Clock
+ power-domains:
+ maxItems: 1
required:
- compatible
@@ -53,6 +68,120 @@ required:
additionalProperties: false
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,msm8916-bimc
+ - qcom,msm8916-pcnoc
+ - qcom,msm8916-snoc
+ - qcom,msm8939-bimc
+ - qcom,msm8939-pcnoc
+ - qcom,msm8939-snoc
+ - qcom,msm8939-snoc-mm
+ - qcom,msm8996-a1noc
+ - qcom,msm8996-a2noc
+ - qcom,msm8996-bimc
+ - qcom,msm8996-cnoc
+ - qcom,msm8996-pnoc
+ - qcom,msm8996-snoc
+ - qcom,qcs404-bimc
+ - qcom,qcs404-pcnoc
+ - qcom,qcs404-snoc
+ - qcom,sdm660-bimc
+ - qcom,sdm660-cnoc
+ - qcom,sdm660-gnoc
+ - qcom,sdm660-snoc
+
+ then:
+ properties:
+ clock-names:
+ items:
+ - const: bus
+ - const: bus_a
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: Bus A Clock
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,msm8996-mnoc
+ - qcom,sdm660-mnoc
+
+ then:
+ properties:
+ clock-names:
+ items:
+ - const: bus
+ - const: bus_a
+ - const: iface
+
+ clocks:
+ items:
+ - description: Bus Clock.
+ - description: Bus A Clock.
+ - description: CPU-NoC High-performance Bus Clock.
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,msm8996-a0noc
+
+ then:
+ properties:
+ clock-names:
+ items:
+ - const: aggre0_snoc_axi
+ - const: aggre0_cnoc_ahb
+ - const: aggre0_noc_mpu_cfg
+
+ clocks:
+ items:
+ - description: Aggregate0 System NoC AXI Clock.
+ - description: Aggregate0 Config NoC AHB Clock.
+ - description: Aggregate0 NoC MPU Clock.
+
+ required:
+ - power-domains
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sdm660-a2noc
+
+ then:
+ properties:
+ clock-names:
+ items:
+ - const: bus
+ - const: bus_a
+ - const: ipa
+ - const: ufs_axi
+ - const: aggre2_ufs_axi
+ - const: aggre2_usb3_axi
+ - const: cfg_noc_usb2_axi
+
+ clocks:
+ items:
+ - description: Bus Clock.
+ - description: Bus A Clock.
+ - description: IPA Clock.
+ - description: UFS AXI Clock.
+ - description: Aggregate2 UFS AXI Clock.
+ - description: Aggregate2 USB3 AXI Clock.
+ - description: Config NoC USB2 AXI Clock.
+
examples:
- |
#include <dt-bindings/clock/qcom,rpmcc.h>
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
index 3fd1a134162d..cbb24f9bb609 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
@@ -104,6 +104,17 @@ properties:
- qcom,sm8350-mmss-noc
- qcom,sm8350-compute-noc
- qcom,sm8350-system-noc
+ - qcom,sm8450-aggre1-noc
+ - qcom,sm8450-aggre2-noc
+ - qcom,sm8450-clk-virt
+ - qcom,sm8450-config-noc
+ - qcom,sm8450-gem-noc
+ - qcom,sm8450-lpass-ag-noc
+ - qcom,sm8450-mc-virt
+ - qcom,sm8450-mmss-noc
+ - qcom,sm8450-nsp-noc
+ - qcom,sm8450-pcie-anoc
+ - qcom,sm8450-system-noc
'#interconnect-cells':
enum: [ 1, 2 ]
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sdm660.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sdm660.yaml
deleted file mode 100644
index bcd41e491f1d..000000000000
--- a/Documentation/devicetree/bindings/interconnect/qcom,sdm660.yaml
+++ /dev/null
@@ -1,185 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/interconnect/qcom,sdm660.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Qualcomm SDM660 Network-On-Chip interconnect
-
-maintainers:
- - AngeloGioacchino Del Regno <kholk11@gmail.com>
-
-description: |
- The Qualcomm SDM660 interconnect providers support adjusting the
- bandwidth requirements between the various NoC fabrics.
-
-properties:
- reg:
- maxItems: 1
-
- compatible:
- enum:
- - qcom,sdm660-a2noc
- - qcom,sdm660-bimc
- - qcom,sdm660-cnoc
- - qcom,sdm660-gnoc
- - qcom,sdm660-mnoc
- - qcom,sdm660-snoc
-
- '#interconnect-cells':
- const: 1
-
- clocks:
- minItems: 1
- maxItems: 7
-
- clock-names:
- minItems: 1
- maxItems: 7
-
-required:
- - compatible
- - reg
- - '#interconnect-cells'
- - clock-names
- - clocks
-
-additionalProperties: false
-
-allOf:
- - if:
- properties:
- compatible:
- contains:
- enum:
- - qcom,sdm660-mnoc
- then:
- properties:
- clocks:
- items:
- - description: Bus Clock.
- - description: Bus A Clock.
- - description: CPU-NoC High-performance Bus Clock.
- clock-names:
- items:
- - const: bus
- - const: bus_a
- - const: iface
-
- - if:
- properties:
- compatible:
- contains:
- enum:
- - qcom,sdm660-a2noc
- then:
- properties:
- clocks:
- items:
- - description: Bus Clock.
- - description: Bus A Clock.
- - description: IPA Clock.
- - description: UFS AXI Clock.
- - description: Aggregate2 UFS AXI Clock.
- - description: Aggregate2 USB3 AXI Clock.
- - description: Config NoC USB2 AXI Clock.
- clock-names:
- items:
- - const: bus
- - const: bus_a
- - const: ipa
- - const: ufs_axi
- - const: aggre2_ufs_axi
- - const: aggre2_usb3_axi
- - const: cfg_noc_usb2_axi
-
- - if:
- properties:
- compatible:
- contains:
- enum:
- - qcom,sdm660-bimc
- - qcom,sdm660-cnoc
- - qcom,sdm660-gnoc
- - qcom,sdm660-snoc
- then:
- properties:
- clocks:
- items:
- - description: Bus Clock.
- - description: Bus A Clock.
- clock-names:
- items:
- - const: bus
- - const: bus_a
-
-examples:
- - |
- #include <dt-bindings/clock/qcom,rpmcc.h>
- #include <dt-bindings/clock/qcom,mmcc-sdm660.h>
- #include <dt-bindings/clock/qcom,gcc-sdm660.h>
-
- bimc: interconnect@1008000 {
- compatible = "qcom,sdm660-bimc";
- reg = <0x01008000 0x78000>;
- #interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
- <&rpmcc RPM_SMD_BIMC_A_CLK>;
- };
-
- cnoc: interconnect@1500000 {
- compatible = "qcom,sdm660-cnoc";
- reg = <0x01500000 0x10000>;
- #interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_CNOC_CLK>,
- <&rpmcc RPM_SMD_CNOC_A_CLK>;
- };
-
- snoc: interconnect@1626000 {
- compatible = "qcom,sdm660-snoc";
- reg = <0x01626000 0x7090>;
- #interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
- <&rpmcc RPM_SMD_SNOC_A_CLK>;
- };
-
- a2noc: interconnect@1704000 {
- compatible = "qcom,sdm660-a2noc";
- reg = <0x01704000 0xc100>;
- #interconnect-cells = <1>;
- clock-names = "bus",
- "bus_a",
- "ipa",
- "ufs_axi",
- "aggre2_ufs_axi",
- "aggre2_usb3_axi",
- "cfg_noc_usb2_axi";
- clocks = <&rpmcc RPM_SMD_AGGR2_NOC_CLK>,
- <&rpmcc RPM_SMD_AGGR2_NOC_A_CLK>,
- <&rpmcc RPM_SMD_IPA_CLK>,
- <&gcc GCC_UFS_AXI_CLK>,
- <&gcc GCC_AGGRE2_UFS_AXI_CLK>,
- <&gcc GCC_AGGRE2_USB3_AXI_CLK>,
- <&gcc GCC_CFG_NOC_USB2_AXI_CLK>;
- };
-
- mnoc: interconnect@1745000 {
- compatible = "qcom,sdm660-mnoc";
- reg = <0x01745000 0xa010>;
- #interconnect-cells = <1>;
- clock-names = "bus", "bus_a", "iface";
- clocks = <&rpmcc RPM_SMD_MMSSNOC_AXI_CLK>,
- <&rpmcc RPM_SMD_MMSSNOC_AXI_CLK_A>,
- <&mmcc AHB_CLK_SRC>;
- };
-
- gnoc: interconnect@17900000 {
- compatible = "qcom,sdm660-gnoc";
- reg = <0x17900000 0xe000>;
- #interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&xo_board>, <&xo_board>;
- };
diff --git a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
index 1ef849dc74d7..e2e6e9aa0fe6 100644
--- a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
+++ b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
@@ -81,14 +81,12 @@ properties:
data-lanes:
description:
Note that 'fsl,imx7-mipi-csi2' only supports up to 2 data lines.
+ minItems: 1
items:
- minItems: 1
- maxItems: 4
- items:
- - const: 1
- - const: 2
- - const: 3
- - const: 4
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
required:
- data-lanes
diff --git a/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml
index 9c04fa85ee5c..1b3e1c4b99ed 100644
--- a/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml
+++ b/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml
@@ -87,14 +87,12 @@ properties:
properties:
data-lanes:
+ minItems: 1
items:
- minItems: 1
- maxItems: 4
- items:
- - const: 1
- - const: 2
- - const: 3
- - const: 4
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
required:
- data-lanes
diff --git a/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml b/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml
index 5dce62a7eff2..68c75a517c92 100644
--- a/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml
+++ b/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml
@@ -245,8 +245,7 @@ examples:
interrupt-controller;
#interrupt-cells = <2>;
- interrupts = <&host_irq1>;
- interrupt-parent = <&gic>;
+ interrupts = <4 1 0>;
gpio-controller;
#gpio-cells = <2>;
diff --git a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
index 0faa4da6c7c8..d1f53bd449f7 100644
--- a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
+++ b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
@@ -8,7 +8,6 @@ title: ChromeOS Embedded Controller
maintainers:
- Benson Leung <bleung@chromium.org>
- - Enric Balletbo i Serra <enric.balletbo@collabora.com>
- Guenter Roeck <groeck@chromium.org>
description:
diff --git a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
index a4f74bec68a3..1e69a5a42439 100644
--- a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
+++ b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
@@ -185,6 +185,9 @@ examples:
clock-names = "mclk", "apb_pclk";
};
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
mmc@80126000 {
compatible = "arm,pl18x", "arm,primecell";
reg = <0x80126000 0x1000>;
@@ -206,12 +209,12 @@ examples:
vqmmc-supply = <&vmmci>;
};
+ - |
mmc@101f6000 {
compatible = "arm,pl18x", "arm,primecell";
reg = <0x101f6000 0x1000>;
clocks = <&sdiclk>, <&pclksdi>;
clock-names = "mclk", "apb_pclk";
- interrupt-parent = <&vica>;
interrupts = <22>;
max-frequency = <400000>;
bus-width = <4>;
@@ -226,6 +229,7 @@ examples:
vmmc-supply = <&vmmc_regulator>;
};
+ - |
mmc@52007000 {
compatible = "arm,pl18x", "arm,primecell";
arm,primecell-periphid = <0x10153180>;
diff --git a/Documentation/devicetree/bindings/mux/gpio-mux.yaml b/Documentation/devicetree/bindings/mux/gpio-mux.yaml
index 0a7c8d64981a..ee4de9fbaf4d 100644
--- a/Documentation/devicetree/bindings/mux/gpio-mux.yaml
+++ b/Documentation/devicetree/bindings/mux/gpio-mux.yaml
@@ -26,7 +26,10 @@ properties:
List of gpios used to control the multiplexer, least significant bit first.
'#mux-control-cells':
- const: 0
+ enum: [ 0, 1 ]
+
+ '#mux-state-cells':
+ enum: [ 1, 2 ]
idle-state:
default: -1
@@ -34,7 +37,11 @@ properties:
required:
- compatible
- mux-gpios
- - "#mux-control-cells"
+anyOf:
+ - required:
+ - "#mux-control-cells"
+ - required:
+ - "#mux-state-cells"
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/mux/mux-consumer.yaml b/Documentation/devicetree/bindings/mux/mux-consumer.yaml
index 7af93298ab5c..d3d854967359 100644
--- a/Documentation/devicetree/bindings/mux/mux-consumer.yaml
+++ b/Documentation/devicetree/bindings/mux/mux-consumer.yaml
@@ -25,6 +25,17 @@ description: |
strings to label each of the mux controllers listed in the "mux-controls"
property.
+ If it is required to provide the state that the mux controller needs to
+ be set to, the property "mux-states" must be used. An optional property
+ "mux-state-names" can be used to provide a list of strings, to label
+ each of the multiplixer states listed in the "mux-states" property.
+
+ Properties "mux-controls" and "mux-states" can be used depending on how
+ the consumers want to control the mux controller. If the consumer needs
+ needs to set multiple states in a mux controller, then property
+ "mux-controls" can be used. If the consumer needs to set the mux
+ controller to a given state then property "mux-states" can be used.
+
mux-ctrl-specifier typically encodes the chip-relative mux controller number.
If the mux controller chip only provides a single mux controller, the
mux-ctrl-specifier can typically be left out.
@@ -35,12 +46,22 @@ properties:
mux-controls:
$ref: /schemas/types.yaml#/definitions/phandle-array
+ mux-states:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+
mux-control-names:
description:
Devices that use more than a single mux controller can use the
"mux-control-names" property to map the name of the requested mux
controller to an index into the list given by the "mux-controls" property.
+ mux-state-names:
+ description:
+ Devices that use more than a single multiplexer state can use the
+ "mux-state-names" property to map the name of the requested mux
+ controller to an index into the list given by the "mux-states"
+ property.
+
additionalProperties: true
...
diff --git a/Documentation/devicetree/bindings/mux/mux-controller.yaml b/Documentation/devicetree/bindings/mux/mux-controller.yaml
index 736a84c3b6a5..c855fbad3884 100644
--- a/Documentation/devicetree/bindings/mux/mux-controller.yaml
+++ b/Documentation/devicetree/bindings/mux/mux-controller.yaml
@@ -25,7 +25,9 @@ description: |
--------------------
Mux controller nodes must specify the number of cells used for the
- specifier using the '#mux-control-cells' property.
+ specifier using the '#mux-control-cells' or '#mux-state-cells' property.
+ The value of '#mux-state-cells' will always be one greater than the value
+ of '#mux-control-cells'.
Optionally, mux controller nodes can also specify the state the mux should
have when it is idle. The idle-state property is used for this. If the
@@ -67,6 +69,8 @@ select:
pattern: '^mux-controller'
- required:
- '#mux-control-cells'
+ - required:
+ - '#mux-state-cells'
properties:
$nodename:
@@ -75,6 +79,9 @@ properties:
'#mux-control-cells':
enum: [ 0, 1 ]
+ '#mux-state-cells':
+ enum: [ 1, 2 ]
+
idle-state:
$ref: /schemas/types.yaml#/definitions/int32
minimum: -2
@@ -179,4 +186,21 @@ examples:
};
};
};
+
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ mux1: mux-controller {
+ compatible = "gpio-mux";
+ #mux-state-cells = <1>;
+ mux-gpios = <&exp_som 2 GPIO_ACTIVE_HIGH>;
+ };
+
+ transceiver4: can-phy4 {
+ compatible = "ti,tcan1042";
+ #phy-cells = <0>;
+ max-bitrate = <5000000>;
+ standby-gpios = <&exp_som 7 GPIO_ACTIVE_HIGH>;
+ mux-states = <&mux1 1>;
+ };
...
diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
index fb547e26c676..401ab7cdb379 100644
--- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
+++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
@@ -76,33 +76,31 @@ properties:
M_CAN user manual for details.
$ref: /schemas/types.yaml#/definitions/int32-array
items:
- items:
- - description: The 'offset' is an address offset of the Message RAM where
- the following elements start from. This is usually set to 0x0 if
- you're using a private Message RAM.
- default: 0
- - description: 11-bit Filter 0-128 elements / 0-128 words
- minimum: 0
- maximum: 128
- - description: 29-bit Filter 0-64 elements / 0-128 words
- minimum: 0
- maximum: 64
- - description: Rx FIFO 0 0-64 elements / 0-1152 words
- minimum: 0
- maximum: 64
- - description: Rx FIFO 1 0-64 elements / 0-1152 words
- minimum: 0
- maximum: 64
- - description: Rx Buffers 0-64 elements / 0-1152 words
- minimum: 0
- maximum: 64
- - description: Tx Event FIFO 0-32 elements / 0-64 words
- minimum: 0
- maximum: 32
- - description: Tx Buffers 0-32 elements / 0-576 words
- minimum: 0
- maximum: 32
- maxItems: 1
+ - description: The 'offset' is an address offset of the Message RAM where
+ the following elements start from. This is usually set to 0x0 if
+ you're using a private Message RAM.
+ default: 0
+ - description: 11-bit Filter 0-128 elements / 0-128 words
+ minimum: 0
+ maximum: 128
+ - description: 29-bit Filter 0-64 elements / 0-128 words
+ minimum: 0
+ maximum: 64
+ - description: Rx FIFO 0 0-64 elements / 0-1152 words
+ minimum: 0
+ maximum: 64
+ - description: Rx FIFO 1 0-64 elements / 0-1152 words
+ minimum: 0
+ maximum: 64
+ - description: Rx Buffers 0-64 elements / 0-1152 words
+ minimum: 0
+ maximum: 64
+ - description: Tx Event FIFO 0-32 elements / 0-64 words
+ minimum: 0
+ maximum: 32
+ - description: Tx Buffers 0-32 elements / 0-576 words
+ minimum: 0
+ maximum: 32
power-domains:
description:
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 47b5f728701d..34c5463abcec 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -17,9 +17,8 @@ properties:
description:
Specifies the MAC address that was assigned to the network device.
$ref: /schemas/types.yaml#/definitions/uint8-array
- items:
- - minItems: 6
- maxItems: 6
+ minItems: 6
+ maxItems: 6
mac-address:
description:
@@ -28,9 +27,8 @@ properties:
to the device by the boot program is different from the
local-mac-address property.
$ref: /schemas/types.yaml#/definitions/uint8-array
- items:
- - minItems: 6
- maxItems: 6
+ minItems: 6
+ maxItems: 6
max-frame-size:
$ref: /schemas/types.yaml#/definitions/uint32
@@ -164,33 +162,30 @@ properties:
type: array
then:
deprecated: true
- minItems: 1
- maxItems: 1
items:
- items:
- - minimum: 0
- maximum: 31
- description:
- Emulated PHY ID, choose any but unique to the all
- specified fixed-links
-
- - enum: [0, 1]
- description:
- Duplex configuration. 0 for half duplex or 1 for
- full duplex
-
- - enum: [10, 100, 1000, 2500, 10000]
- description:
- Link speed in Mbits/sec.
-
- - enum: [0, 1]
- description:
- Pause configuration. 0 for no pause, 1 for pause
-
- - enum: [0, 1]
- description:
- Asymmetric pause configuration. 0 for no asymmetric
- pause, 1 for asymmetric pause
+ - minimum: 0
+ maximum: 31
+ description:
+ Emulated PHY ID, choose any but unique to the all
+ specified fixed-links
+
+ - enum: [0, 1]
+ description:
+ Duplex configuration. 0 for half duplex or 1 for
+ full duplex
+
+ - enum: [10, 100, 1000, 2500, 10000]
+ description:
+ Link speed in Mbits/sec.
+
+ - enum: [0, 1]
+ description:
+ Pause configuration. 0 for no pause, 1 for pause
+
+ - enum: [0, 1]
+ description:
+ Asymmetric pause configuration. 0 for no asymmetric
+ pause, 1 for asymmetric pause
- if:
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index c00fb0d22c7b..020337f3c05f 100644
--- a/Documentation/devicetree/bindings/net/fsl-fman.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fman.txt
@@ -410,6 +410,15 @@ PROPERTIES
The settings and programming routines for internal/external
MDIO are different. Must be included for internal MDIO.
+- fsl,erratum-a009885
+ Usage: optional
+ Value type: <boolean>
+ Definition: Indicates the presence of the A009885
+ erratum describing that the contents of MDIO_DATA may
+ become corrupt unless it is read within 16 MDC cycles
+ of MDIO_CFG[BSY] being cleared, when performing an
+ MDIO read operation.
+
- fsl,erratum-a011043
Usage: optional
Value type: <boolean>
diff --git a/Documentation/devicetree/bindings/net/oxnas-dwmac.txt b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt
index d7117a22fd87..27db496f1ce8 100644
--- a/Documentation/devicetree/bindings/net/oxnas-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt
@@ -9,6 +9,9 @@ Required properties on all platforms:
- compatible: For the OX820 SoC, it should be :
- "oxsemi,ox820-dwmac" to select glue
- "snps,dwmac-3.512" to select IP version.
+ For the OX810SE SoC, it should be :
+ - "oxsemi,ox810se-dwmac" to select glue
+ - "snps,dwmac-3.512" to select IP version.
- clocks: Should contain phandles to the following clocks
- clock-names: Should contain the following:
diff --git a/Documentation/devicetree/bindings/nvmem/brcm,nvram.yaml b/Documentation/devicetree/bindings/nvmem/brcm,nvram.yaml
index 58ff6b0bdb1a..8c3f0cd22821 100644
--- a/Documentation/devicetree/bindings/nvmem/brcm,nvram.yaml
+++ b/Documentation/devicetree/bindings/nvmem/brcm,nvram.yaml
@@ -24,6 +24,9 @@ properties:
compatible:
const: brcm,nvram
+ reg:
+ maxItems: 1
+
unevaluatedProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt
index b6791702bcfc..39d529599444 100644
--- a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt
+++ b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt
@@ -8,8 +8,10 @@ Required properties:
"mediatek,mt7623-efuse", "mediatek,efuse": for MT7623
"mediatek,mt8173-efuse" or "mediatek,efuse": for MT8173
"mediatek,mt8192-efuse", "mediatek,efuse": for MT8192
+ "mediatek,mt8195-efuse", "mediatek,efuse": for MT8195
"mediatek,mt8516-efuse", "mediatek,efuse": for MT8516
- reg: Should contain registers location and length
+- bits: contain the bits range by offset and size
= Data cells =
Are child nodes of MTK-EFUSE, bindings of which as described in
diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
index 456fb808100a..43ed7e32e5ac 100644
--- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
@@ -50,16 +50,15 @@ patternProperties:
Offset and size in bytes within the storage device.
bits:
- maxItems: 1
+ $ref: /schemas/types.yaml#/definitions/uint32-array
items:
- items:
- - minimum: 0
- maximum: 7
- description:
- Offset in bit within the address range specified by reg.
- - minimum: 1
- description:
- Size in bit within the address range specified by reg.
+ - minimum: 0
+ maximum: 7
+ description:
+ Offset in bit within the address range specified by reg.
+ - minimum: 1
+ description:
+ Size in bit within the address range specified by reg.
required:
- reg
diff --git a/Documentation/devicetree/bindings/nvmem/rmem.yaml b/Documentation/devicetree/bindings/nvmem/rmem.yaml
index 1d85a0a30846..a4a755dcfc43 100644
--- a/Documentation/devicetree/bindings/nvmem/rmem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/rmem.yaml
@@ -19,6 +19,9 @@ properties:
- raspberrypi,bootloader-config
- const: nvmem-rmem
+ reg:
+ maxItems: 1
+
no-map:
$ref: /schemas/types.yaml#/definitions/flag
description:
diff --git a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
index a48c8fa56bce..448a2678dc62 100644
--- a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
@@ -24,6 +24,9 @@ properties:
- st,stm32f4-otp
- st,stm32mp15-bsec
+ reg:
+ maxItems: 1
+
patternProperties:
"^.*@[0-9a-f]+$":
type: object
diff --git a/Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml b/Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml
index bc1dcb483dbb..0f064e4222f3 100644
--- a/Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml
@@ -146,11 +146,15 @@ examples:
#address-cells = <3>;
#size-cells = <2>;
#interrupt-cells = <1>;
- interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "pcie", "msi";
interrupt-map-mask = <0x0 0x0 0x0 0x7>;
- interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH
+ 0 0 0 2 &gicv2 GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH
+ 0 0 0 3 &gicv2 GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH
+ 0 0 0 4 &gicv2 GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>;
+
msi-parent = <&pcie0>;
msi-controller;
ranges = <0x02000000 0x0 0xf8000000 0x6 0x00000000 0x0 0x04000000>;
@@ -158,5 +162,24 @@ examples:
<0x42000000 0x1 0x80000000 0x3 0x00000000 0x0 0x80000000>;
brcm,enable-ssc;
brcm,scb-sizes = <0x0000000080000000 0x0000000080000000>;
+
+ /* PCIe bridge, Root Port */
+ pci@0,0 {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ reg = <0x0 0x0 0x0 0x0 0x0>;
+ compatible = "pciclass,0604";
+ device_type = "pci";
+ vpcie3v3-supply = <&vreg7>;
+ ranges;
+
+ /* PCIe endpoint */
+ pci-ep@0,0 {
+ assigned-addresses =
+ <0x82010000 0x0 0xf8000000 0x6 0x00000000 0x0 0x2000>;
+ reg = <0x0 0x0 0x0 0x0 0x0>;
+ compatible = "pci14e4,1688";
+ };
+ };
};
};
diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
index acea1cd444fd..643a6333b07b 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
@@ -127,6 +127,12 @@ properties:
enum: [1, 2, 3, 4]
default: 1
+ phys:
+ maxItems: 1
+
+ phy-names:
+ const: pcie-phy
+
reset-gpio:
description: Should specify the GPIO for controlling the PCI bus device
reset signal. It's not polarity aware and defaults to active-low reset
diff --git a/Documentation/devicetree/bindings/pci/ti,am65-pci-ep.yaml b/Documentation/devicetree/bindings/pci/ti,am65-pci-ep.yaml
index a6896cb40e83..74195c1f5292 100644
--- a/Documentation/devicetree/bindings/pci/ti,am65-pci-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/ti,am65-pci-ep.yaml
@@ -32,8 +32,12 @@ properties:
maxItems: 1
ti,syscon-pcie-mode:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: Phandle to the SYSCON entry
+ - description: pcie_ctrl register offset within SYSCON
description: Phandle to the SYSCON entry required for configuring PCIe in RC or EP mode.
- $ref: /schemas/types.yaml#/definitions/phandle
interrupts:
minItems: 1
@@ -65,7 +69,7 @@ examples:
<0x5506000 0x1000>;
reg-names = "app", "dbics", "addr_space", "atu";
power-domains = <&k3_pds 120 TI_SCI_PD_EXCLUSIVE>;
- ti,syscon-pcie-mode = <&pcie0_mode>;
+ ti,syscon-pcie-mode = <&scm_conf 0x4060>;
max-link-speed = <2>;
dma-coherent;
interrupts = <GIC_SPI 340 IRQ_TYPE_EDGE_RISING>;
diff --git a/Documentation/devicetree/bindings/pci/ti,am65-pci-host.yaml b/Documentation/devicetree/bindings/pci/ti,am65-pci-host.yaml
index eabe1635e336..a20dccbafd94 100644
--- a/Documentation/devicetree/bindings/pci/ti,am65-pci-host.yaml
+++ b/Documentation/devicetree/bindings/pci/ti,am65-pci-host.yaml
@@ -36,12 +36,20 @@ properties:
maxItems: 1
ti,syscon-pcie-id:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: Phandle to the SYSCON entry
+ - description: pcie_device_id register offset within SYSCON
description: Phandle to the SYSCON entry required for getting PCIe device/vendor ID
- $ref: /schemas/types.yaml#/definitions/phandle
ti,syscon-pcie-mode:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: Phandle to the SYSCON entry
+ - description: pcie_ctrl register offset within SYSCON
description: Phandle to the SYSCON entry required for configuring PCIe in RC or EP mode.
- $ref: /schemas/types.yaml#/definitions/phandle
msi-map: true
@@ -87,8 +95,8 @@ examples:
#size-cells = <2>;
ranges = <0x81000000 0 0 0x10020000 0 0x00010000>,
<0x82000000 0 0x10030000 0x10030000 0 0x07FD0000>;
- ti,syscon-pcie-id = <&pcie_devid>;
- ti,syscon-pcie-mode = <&pcie0_mode>;
+ ti,syscon-pcie-id = <&scm_conf 0x0210>;
+ ti,syscon-pcie-mode = <&scm_conf 0x4060>;
bus-range = <0x0 0xff>;
max-link-speed = <2>;
dma-coherent;
diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson8-hdmi-tx-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson8-hdmi-tx-phy.yaml
new file mode 100644
index 000000000000..1f085cdd1c85
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/amlogic,meson8-hdmi-tx-phy.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/phy/amlogic,meson8-hdmi-tx-phy.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Amlogic Meson8, Meson8b and Meson8m2 HDMI TX PHY
+
+maintainers:
+ - Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+description: |+
+ The HDMI TX PHY node should be the child of a syscon node with the
+ required property:
+
+ compatible = "amlogic,meson-hhi-sysctrl", "simple-mfd", "syscon"
+
+ Refer to the bindings described in
+ Documentation/devicetree/bindings/mfd/syscon.yaml
+
+properties:
+ $nodename:
+ pattern: "^hdmi-phy@[0-9a-f]+$"
+
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - amlogic,meson8b-hdmi-tx-phy
+ - amlogic,meson8m2-hdmi-tx-phy
+ - const: amlogic,meson8-hdmi-tx-phy
+ - const: amlogic,meson8-hdmi-tx-phy
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ description:
+ HDMI TMDS clock
+
+ "#phy-cells":
+ const: 0
+
+required:
+ - compatible
+ - "#phy-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ hdmi-phy@3a0 {
+ compatible = "amlogic,meson8-hdmi-tx-phy";
+ reg = <0x3a0 0xc>;
+ clocks = <&tmds_clock>;
+ #phy-cells = <0>;
+ };
+ - |
+ hdmi-phy@3a0 {
+ compatible = "amlogic,meson8b-hdmi-tx-phy", "amlogic,meson8-hdmi-tx-phy";
+ reg = <0x3a0 0xc>;
+ clocks = <&tmds_clock>;
+ #phy-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml
new file mode 100644
index 000000000000..b6421eedece3
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/fsl,imx8-pcie-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8 SoC series PCIe PHY Device Tree Bindings
+
+maintainers:
+ - Richard Zhu <hongxing.zhu@nxp.com>
+
+properties:
+ "#phy-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - fsl,imx8mm-pcie-phy
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: ref
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ items:
+ - const: pciephy
+
+ fsl,refclk-pad-mode:
+ description: |
+ Specifies the mode of the refclk pad used. It can be UNUSED(PHY
+ refclock is derived from SoC internal source), INPUT(PHY refclock
+ is provided externally via the refclk pad) or OUTPUT(PHY refclock
+ is derived from SoC internal source and provided on the refclk pad).
+ Refer include/dt-bindings/phy/phy-imx8-pcie.h for the constants
+ to be used.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 0, 1, 2 ]
+
+ fsl,tx-deemph-gen1:
+ description: Gen1 De-emphasis value (optional).
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+ fsl,tx-deemph-gen2:
+ description: Gen2 De-emphasis value (optional).
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+ fsl,clkreq-unsupported:
+ type: boolean
+ description: A boolean property indicating the CLKREQ# signal is
+ not supported in the board design (optional)
+
+required:
+ - "#phy-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - fsl,refclk-pad-mode
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8mm-clock.h>
+ #include <dt-bindings/phy/phy-imx8-pcie.h>
+ #include <dt-bindings/reset/imx8mq-reset.h>
+
+ pcie_phy: pcie-phy@32f00000 {
+ compatible = "fsl,imx8mm-pcie-phy";
+ reg = <0x32f00000 0x10000>;
+ clocks = <&clk IMX8MM_CLK_PCIE1_PHY>;
+ clock-names = "ref";
+ assigned-clocks = <&clk IMX8MM_CLK_PCIE1_PHY>;
+ assigned-clock-rates = <100000000>;
+ assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_100M>;
+ resets = <&src IMX8MQ_RESET_PCIEPHY>;
+ reset-names = "pciephy";
+ fsl,refclk-pad-mode = <IMX8_PCIE_REFCLK_PAD_INPUT>;
+ #phy-cells = <0>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml b/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml
new file mode 100644
index 000000000000..34bdb5c4cae8
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/intel,phy-thunderbay-emmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Thunder Bay eMMC PHY bindings
+
+maintainers:
+ - Srikandan Nandhini <nandhini.srikandan@intel.com>
+
+properties:
+ compatible:
+ const: intel,thunderbay-emmc-phy
+
+ "#phy-cells":
+ const: 0
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: emmcclk
+
+required:
+ - "#phy-cells"
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ mmc_phy@80440800 {
+ #phy-cells = <0x0>;
+ compatible = "intel,thunderbay-emmc-phy";
+ status = "okay";
+ reg = <0x80440800 0x100>;
+ clocks = <&emmc>;
+ clock-names = "emmcclk";
+ };
diff --git a/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml b/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
index 9e6c0f43f1c6..05ee274b4b71 100644
--- a/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
+++ b/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
@@ -160,6 +160,24 @@ patternProperties:
- PHY_TYPE_PCIE
- PHY_TYPE_SATA
+ nvmem-cells:
+ items:
+ - description: internal R efuse for U2 PHY or U3/PCIe PHY
+ - description: rx_imp_sel efuse for U3/PCIe PHY
+ - description: tx_imp_sel efuse for U3/PCIe PHY
+ description: |
+ Phandles to nvmem cell that contains the efuse data;
+ Available only for U2 PHY or U3/PCIe PHY of version 2/3, these
+ three items should be provided at the same time for U3/PCIe PHY,
+ when use software to load efuse;
+ If unspecified, will use hardware auto-load efuse.
+
+ nvmem-cell-names:
+ items:
+ - const: intr
+ - const: rx_imp
+ - const: tx_imp
+
# The following optional vendor properties are only for debug or HQA test
mediatek,eye-src:
description:
diff --git a/Documentation/devicetree/bindings/phy/microchip,lan966x-serdes.yaml b/Documentation/devicetree/bindings/phy/microchip,lan966x-serdes.yaml
new file mode 100644
index 000000000000..6e914fbbac56
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/microchip,lan966x-serdes.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/microchip,lan966x-serdes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip Lan966x Serdes controller
+
+maintainers:
+ - Horatiu Vultur <horatiu.vultur@microchip.com>
+
+description: |
+ Lan966x has 7 interfaces, consisting of 2 copper transceivers(CU),
+ 3 SERDES6G and 2 RGMII interfaces. Two of the SERDES6G support QSGMII.
+ Also it has 8 logical Ethernet ports which can be connected to these
+ interfaces. The Serdes controller will allow to configure these interfaces
+ and allows to "mux" the interfaces to different ports.
+
+ For simple selection of the interface that is used with a port, the
+ following macros are defined CU(X), SERDES6G(X), RGMII(X). Where X is a
+ number that represents the index of that interface type. For example
+ CU(1) means use interface copper transceivers 1. SERDES6G(2) means use
+ interface SerDes 2.
+
+properties:
+ $nodename:
+ pattern: "^serdes@[0-9a-f]+$"
+
+ compatible:
+ const: microchip,lan966x-serdes
+
+ reg:
+ items:
+ - description: HSIO registers
+ - description: HW_STAT register
+
+ '#phy-cells':
+ const: 2
+ description: |
+ - Input port to use for a given macro.
+ - The macro to be used. The macros are defined in
+ dt-bindings/phy/phy-lan966x-serdes.
+
+required:
+ - compatible
+ - reg
+ - '#phy-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ serdes: serdes@e2004010 {
+ compatible = "microchip,lan966x-serdes";
+ reg = <0xe202c000 0x9c>, <0xe2004010 0x4>;
+ #phy-cells = <2>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml
index e71b32c9c0d1..a9e227d8b076 100644
--- a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml
@@ -113,6 +113,15 @@ patternProperties:
minimum: 1
maximum: 16
+ cdns,ssc-mode:
+ description:
+ Specifies the Spread Spectrum Clocking mode used. It can be NO_SSC,
+ EXTERNAL_SSC or INTERNAL_SSC.
+ Refer include/dt-bindings/phy/phy-cadence.h for the constants to be used.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+ default: 1
+
required:
- reg
- resets
diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
index bd9ae11c9994..2fec9e54ad0e 100644
--- a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
@@ -202,7 +202,7 @@ examples:
#phy-cells = <0>;
cdns,phy-type = <PHY_TYPE_PCIE>;
cdns,num-lanes = <2>;
- cdns,ssc-mode = <TORRENT_SERDES_NO_SSC>;
+ cdns,ssc-mode = <CDNS_SERDES_NO_SSC>;
};
phy@2 {
@@ -211,7 +211,7 @@ examples:
#phy-cells = <0>;
cdns,phy-type = <PHY_TYPE_SGMII>;
cdns,num-lanes = <1>;
- cdns,ssc-mode = <TORRENT_SERDES_NO_SSC>;
+ cdns,ssc-mode = <CDNS_SERDES_NO_SSC>;
};
};
};
diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml
index 5bebd86bf8b6..4b75289735eb 100644
--- a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml
@@ -18,6 +18,7 @@ properties:
- rockchip,rk3328-usb2phy
- rockchip,rk3366-usb2phy
- rockchip,rk3399-usb2phy
+ - rockchip,rk3568-usb2phy
- rockchip,rv1108-usb2phy
reg:
@@ -50,6 +51,10 @@ properties:
description:
Phandle to the extcon device providing the cable state for the otg phy.
+ interrupts:
+ description: Muxed interrupt for both ports
+ maxItems: 1
+
rockchip,usbgrf:
$ref: /schemas/types.yaml#/definitions/phandle
description:
@@ -67,6 +72,7 @@ properties:
interrupts:
description: host linestate interrupt
+ maxItems: 1
interrupt-names:
const: linestate
@@ -78,8 +84,6 @@ properties:
required:
- "#phy-cells"
- - interrupts
- - interrupt-names
otg-port:
type: object
@@ -109,8 +113,6 @@ properties:
required:
- "#phy-cells"
- - interrupts
- - interrupt-names
required:
- compatible
@@ -120,6 +122,40 @@ required:
- host-port
- otg-port
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: rockchip,rk3568-usb2phy
+
+ then:
+ properties:
+ host-port:
+ properties:
+ interrupts: false
+
+ otg-port:
+ properties:
+ interrupts: false
+
+ required:
+ - interrupts
+
+ else:
+ properties:
+ interrupts: false
+
+ host-port:
+ required:
+ - interrupts
+ - interrupt-names
+
+ otg-port:
+ required:
+ - interrupts
+ - interrupt-names
+
additionalProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt b/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt
deleted file mode 100644
index d23ff90baad5..000000000000
--- a/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-NVIDIA Tegra194 P2U binding
-
-Tegra194 has two PHY bricks namely HSIO (High Speed IO) and NVHS (NVIDIA High
-Speed) each interfacing with 12 and 8 P2U instances respectively.
-A P2U instance is a glue logic between Synopsys DesignWare Core PCIe IP's PIPE
-interface and PHY of HSIO/NVHS bricks. Each P2U instance represents one PCIe
-lane.
-
-Required properties:
-- compatible: For Tegra19x, must contain "nvidia,tegra194-p2u".
-- reg: Should be the physical address space and length of respective each P2U
- instance.
-- reg-names: Must include the entry "ctl".
-
-Required properties for PHY port node:
-- #phy-cells: Defined by generic PHY bindings. Must be 0.
-
-Refer to phy/phy-bindings.txt for the generic PHY binding properties.
-
-Example:
-
-p2u_hsio_0: phy@3e10000 {
- compatible = "nvidia,tegra194-p2u";
- reg = <0x03e10000 0x10000>;
- reg-names = "ctl";
-
- #phy-cells = <0>;
-};
diff --git a/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml b/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml
new file mode 100644
index 000000000000..9a89d05efbda
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/phy/phy-tegra194-p2u.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: NVIDIA Tegra194 P2U binding
+
+maintainers:
+ - Thierry Reding <treding@nvidia.com>
+
+description: >
+ Tegra194 has two PHY bricks namely HSIO (High Speed IO) and NVHS (NVIDIA High
+ Speed) each interfacing with 12 and 8 P2U instances respectively.
+ A P2U instance is a glue logic between Synopsys DesignWare Core PCIe IP's PIPE
+ interface and PHY of HSIO/NVHS bricks. Each P2U instance represents one PCIe
+ lane.
+
+properties:
+ compatible:
+ const: nvidia,tegra194-p2u
+
+ reg:
+ maxItems: 1
+ description: Should be the physical address space and length of respective each P2U instance.
+
+ reg-names:
+ items:
+ - const: ctl
+
+ '#phy-cells':
+ const: 0
+
+additionalProperties: false
+
+examples:
+ - |
+ p2u_hsio_0: phy@3e10000 {
+ compatible = "nvidia,tegra194-p2u";
+ reg = <0x03e10000 0x10000>;
+ reg-names = "ctl";
+
+ #phy-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml
new file mode 100644
index 000000000000..9076e19b6417
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/phy/qcom,edp-phy.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Qualcomm eDP PHY
+
+maintainers:
+ - Bjorn Andersson <bjorn.andersson@linaro.org>
+
+description:
+ The Qualcomm eDP PHY is found in a number of Qualcomm platform and provides
+ the physical interface for Embedded Display Port.
+
+properties:
+ compatible:
+ const: qcom,sc8180x-edp-phy
+
+ reg:
+ items:
+ - description: PHY base register block
+ - description: tx0 register block
+ - description: tx1 register block
+ - description: PLL register block
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: aux
+ - const: cfg_ahb
+
+ "#clock-cells":
+ const: 1
+
+ "#phy-cells":
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+ - "#phy-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ phy@aec2a00 {
+ compatible = "qcom,sc8180x-edp-phy";
+ reg = <0x0aec2a00 0x1c0>,
+ <0x0aec2200 0xa0>,
+ <0x0aec2600 0xa0>,
+ <0x0aec2000 0x19c>;
+
+ clocks = <&dispcc 0>, <&dispcc 1>;
+ clock-names = "aux", "cfg_ahb";
+
+ #clock-cells = <1>;
+ #phy-cells = <0>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml
index 630ceaf915e2..e417cd667997 100644
--- a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml
@@ -50,6 +50,10 @@ properties:
- qcom,sm8350-qmp-ufs-phy
- qcom,sm8350-qmp-usb3-phy
- qcom,sm8350-qmp-usb3-uni-phy
+ - qcom,sm8450-qmp-gen3x1-pcie-phy
+ - qcom,sm8450-qmp-gen4x2-pcie-phy
+ - qcom,sm8450-qmp-ufs-phy
+ - qcom,sm8450-qmp-usb3-phy
- qcom,sdx55-qmp-pcie-phy
- qcom,sdx55-qmp-usb3-uni-phy
@@ -332,6 +336,8 @@ allOf:
- qcom,sm8250-qmp-gen3x1-pcie-phy
- qcom,sm8250-qmp-gen3x2-pcie-phy
- qcom,sm8250-qmp-modem-pcie-phy
+ - qcom,sm8450-qmp-gen3x1-pcie-phy
+ - qcom,sm8450-qmp-gen4x2-pcie-phy
then:
properties:
clocks:
diff --git a/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml
index aa2e409a1a09..e651a63a4be3 100644
--- a/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml
@@ -30,6 +30,7 @@ properties:
- enum:
- qcom,sc7180-qusb2-phy
- qcom,sdm845-qusb2-phy
+ - qcom,sm6350-qusb2-phy
- const: qcom,qusb2-v2-phy
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/phy/qcom,usb-snps-femto-v2.yaml b/Documentation/devicetree/bindings/phy/qcom,usb-snps-femto-v2.yaml
index 20203a8a9e41..0dfe6914ec87 100644
--- a/Documentation/devicetree/bindings/phy/qcom,usb-snps-femto-v2.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,usb-snps-femto-v2.yaml
@@ -20,6 +20,7 @@ properties:
- qcom,sm8150-usb-hs-phy
- qcom,sm8250-usb-hs-phy
- qcom,sm8350-usb-hs-phy
+ - qcom,sm8450-usb-hs-phy
- qcom,usb-snps-femto-v2-phy
reg:
diff --git a/Documentation/devicetree/bindings/phy/socionext,uniphier-ahci-phy.yaml b/Documentation/devicetree/bindings/phy/socionext,uniphier-ahci-phy.yaml
index 745c525ce6b9..3b400a85b44a 100644
--- a/Documentation/devicetree/bindings/phy/socionext,uniphier-ahci-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/socionext,uniphier-ahci-phy.yaml
@@ -16,6 +16,7 @@ maintainers:
properties:
compatible:
enum:
+ - socionext,uniphier-pro4-ahci-phy
- socionext,uniphier-pxs2-ahci-phy
- socionext,uniphier-pxs3-ahci-phy
@@ -26,23 +27,35 @@ properties:
const: 0
clocks:
+ minItems: 1
maxItems: 2
clock-names:
oneOf:
- items: # for PXs2
- const: link
+ - items: # for Pro4
+ - const: link
+ - const: gio
- items: # for others
- const: link
- const: phy
resets:
- maxItems: 2
+ minItems: 2
+ maxItems: 5
reset-names:
- items:
- - const: link
- - const: phy
+ oneOf:
+ - items: # for Pro4
+ - const: link
+ - const: gio
+ - const: pm
+ - const: tx
+ - const: rx
+ - items: # for others
+ - const: link
+ - const: phy
required:
- compatible
diff --git a/Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml
index 3e0566899041..fbb71d6dd531 100644
--- a/Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml
@@ -19,6 +19,7 @@ properties:
- socionext,uniphier-pro5-pcie-phy
- socionext,uniphier-ld20-pcie-phy
- socionext,uniphier-pxs3-pcie-phy
+ - socionext,uniphier-nx1-pcie-phy
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml b/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml
index a681cbc3b4ef..33946efcac5e 100644
--- a/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml
@@ -22,6 +22,7 @@ properties:
- socionext,uniphier-pxs2-usb3-hsphy
- socionext,uniphier-ld20-usb3-hsphy
- socionext,uniphier-pxs3-usb3-hsphy
+ - socionext,uniphier-nx1-usb3-hsphy
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml b/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml
index 41c0dd68ee25..92d46eb913a3 100644
--- a/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml
@@ -23,6 +23,7 @@ properties:
- socionext,uniphier-pxs2-usb3-ssphy
- socionext,uniphier-ld20-usb3-ssphy
- socionext,uniphier-pxs3-usb3-ssphy
+ - socionext,uniphier-nx1-usb3-ssphy
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml b/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml
index 80020539c3bb..5cd512b7d5ba 100644
--- a/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml
@@ -51,15 +51,6 @@ properties:
appropriate of the LOCHNAGARx_PIN_NUM_GPIOS define, see [3].
maxItems: 1
- pinctrl-0:
- description:
- A phandle to the default pinctrl state.
-
- pinctrl-names:
- description:
- A pinctrl state named "default" must be defined.
- const: default
-
pin-settings:
type: object
patternProperties:
diff --git a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
index e50d7ad5c229..c85f759ae5a3 100644
--- a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
@@ -30,16 +30,6 @@ description: |
Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
properties:
- pinctrl-0:
- description:
- A phandle to the node containing the subnodes containing default
- configurations.
-
- pinctrl-names:
- description:
- A pinctrl state named "default" must be defined.
- const: default
-
pin-settings:
description:
One subnode is required to contain the default settings. It
diff --git a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml
index 3dd22220cb5f..a72d5c721516 100644
--- a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml
+++ b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml
@@ -43,7 +43,7 @@ properties:
priority:
$ref: /schemas/types.yaml#/definitions/uint32
description: |
- A priority ranging from 0 to 255 (default 128) according to the following guidelines:
+ A priority ranging from 0 to 255 (default 129) according to the following guidelines:
0: Restart handler of last resort, with limited restart capabilities.
128: Default restart handler; use if no other restart handler is expected to be available,
@@ -51,7 +51,7 @@ properties:
255: Highest priority restart handler, will preempt all other restart handlers.
minimum: 0
maximum: 255
- default: 128
+ default: 129
active-delay:
$ref: /schemas/types.yaml#/definitions/uint32
diff --git a/Documentation/devicetree/bindings/pwm/pwm.yaml b/Documentation/devicetree/bindings/pwm/pwm.yaml
index 2effe6c0de6b..3c01f85029e5 100644
--- a/Documentation/devicetree/bindings/pwm/pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/pwm.yaml
@@ -9,6 +9,8 @@ title: PWM controllers (providers)
maintainers:
- Thierry Reding <thierry.reding@gmail.com>
+select: false
+
properties:
$nodename:
pattern: "^pwm(@.*|-[0-9a-f])*$"
diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
index 63e06d93bca3..c635c181d2c2 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
@@ -33,6 +33,9 @@ properties:
- qcom,sdm845-adsp-pas
- qcom,sdm845-cdsp-pas
- qcom,sdx55-mpss-pas
+ - qcom,sm6350-adsp-pas
+ - qcom,sm6350-cdsp-pas
+ - qcom,sm6350-mpss-pas
- qcom,sm8150-adsp-pas
- qcom,sm8150-cdsp-pas
- qcom,sm8150-mpss-pas
@@ -158,6 +161,9 @@ allOf:
- qcom,sc8180x-mpss-pas
- qcom,sdm845-adsp-pas
- qcom,sdm845-cdsp-pas
+ - qcom,sm6350-adsp-pas
+ - qcom,sm6350-cdsp-pas
+ - qcom,sm6350-mpss-pas
- qcom,sm8150-adsp-pas
- qcom,sm8150-cdsp-pas
- qcom,sm8150-mpss-pas
@@ -266,6 +272,8 @@ allOf:
- qcom,sc8180x-cdsp-pas
- qcom,sdm845-adsp-pas
- qcom,sdm845-cdsp-pas
+ - qcom,sm6350-adsp-pas
+ - qcom,sm6350-cdsp-pas
- qcom,sm8150-adsp-pas
- qcom,sm8150-cdsp-pas
- qcom,sm8150-slpi-pas
@@ -301,6 +309,7 @@ allOf:
- qcom,sc7280-mpss-pas
- qcom,sc8180x-mpss-pas
- qcom,sdx55-mpss-pas
+ - qcom,sm6350-mpss-pas
- qcom,sm8150-mpss-pas
- qcom,sm8350-mpss-pas
then:
@@ -390,6 +399,23 @@ allOf:
compatible:
contains:
enum:
+ - qcom,sm6350-cdsp-pas
+ then:
+ properties:
+ power-domains:
+ items:
+ - description: CX power domain
+ - description: MX power domain
+ power-domain-names:
+ items:
+ - const: cx
+ - const: mx
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
- qcom,sm8150-adsp-pas
- qcom,sm8150-cdsp-pas
then:
@@ -405,6 +431,7 @@ allOf:
enum:
- qcom,sc7280-mpss-pas
- qcom,sdx55-mpss-pas
+ - qcom,sm6350-mpss-pas
- qcom,sm8150-mpss-pas
- qcom,sm8350-mpss-pas
then:
@@ -425,6 +452,7 @@ allOf:
enum:
- qcom,sc8180x-adsp-pas
- qcom,sc8180x-cdsp-pas
+ - qcom,sm6350-adsp-pas
- qcom,sm8150-slpi-pas
- qcom,sm8250-adsp-pas
- qcom,sm8250-slpi-pas
diff --git a/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml
new file mode 100644
index 000000000000..a7d25fa920e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/remoteproc/renesas,rcar-rproc.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Renesas R-Car remote processor controller bindings
+
+maintainers:
+ - Julien Massot <julien.massot@iot.bzh>
+
+description: |
+ This document defines the bindings for the remoteproc component that loads and
+ boots firmwares on the Renesas R-Car family chipset.
+ R-Car gen3 family may have a realtime processor, this processor shares peripheral
+ and RAM with the host processor with the same address map.
+
+properties:
+ compatible:
+ const: renesas,rcar-cr7
+
+ resets:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ memory-region:
+ description:
+ List of phandles to the reserved memory regions associated with the
+ remoteproc device. This is variable and describes the memories shared with
+ the remote processor (e.g. remoteproc firmware and carveouts, rpmsg
+ vrings, ...).
+ (see ../reserved-memory/reserved-memory.yaml)
+
+required:
+ - compatible
+ - resets
+ - memory-region
+ - power-domains
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a7795-cpg-mssr.h>
+ #include <dt-bindings/power/r8a7795-sysc.h>
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cr7_ram: cr7_ram@40040000 {
+ no-map;
+ reg = <0x0 0x40040000 0x0 0x1fc0000>;
+ };
+ };
+
+ cr7_rproc: cr7 {
+ compatible = "renesas,rcar-cr7";
+ memory-region = <&cr7_ram>;
+ power-domains = <&sysc R8A7795_PD_CR7>;
+ resets = <&cpg 222>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml
index 5ec6505ac408..7b56497eec4d 100644
--- a/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml
@@ -33,9 +33,11 @@ properties:
enum:
- ti,j721e-c66-dsp
- ti,j721e-c71-dsp
+ - ti,j721s2-c71-dsp
description:
Use "ti,j721e-c66-dsp" for C66x DSPs on K3 J721E SoCs
Use "ti,j721e-c71-dsp" for C71x DSPs on K3 J721E SoCs
+ Use "ti,j721s2-c71-dsp" for C71x DSPs on K3 J721S2 SoCs
resets:
description: |
@@ -106,6 +108,7 @@ else:
compatible:
enum:
- ti,j721e-c71-dsp
+ - ti,j721s2-c71-dsp
then:
properties:
reg:
diff --git a/Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml
index eeef255c4045..d9c7e8c2b268 100644
--- a/Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml
@@ -38,6 +38,7 @@ properties:
- ti,j721e-r5fss
- ti,j7200-r5fss
- ti,am64-r5fss
+ - ti,j721s2-r5fss
power-domains:
description: |
@@ -64,9 +65,9 @@ properties:
description: |
Configuration Mode for the Dual R5F cores within the R5F cluster.
Should be either a value of 1 (LockStep mode) or 0 (Split mode) on
- most SoCs (AM65x, J721E, J7200), default is LockStep mode if omitted;
- and should be either a value of 0 (Split mode) or 2 (Single-CPU mode)
- on AM64x SoCs, default is Split mode if omitted.
+ most SoCs (AM65x, J721E, J7200, J721s2), default is LockStep mode if
+ omitted; and should be either a value of 0 (Split mode) or 2
+ (Single-CPU mode) on AM64x SoCs, default is Split mode if omitted.
# R5F Processor Child Nodes:
# ==========================
@@ -104,6 +105,7 @@ patternProperties:
- ti,j721e-r5f
- ti,j7200-r5f
- ti,am64-r5f
+ - ti,j721s2-r5f
reg:
items:
diff --git a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
index 29fe39bb08ad..d12855e7ffd7 100644
--- a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
+++ b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
@@ -15,6 +15,7 @@ allOf:
properties:
compatible:
enum:
+ - epson,rx8804
- epson,rx8900
- microcrystal,rv8803
diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
index 4fba6dba16f3..6fa7d9fc2dc7 100644
--- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
@@ -19,7 +19,14 @@ properties:
- qcom,pmk8350-rtc
reg:
- maxItems: 1
+ minItems: 1
+ maxItems: 2
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: rtc
+ - const: alarm
interrupts:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
index 2359f541b770..764717ce1873 100644
--- a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
@@ -127,6 +127,7 @@ examples:
st,syscfg = <&pwrcfg 0x00 0x100>;
};
+ - |
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/clock/stm32mp1-clks.h>
rtc@5c004000 {
diff --git a/Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml b/Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
new file mode 100644
index 000000000000..fd1b3e71ff2c
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) Sunplus Co., Ltd. 2021
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/sunplus,sp7021-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sunplus SP7021 Real Time Clock controller
+
+maintainers:
+ - Vincent Shih <vincent.sunplus@gmail.com>
+
+properties:
+ compatible:
+ const: sunplus,sp7021-rtc
+
+ reg:
+ maxItems: 1
+
+ reg-names:
+ items:
+ - const: rtc
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - clocks
+ - resets
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ rtc: serial@9c003a00 {
+ compatible = "sunplus,sp7021-rtc";
+ reg = <0x9c003a00 0x80>;
+ reg-names = "rtc";
+ clocks = <&clkc 0x12>;
+ resets = <&rstc 0x02>;
+ interrupt-parent = <&intc>;
+ interrupts = <163 IRQ_TYPE_EDGE_RISING>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/sound/samsung-i2s.yaml b/Documentation/devicetree/bindings/sound/samsung-i2s.yaml
index 2e3628ef48df..84c4d6cba521 100644
--- a/Documentation/devicetree/bindings/sound/samsung-i2s.yaml
+++ b/Documentation/devicetree/bindings/sound/samsung-i2s.yaml
@@ -110,12 +110,6 @@ properties:
Internal DMA register base address of the audio
subsystem (used in secondary sound source).
- pinctrl-0:
- description: Should specify pin control groups used for this controller.
-
- pinctrl-names:
- const: default
-
power-domains:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/spmi/mtk,spmi-mtk-pmif.yaml b/Documentation/devicetree/bindings/spmi/mtk,spmi-mtk-pmif.yaml
new file mode 100644
index 000000000000..2445c5e0b0ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/spmi/mtk,spmi-mtk-pmif.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spmi/mtk,spmi-mtk-pmif.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek SPMI Controller Device Tree Bindings
+
+maintainers:
+ - Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
+
+description: |+
+ On MediaTek SoCs the PMIC is connected via SPMI and the controller allows
+ for multiple SoCs to control a single SPMI master.
+
+allOf:
+ - $ref: "spmi.yaml"
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt6873-spmi
+ - mediatek,mt8195-spmi
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pmif
+ - const: spmimst
+
+ clocks:
+ minItems: 3
+ maxItems: 3
+
+ clock-names:
+ items:
+ - const: pmif_sys_ck
+ - const: pmif_tmr_ck
+ - const: spmimst_clk_mux
+
+ assigned-clocks:
+ maxItems: 1
+
+ assigned-clock-parents:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8192-clk.h>
+
+ spmi: spmi@10027000 {
+ compatible = "mediatek,mt6873-spmi";
+ reg = <0x10027000 0xe00>,
+ <0x10029000 0x100>;
+ reg-names = "pmif", "spmimst";
+ clocks = <&infracfg CLK_INFRA_PMIC_AP>,
+ <&infracfg CLK_INFRA_PMIC_TMR>,
+ <&topckgen CLK_TOP_SPMI_MST_SEL>;
+ clock-names = "pmif_sys_ck",
+ "pmif_tmr_ck",
+ "spmimst_clk_mux";
+ assigned-clocks = <&topckgen CLK_TOP_PWRAP_ULPOSC_SEL>;
+ assigned-clock-parents = <&topckgen CLK_TOP_OSC_D10>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/spmi/spmi.yaml b/Documentation/devicetree/bindings/spmi/spmi.yaml
index 1d243faef2f8..c1b06fa5c631 100644
--- a/Documentation/devicetree/bindings/spmi/spmi.yaml
+++ b/Documentation/devicetree/bindings/spmi/spmi.yaml
@@ -24,9 +24,6 @@ properties:
$nodename:
pattern: "^spmi@.*"
- reg:
- maxItems: 1
-
"#address-cells":
const: 2
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 9af1b0f4ecea..091792ba993e 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -31,7 +31,7 @@ properties:
- enum:
# SMBus/I2C Digital Temperature Sensor in 6-Pin SOT with SMBus Alert and Over Temperature Pin
- ad,ad7414
- # ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems
+ # ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems
- ad,adm9240
# AD5110 - Nonvolatile Digital Potentiometer
- adi,ad5110
@@ -43,7 +43,7 @@ properties:
- adi,adp5589
# AMS iAQ-Core VOC Sensor
- ams,iaq-core
- # i2c serial eeprom (24cxx)
+ # i2c serial eeprom (24cxx)
- at,24c08
# i2c trusted platform module (TPM)
- atmel,at97sc3204t
@@ -303,9 +303,9 @@ properties:
- skyworks,sky81452
# Socionext SynQuacer TPM MMIO module
- socionext,synquacer-tpm-mmio
- # i2c serial eeprom (24cxx)
- - sparkfun,qwiic-joystick
# SparkFun Qwiic Joystick (COM-15168) with i2c interface
+ - sparkfun,qwiic-joystick
+ # i2c serial eeprom (24cxx)
- st,24c256
# Ambient Light Sensor with SMBUS/Two Wire Serial Interface
- taos,tsl2550
diff --git a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
index d8fd4df81743..d0fee78e6203 100644
--- a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
+++ b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
@@ -16,6 +16,7 @@ Required properties:
"qcom,sm8150-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
"qcom,sm8250-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
"qcom,sm8350-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
+ "qcom,sm8450-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
- interrupts : <interrupt mapping for UFS host controller IRQ>
- reg : <registers mapping>
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index c48ce3c54951..294093d45a23 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -25,6 +25,8 @@ patternProperties:
# Keep list in alphabetical order.
"^70mai,.*":
description: 70mai Co., Ltd.
+ "^8dev,.*":
+ description: 8devices, UAB
"^abb,.*":
description: ABB
"^abilis,.*":
@@ -441,6 +443,8 @@ patternProperties:
description: Freescale Semiconductor
"^fujitsu,.*":
description: Fujitsu Ltd.
+ "^fxtec,.*":
+ description: FX Technology Ltd.
"^gardena,.*":
description: GARDENA GmbH
"^gateworks,.*":
@@ -515,6 +519,8 @@ patternProperties:
description: HannStar Display Co.
"^holtek,.*":
description: Holtek Semiconductor, Inc.
+ "^huawei,.*":
+ description: Huawei Technologies Co., Ltd.
"^hugsun,.*":
description: Shenzhen Hugsun Technology Co. Ltd.
"^hwacom,.*":
@@ -1207,6 +1213,8 @@ patternProperties:
description: THine Electronics, Inc.
"^thingyjp,.*":
description: thingy.jp
+ "^thundercomm,.*":
+ description: Thundercomm Technology Co., Ltd.
"^ti,.*":
description: Texas Instruments
"^tianma,.*":
@@ -1334,6 +1342,8 @@ patternProperties:
description: Wiligear, Ltd.
"^winbond,.*":
description: Winbond Electronics corp.
+ "^wingtech,.*":
+ description: Wingtech Technology Co., Ltd.
"^winlink,.*":
description: WinLink Co., Ltd
"^winstar,.*":
diff --git a/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.txt b/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.txt
deleted file mode 100644
index 84122270be8f..000000000000
--- a/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-BCM7038 Watchdog timer
-
-Required properties:
-
-- compatible : should be "brcm,bcm7038-wdt"
-- reg : Specifies base physical address and size of the registers.
-
-Optional properties:
-
-- clocks: The clock running the watchdog. If no clock is found the
- driver will default to 27000000 Hz.
-
-Example:
-
-watchdog@f040a7e8 {
- compatible = "brcm,bcm7038-wdt";
- clocks = <&upg_fixed>;
- reg = <0xf040a7e8 0x16>;
-};
diff --git a/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml b/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml
new file mode 100644
index 000000000000..a926809352b8
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/brcm,bcm7038-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: BCM63xx and BCM7038 watchdog timer
+
+allOf:
+ - $ref: "watchdog.yaml#"
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+ - Justin Chen <justinpopo6@gmail.com>
+ - Rafał Miłecki <rafal@milecki.pl>
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm6345-wdt
+ - brcm,bcm7038-wdt
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+ description: >
+ The clock running the watchdog. If no clock is found the driver will
+ default to 27000000 Hz.
+
+unevaluatedProperties: false
+
+required:
+ - reg
+
+examples:
+ - |
+ watchdog@f040a7e8 {
+ compatible = "brcm,bcm7038-wdt";
+ reg = <0xf040a7e8 0x16>;
+ clocks = <&upg_fixed>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml
index 51d6d482bbc2..fb603a20e396 100644
--- a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml
@@ -14,8 +14,11 @@ allOf:
properties:
compatible:
- enum:
- - fsl,imx7ulp-wdt
+ oneOf:
+ - const: fsl,imx7ulp-wdt
+ - items:
+ - const: fsl,imx8ulp-wdt
+ - const: fsl,imx7ulp-wdt
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
index ba60bdf1fecc..16c6f82a13ca 100644
--- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
@@ -20,7 +20,9 @@ properties:
- qcom,apss-wdt-sc7280
- qcom,apss-wdt-sdm845
- qcom,apss-wdt-sdx55
+ - qcom,apss-wdt-sm6350
- qcom,apss-wdt-sm8150
+ - qcom,apss-wdt-sm8250
- qcom,kpss-timer
- qcom,kpss-wdt
- qcom,kpss-wdt-apq8064
diff --git a/Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml b/Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml
new file mode 100644
index 000000000000..11b220a5e0f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/realtek,otto-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Realtek Otto watchdog timer
+
+maintainers:
+ - Sander Vanheule <sander@svanheule.net>
+
+description: |
+ The timer has two timeout phases. Both phases have a maximum duration of 32
+ prescaled clock ticks, which is ca. 43s with a bus clock of 200MHz. The
+ minimum duration of each phase is one tick. Each phase can trigger an
+ interrupt, although the phase 2 interrupt will occur with the system reset.
+ - Phase 1: During this phase, the WDT can be pinged to reset the timeout.
+ - Phase 2: Starts after phase 1 has timed out, and only serves to give the
+ system some time to clean up, or notify others that it's going to reset.
+ During this phase, pinging the WDT has no effect, and a reset is
+ unavoidable, unless the WDT is disabled.
+
+allOf:
+ - $ref: watchdog.yaml#
+
+properties:
+ compatible:
+ enum:
+ - realtek,rtl8380-wdt
+ - realtek,rtl8390-wdt
+ - realtek,rtl9300-wdt
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: interrupt specifier for pretimeout
+ - description: interrupt specifier for timeout
+
+ interrupt-names:
+ items:
+ - const: phase1
+ - const: phase2
+
+ realtek,reset-mode:
+ $ref: /schemas/types.yaml#/definitions/string
+ description: |
+ Specify how the system is reset after a timeout. Defaults to "cpu" if
+ left unspecified.
+ oneOf:
+ - description: Reset the entire chip
+ const: soc
+ - description: |
+ Reset the CPU and IPsec engine, but leave other peripherals untouched
+ const: cpu
+ - description: |
+ Reset the execution pointer, but don't actually reset any hardware
+ const: software
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+unevaluatedProperties: false
+
+dependencies:
+ interrupts: [ interrupt-names ]
+
+examples:
+ - |
+ watchdog: watchdog@3150 {
+ compatible = "realtek,rtl8380-wdt";
+ reg = <0x3150 0xc>;
+
+ realtek,reset-mode = "soc";
+
+ clocks = <&lxbus_clock>;
+ timeout-sec = <20>;
+
+ interrupt-parent = <&rtlintc>;
+ interrupt-names = "phase1", "phase2";
+ interrupts = <19>, <18>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml
index ab66d3f0c476..91a98ccd4226 100644
--- a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml
@@ -10,9 +10,6 @@ maintainers:
- Wolfram Sang <wsa+renesas@sang-engineering.com>
- Geert Uytterhoeven <geert+renesas@glider.be>
-allOf:
- - $ref: "watchdog.yaml#"
-
properties:
compatible:
oneOf:
@@ -24,6 +21,11 @@ properties:
- items:
- enum:
+ - renesas,r9a07g044-wdt # RZ/G2{L,LC}
+ - const: renesas,rzg2l-wdt # RZ/G2L
+
+ - items:
+ - enum:
- renesas,r8a7742-wdt # RZ/G1H
- renesas,r8a7743-wdt # RZ/G1M
- renesas,r8a7744-wdt # RZ/G1N
@@ -56,11 +58,13 @@ properties:
reg:
maxItems: 1
- interrupts:
- maxItems: 1
+ interrupts: true
- clocks:
- maxItems: 1
+ interrupt-names: true
+
+ clocks: true
+
+ clock-names: true
power-domains:
maxItems: 1
@@ -75,17 +79,52 @@ required:
- reg
- clocks
-if:
- not:
- properties:
- compatible:
- contains:
- enum:
- - renesas,rza-wdt
-then:
- required:
- - power-domains
- - resets
+allOf:
+ - $ref: "watchdog.yaml#"
+
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,rza-wdt
+ then:
+ required:
+ - power-domains
+ - resets
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,rzg2l-wdt
+ then:
+ properties:
+ interrupts:
+ maxItems: 2
+ interrupt-names:
+ items:
+ - const: wdt
+ - const: perrout
+ clocks:
+ items:
+ - description: Register access clock
+ - description: Main clock
+ clock-names:
+ items:
+ - const: pclk
+ - const: oscclk
+ required:
+ - clock-names
+ - interrupt-names
+ else:
+ properties:
+ interrupts:
+ maxItems: 1
+ clocks:
+ maxItems: 1
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml
index 76cb9586ee00..b08373336b16 100644
--- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml
@@ -22,25 +22,32 @@ properties:
- samsung,exynos5250-wdt # for Exynos5250
- samsung,exynos5420-wdt # for Exynos5420
- samsung,exynos7-wdt # for Exynos7
+ - samsung,exynos850-wdt # for Exynos850
reg:
maxItems: 1
clocks:
- maxItems: 1
+ minItems: 1
+ maxItems: 2
clock-names:
- items:
- - const: watchdog
+ minItems: 1
+ maxItems: 2
interrupts:
maxItems: 1
+ samsung,cluster-index:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Index of CPU cluster on which watchdog is running (in case of Exynos850)
+
samsung,syscon-phandle:
$ref: /schemas/types.yaml#/definitions/phandle
description:
- Phandle to the PMU system controller node (in case of Exynos5250
- and Exynos5420).
+ Phandle to the PMU system controller node (in case of Exynos5250,
+ Exynos5420, Exynos7 and Exynos850).
required:
- compatible
@@ -58,9 +65,40 @@ allOf:
enum:
- samsung,exynos5250-wdt
- samsung,exynos5420-wdt
+ - samsung,exynos7-wdt
+ - samsung,exynos850-wdt
then:
required:
- samsung,syscon-phandle
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynos850-wdt
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Bus clock, used for register interface
+ - description: Source clock (driving watchdog counter)
+ clock-names:
+ items:
+ - const: watchdog
+ - const: watchdog_src
+ samsung,cluster-index:
+ enum: [0, 1]
+ required:
+ - samsung,cluster-index
+ else:
+ properties:
+ clocks:
+ items:
+ - description: Bus clock, which is also a source clock
+ clock-names:
+ items:
+ - const: watchdog
+ samsung,cluster-index: false
unevaluatedProperties: false
diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst
index ee268d445d38..cf9859cd0b43 100644
--- a/Documentation/driver-api/dmaengine/dmatest.rst
+++ b/Documentation/driver-api/dmaengine/dmatest.rst
@@ -6,6 +6,16 @@ Andy Shevchenko <andriy.shevchenko@linux.intel.com>
This small document introduces how to test DMA drivers using dmatest module.
+The dmatest module tests DMA memcpy, memset, XOR and RAID6 P+Q operations using
+various lengths and various offsets into the source and destination buffers. It
+will initialize both buffers with a repeatable pattern and verify that the DMA
+engine copies the requested region and nothing more. It will also verify that
+the bytes aren't swapped around, and that the source buffer isn't modified.
+
+The dmatest module can be configured to test a specific channel. It can also
+test multiple channels at the same time, and it can start multiple threads
+competing for the same channel.
+
.. note::
The test suite works only on the channels that have at least one
capability of the following: DMA_MEMCPY (memory-to-memory), DMA_MEMSET
@@ -143,13 +153,14 @@ Part 5 - Handling channel allocation
Allocating Channels
-------------------
-Channels are required to be configured prior to starting the test run.
-Attempting to run the test without configuring the channels will fail.
+Channels do not need to be configured prior to starting a test run. Attempting
+to run the test without configuring the channels will result in testing any
+channels that are available.
Example::
% echo 1 > /sys/module/dmatest/parameters/run
- dmatest: Could not start test, no channels configured
+ dmatest: No channels configured, continue with any
Channels are registered using the "channel" parameter. Channels can be requested by their
name, once requested, the channel is registered and a pending thread is added to the test list.
diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
index ddb0a81a796c..0072c9c7efd3 100644
--- a/Documentation/driver-api/dmaengine/provider.rst
+++ b/Documentation/driver-api/dmaengine/provider.rst
@@ -162,6 +162,29 @@ Currently, the types available are:
- The device is able to do memory to memory copies
+- - DMA_MEMCPY_SG
+
+ - The device supports memory to memory scatter-gather transfers.
+
+ - Even though a plain memcpy can look like a particular case of a
+ scatter-gather transfer, with a single chunk to copy, it's a distinct
+ transaction type in the mem2mem transfer case. This is because some very
+ simple devices might be able to do contiguous single-chunk memory copies,
+ but have no support for more complex SG transfers.
+
+ - No matter what the overall size of the combined chunks for source and
+ destination is, only as many bytes as the smallest of the two will be
+ transmitted. That means the number and size of the scatter-gather buffers in
+ both lists need not be the same, and that the operation functionally is
+ equivalent to a ``strncpy`` where the ``count`` argument equals the smallest
+ total size of the two scatter-gather list buffers.
+
+ - It's usually used for copying pixel data between host memory and
+ memory-mapped GPU device memory, such as found on modern PCI video graphics
+ cards. The most immediate example is the OpenGL API function
+ ``glReadPielx()``, which might require a verbatim copy of a huge framebuffer
+ from local device memory onto host memory.
+
- DMA_XOR
- The device is able to perform XOR operations on memory areas
diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst
index 94a2d7f01d99..d3cfa73cbb2b 100644
--- a/Documentation/driver-api/firewire.rst
+++ b/Documentation/driver-api/firewire.rst
@@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module
Firewire char device data structures
====================================
-.. include:: /ABI/stable/firewire-cdev
+.. include:: ../ABI/stable/firewire-cdev
:literal:
.. kernel-doc:: include/uapi/linux/firewire-cdev.h
@@ -28,7 +28,7 @@ Firewire char device data structures
Firewire device probing and sysfs interfaces
============================================
-.. include:: /ABI/stable/sysfs-bus-firewire
+.. include:: ../ABI/stable/sysfs-bus-firewire
:literal:
.. kernel-doc:: drivers/firewire/core-device.c
diff --git a/Documentation/driver-api/fpga/fpga-bridge.rst b/Documentation/driver-api/fpga/fpga-bridge.rst
index 8d650b4e2ce6..604208534095 100644
--- a/Documentation/driver-api/fpga/fpga-bridge.rst
+++ b/Documentation/driver-api/fpga/fpga-bridge.rst
@@ -6,8 +6,7 @@ API to implement a new FPGA bridge
* struct fpga_bridge - The FPGA Bridge structure
* struct fpga_bridge_ops - Low level Bridge driver ops
-* devm_fpga_bridge_create() - Allocate and init a bridge struct
-* fpga_bridge_register() - Register a bridge
+* fpga_bridge_register() - Create and register a bridge
* fpga_bridge_unregister() - Unregister a bridge
.. kernel-doc:: include/linux/fpga/fpga-bridge.h
@@ -17,9 +16,6 @@ API to implement a new FPGA bridge
:functions: fpga_bridge_ops
.. kernel-doc:: drivers/fpga/fpga-bridge.c
- :functions: devm_fpga_bridge_create
-
-.. kernel-doc:: drivers/fpga/fpga-bridge.c
:functions: fpga_bridge_register
.. kernel-doc:: drivers/fpga/fpga-bridge.c
diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst
index 4d926b452cb3..42c01f396dce 100644
--- a/Documentation/driver-api/fpga/fpga-mgr.rst
+++ b/Documentation/driver-api/fpga/fpga-mgr.rst
@@ -24,7 +24,7 @@ How to support a new FPGA device
--------------------------------
To add another FPGA manager, write a driver that implements a set of ops. The
-probe function calls fpga_mgr_register(), such as::
+probe function calls fpga_mgr_register() or fpga_mgr_register_full(), such as::
static const struct fpga_manager_ops socfpga_fpga_ops = {
.write_init = socfpga_fpga_ops_configure_init,
@@ -49,14 +49,14 @@ probe function calls fpga_mgr_register(), such as::
* them in priv
*/
- mgr = devm_fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
- &socfpga_fpga_ops, priv);
- if (!mgr)
- return -ENOMEM;
+ mgr = fpga_mgr_register(dev, "Altera SOCFPGA FPGA Manager",
+ &socfpga_fpga_ops, priv);
+ if (IS_ERR(mgr))
+ return PTR_ERR(mgr);
platform_set_drvdata(pdev, mgr);
- return fpga_mgr_register(mgr);
+ return 0;
}
static int socfpga_fpga_remove(struct platform_device *pdev)
@@ -68,6 +68,11 @@ probe function calls fpga_mgr_register(), such as::
return 0;
}
+Alternatively, the probe function could call one of the resource managed
+register functions, devm_fpga_mgr_register() or devm_fpga_mgr_register_full().
+When these functions are used, the parameter syntax is the same, but the call
+to fpga_mgr_unregister() should be removed. In the above example, the
+socfpga_fpga_remove() function would not be required.
The ops will implement whatever device specific register writes are needed to
do the programming sequence for this particular FPGA. These ops return 0 for
@@ -104,8 +109,14 @@ API for implementing a new FPGA Manager driver
* ``fpga_mgr_states`` - Values for :c:expr:`fpga_manager->state`.
* struct fpga_manager - the FPGA manager struct
* struct fpga_manager_ops - Low level FPGA manager driver ops
-* devm_fpga_mgr_create() - Allocate and init a manager struct
-* fpga_mgr_register() - Register an FPGA manager
+* struct fpga_manager_info - Parameter structure for fpga_mgr_register_full()
+* fpga_mgr_register_full() - Create and register an FPGA manager using the
+ fpga_mgr_info structure to provide the full flexibility of options
+* fpga_mgr_register() - Create and register an FPGA manager using standard
+ arguments
+* devm_fpga_mgr_register_full() - Resource managed version of
+ fpga_mgr_register_full()
+* devm_fpga_mgr_register() - Resource managed version of fpga_mgr_register()
* fpga_mgr_unregister() - Unregister an FPGA manager
.. kernel-doc:: include/linux/fpga/fpga-mgr.h
@@ -117,11 +128,20 @@ API for implementing a new FPGA Manager driver
.. kernel-doc:: include/linux/fpga/fpga-mgr.h
:functions: fpga_manager_ops
+.. kernel-doc:: include/linux/fpga/fpga-mgr.h
+ :functions: fpga_manager_info
+
.. kernel-doc:: drivers/fpga/fpga-mgr.c
- :functions: devm_fpga_mgr_create
+ :functions: fpga_mgr_register_full
.. kernel-doc:: drivers/fpga/fpga-mgr.c
:functions: fpga_mgr_register
.. kernel-doc:: drivers/fpga/fpga-mgr.c
+ :functions: devm_fpga_mgr_register_full
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+ :functions: devm_fpga_mgr_register
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
:functions: fpga_mgr_unregister
diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst
index 2636a27c11b2..dc55d60a0b4a 100644
--- a/Documentation/driver-api/fpga/fpga-region.rst
+++ b/Documentation/driver-api/fpga/fpga-region.rst
@@ -46,8 +46,11 @@ API to add a new FPGA region
----------------------------
* struct fpga_region - The FPGA region struct
-* devm_fpga_region_create() - Allocate and init a region struct
-* fpga_region_register() - Register an FPGA region
+* struct fpga_region_info - Parameter structure for fpga_region_register_full()
+* fpga_region_register_full() - Create and register an FPGA region using the
+ fpga_region_info structure to provide the full flexibility of options
+* fpga_region_register() - Create and register an FPGA region using standard
+ arguments
* fpga_region_unregister() - Unregister an FPGA region
The FPGA region's probe function will need to get a reference to the FPGA
@@ -75,8 +78,11 @@ following APIs to handle building or tearing down that list.
.. kernel-doc:: include/linux/fpga/fpga-region.h
:functions: fpga_region
+.. kernel-doc:: include/linux/fpga/fpga-region.h
+ :functions: fpga_region_info
+
.. kernel-doc:: drivers/fpga/fpga-region.c
- :functions: devm_fpga_region_create
+ :functions: fpga_region_register_full
.. kernel-doc:: drivers/fpga/fpga-region.c
:functions: fpga_region_register
diff --git a/Documentation/driver-api/generic-counter.rst b/Documentation/driver-api/generic-counter.rst
index 1b487a331467..71ccc30e586b 100644
--- a/Documentation/driver-api/generic-counter.rst
+++ b/Documentation/driver-api/generic-counter.rst
@@ -262,11 +262,11 @@ order to communicate with the device: to read and write various Signals
and Counts, and to set and get the "action mode" and "function mode" for
various Synapses and Counts respectively.
-A defined counter_device structure may be registered to the system by
-passing it to the counter_register function, and unregistered by passing
-it to the counter_unregister function. Similarly, the
-devm_counter_register function may be used if device memory-managed
-registration is desired.
+A counter_device structure is allocated using counter_alloc() and then
+registered to the system by passing it to the counter_add() function, and
+unregistered by passing it to the counter_unregister function. There are
+device managed variants of these functions: devm_counter_alloc() and
+devm_counter_add().
The struct counter_comp structure is used to define counter extensions
for Signals, Synapses, and Counts.
diff --git a/Documentation/filesystems/ceph.rst b/Documentation/filesystems/ceph.rst
index 7d2ef4e27273..4942e018db85 100644
--- a/Documentation/filesystems/ceph.rst
+++ b/Documentation/filesystems/ceph.rst
@@ -82,7 +82,7 @@ Mount Syntax
The basic mount syntax is::
- # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
+ # mount -t ceph user@fsid.fs_name=/[subdir] mnt -o mon_addr=monip1[:port][/monip2[:port]]
You only need to specify a single monitor, as the client will get the
full list when it connects. (However, if the monitor you specify
@@ -90,16 +90,35 @@ happens to be down, the mount won't succeed.) The port can be left
off if the monitor is using the default. So if the monitor is at
1.2.3.4::
- # mount -t ceph 1.2.3.4:/ /mnt/ceph
+ # mount -t ceph cephuser@07fe3187-00d9-42a3-814b-72a4d5e7d5be.cephfs=/ /mnt/ceph -o mon_addr=1.2.3.4
is sufficient. If /sbin/mount.ceph is installed, a hostname can be
-used instead of an IP address.
+used instead of an IP address and the cluster FSID can be left out
+(as the mount helper will fill it in by reading the ceph configuration
+file)::
+ # mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=mon-addr
+Multiple monitor addresses can be passed by separating each address with a slash (`/`)::
+
+ # mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=192.168.1.100/192.168.1.101
+
+When using the mount helper, monitor address can be read from ceph
+configuration file if available. Note that, the cluster FSID (passed as part
+of the device string) is validated by checking it with the FSID reported by
+the monitor.
Mount Options
=============
+ mon_addr=ip_address[:port][/ip_address[:port]]
+ Monitor address to the cluster. This is used to bootstrap the
+ connection to the cluster. Once connection is established, the
+ monitor addresses in the monitor map are followed.
+
+ fsid=cluster-id
+ FSID of the cluster (from `ceph fsid` command).
+
ip=A.B.C.D[:N]
Specify the IP and/or port the client should bind to locally.
There is normally not much reason to do this. If the IP is not
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index d7b84695f56a..4a2426f0485a 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -198,6 +198,7 @@ fault_type=%d Support configuring fault injection type, should be
FAULT_WRITE_IO 0x000004000
FAULT_SLAB_ALLOC 0x000008000
FAULT_DQUOT_INIT 0x000010000
+ FAULT_LOCK_OP 0x000020000
=================== ===========
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 8d7f141c6fc7..061744c436d9 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -426,12 +426,14 @@ with the memory region, as the case would be with BSS (uninitialized data).
The "pathname" shows the name associated file for this mapping. If the mapping
is not associated with a file:
- ======= ====================================
+ ============= ====================================
[heap] the heap of the program
[stack] the stack of the main process
[vdso] the "virtual dynamic shared object",
the kernel system call handler
- ======= ====================================
+ [anon:<name>] an anonymous mapping that has been
+ named by userspace
+ ============= ====================================
or if empty, the mapping is anonymous.
diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst
index 98c24183d8c3..93a5b6e1fabd 100644
--- a/Documentation/kbuild/kconfig-language.rst
+++ b/Documentation/kbuild/kconfig-language.rst
@@ -176,7 +176,7 @@ applicable everywhere (see syntax).
y y y Y/m/n
n m n N/m
m m m M/n
- y m n M/n
+ y m m M/n
y n * N
=== === ============= ==============
diff --git a/Documentation/livepatch/api.rst b/Documentation/livepatch/api.rst
new file mode 100644
index 000000000000..78944b63d74b
--- /dev/null
+++ b/Documentation/livepatch/api.rst
@@ -0,0 +1,30 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Livepatching APIs
+=================
+
+Livepatch Enablement
+====================
+
+.. kernel-doc:: kernel/livepatch/core.c
+ :export:
+
+
+Shadow Variables
+================
+
+.. kernel-doc:: kernel/livepatch/shadow.c
+ :export:
+
+System State Changes
+====================
+
+.. kernel-doc:: kernel/livepatch/state.c
+ :export:
+
+Object Types
+============
+
+.. kernel-doc:: include/linux/livepatch.h
+ :identifiers: klp_patch klp_object klp_func klp_callbacks klp_state
diff --git a/Documentation/livepatch/index.rst b/Documentation/livepatch/index.rst
index 43cce5fad705..cebf1c71d4a5 100644
--- a/Documentation/livepatch/index.rst
+++ b/Documentation/livepatch/index.rst
@@ -14,6 +14,7 @@ Kernel Livepatching
shadow-vars
system-state
reliable-stacktrace
+ api
.. only:: subproject and html
diff --git a/Documentation/livepatch/shadow-vars.rst b/Documentation/livepatch/shadow-vars.rst
index 6a7d43a8787d..7a7098bfb5c8 100644
--- a/Documentation/livepatch/shadow-vars.rst
+++ b/Documentation/livepatch/shadow-vars.rst
@@ -82,8 +82,8 @@ to do actions that can be done only once when a new variable is allocated.
- call destructor function if defined
- free shadow variable
-* klp_shadow_free_all() - detach and free all <*, id> shadow variables
- - find and remove any <*, id> references from global hashtable
+* klp_shadow_free_all() - detach and free all <_, id> shadow variables
+ - find and remove any <_, id> references from global hashtable
- if found
diff --git a/Documentation/livepatch/system-state.rst b/Documentation/livepatch/system-state.rst
index c6d127c2d9aa..7a3935fd812b 100644
--- a/Documentation/livepatch/system-state.rst
+++ b/Documentation/livepatch/system-state.rst
@@ -52,12 +52,12 @@ struct klp_state:
The state can be manipulated using two functions:
- - *klp_get_state(patch, id)*
+ - klp_get_state()
- Get struct klp_state associated with the given livepatch
and state id.
- - *klp_get_prev_state(id)*
+ - klp_get_prev_state()
- Get struct klp_state associated with the given feature id and
already installed livepatches.
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index cf908d79666e..a337e8eabfe1 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -30,7 +30,7 @@ you probably needn't concern yourself with pcmciautils.
Program Minimal version Command to check the version
====================== =============== ========================================
GNU C 5.1 gcc --version
-Clang/LLVM (optional) 10.0.1 clang --version
+Clang/LLVM (optional) 11.0.0 clang --version
GNU make 3.81 make --version
binutils 2.23 ld -v
flex 2.5.35 flex --version
diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst
index b7f98930d38d..1bd687b97104 100644
--- a/Documentation/riscv/vm-layout.rst
+++ b/Documentation/riscv/vm-layout.rst
@@ -47,12 +47,12 @@ RISC-V Linux Kernel SV39
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
- ffffffc000000000 | -256 GB | ffffffc7ffffffff | 32 GB | kasan
- ffffffcefee00000 | -196 GB | ffffffcefeffffff | 2 MB | fixmap
- ffffffceff000000 | -196 GB | ffffffceffffffff | 16 MB | PCI io
- ffffffcf00000000 | -196 GB | ffffffcfffffffff | 4 GB | vmemmap
- ffffffd000000000 | -192 GB | ffffffdfffffffff | 64 GB | vmalloc/ioremap space
- ffffffe000000000 | -128 GB | ffffffff7fffffff | 124 GB | direct mapping of all physical memory
+ ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap
+ ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io
+ ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap
+ ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space
+ ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory
+ fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan
__________________|____________|__________________|_________|____________________________________________________________
|
|
diff --git a/Documentation/staging/tee.rst b/Documentation/staging/tee.rst
index 3c63d8dcd61e..498343c7ab08 100644
--- a/Documentation/staging/tee.rst
+++ b/Documentation/staging/tee.rst
@@ -255,7 +255,7 @@ The following picture shows a high level overview of AMD-TEE::
+--------------------------+ +---------+--------------------+
At the lowest level (in x86), the AMD Secure Processor (ASP) driver uses the
-CPU to PSP mailbox regsister to submit commands to the PSP. The format of the
+CPU to PSP mailbox register to submit commands to the PSP. The format of the
command buffer is opaque to the ASP driver. It's role is to submit commands to
the secure processor and return results to AMD-TEE driver. The interface
between AMD-TEE driver and AMD Secure Processor driver can be found in [6].
@@ -290,7 +290,7 @@ cancel_req driver callback is not supported by AMD-TEE.
The GlobalPlatform TEE Client API [5] can be used by the user space (client) to
talk to AMD's TEE. AMD's TEE provides a secure environment for loading, opening
-a session, invoking commands and clossing session with TA.
+a session, invoking commands and closing session with TA.
References
==========
diff --git a/Documentation/tools/rtla/Makefile b/Documentation/tools/rtla/Makefile
new file mode 100644
index 000000000000..9f2b84af1a6c
--- /dev/null
+++ b/Documentation/tools/rtla/Makefile
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Based on bpftool's Documentation Makefile
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+PREFIX ?= /usr/share
+MANDIR ?= $(PREFIX)/man
+MAN1DIR = $(MANDIR)/man1
+
+MAN1_RST = $(wildcard rtla*.rst)
+
+_DOC_MAN1 = $(patsubst %.rst,%.1,$(MAN1_RST))
+DOC_MAN1 = $(addprefix $(OUTPUT),$(_DOC_MAN1))
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+RST2MAN_OPTS += --verbose
+
+$(OUTPUT)%.1: %.rst
+ifndef RST2MAN_DEP
+ $(error "rst2man not found, but required to generate man pages")
+endif
+ rst2man $(RST2MAN_OPTS) $< > $@
+
+man1: $(DOC_MAN1)
+man: man1
+
+clean:
+ $(RM) $(DOC_MAN1)
+
+install: man
+ $(INSTALL) -d -m 755 $(DESTDIR)$(MAN1DIR)
+ $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(MAN1DIR)
+
+uninstall:
+ $(RM) $(addprefix $(DESTDIR)$(MAN1DIR)/,$(_DOC_MAN1))
+ $(RMDIR) $(DESTDIR)$(MAN1DIR)
+
+.PHONY: man man1 clean install uninstall
+.DEFAULT_GOAL := man
diff --git a/Documentation/tools/rtla/common_appendix.rst b/Documentation/tools/rtla/common_appendix.rst
new file mode 100644
index 000000000000..b494084acccd
--- /dev/null
+++ b/Documentation/tools/rtla/common_appendix.rst
@@ -0,0 +1,12 @@
+REPORTING BUGS
+==============
+Report bugs to <lkml@vger.kernel.org>
+
+LICENSE
+=======
+**rtla** is Free Software licensed under the GNU GPLv2
+
+COPYING
+=======
+Copyright \(C) 2021 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/tools/rtla/common_hist_options.rst b/Documentation/tools/rtla/common_hist_options.rst
new file mode 100644
index 000000000000..0266cd08a6c9
--- /dev/null
+++ b/Documentation/tools/rtla/common_hist_options.rst
@@ -0,0 +1,23 @@
+**-b**, **--bucket-size** *N*
+
+ Set the histogram bucket size (default *1*).
+
+**-e**, **--entries** *N*
+
+ Set the number of entries of the histogram (default 256).
+
+**--no-header**
+
+ Do not print header.
+
+**--no-summary**
+
+ Do not print summary.
+
+**--no-index**
+
+ Do not print index.
+
+**--with-zeros**
+
+ Print zero only entries.
diff --git a/Documentation/tools/rtla/common_options.rst b/Documentation/tools/rtla/common_options.rst
new file mode 100644
index 000000000000..721790ad984e
--- /dev/null
+++ b/Documentation/tools/rtla/common_options.rst
@@ -0,0 +1,28 @@
+**-c**, **--cpus** *cpu-list*
+
+ Set the osnoise tracer to run the sample threads in the cpu-list.
+
+**-d**, **--duration** *time[s|m|h|d]*
+
+ Set the duration of the session.
+
+**-D**, **--debug**
+
+ Print debug info.
+
+**-t**, **--trace**\[*=file*]
+
+ Save the stopped trace to [*file|osnoise_trace.txt*].
+
+**-P**, **--priority** *o:prio|r:prio|f:prio|d:runtime:period*
+
+ Set scheduling parameters to the osnoise tracer threads, the format to set the priority are:
+
+ - *o:prio* - use SCHED_OTHER with *prio*;
+ - *r:prio* - use SCHED_RR with *prio*;
+ - *f:prio* - use SCHED_FIFO with *prio*;
+ - *d:runtime[us|ms|s]:period[us|ms|s]* - use SCHED_DEADLINE with *runtime* and *period* in nanoseconds.
+
+**-h**, **--help**
+
+ Print help menu.
diff --git a/Documentation/tools/rtla/common_osnoise_description.rst b/Documentation/tools/rtla/common_osnoise_description.rst
new file mode 100644
index 000000000000..8973c5df888f
--- /dev/null
+++ b/Documentation/tools/rtla/common_osnoise_description.rst
@@ -0,0 +1,8 @@
+The **rtla osnoise** tool is an interface for the *osnoise* tracer. The
+*osnoise* tracer dispatches a kernel thread per-cpu. These threads read the
+time in a loop while with preemption, softirq and IRQs enabled, thus
+allowing all the sources of operating systme noise during its execution.
+The *osnoise*'s tracer threads take note of the delta between each time
+read, along with an interference counter of all sources of interference.
+At the end of each period, the *osnoise* tracer displays a summary of
+the results.
diff --git a/Documentation/tools/rtla/common_osnoise_options.rst b/Documentation/tools/rtla/common_osnoise_options.rst
new file mode 100644
index 000000000000..d556883e4e26
--- /dev/null
+++ b/Documentation/tools/rtla/common_osnoise_options.rst
@@ -0,0 +1,17 @@
+**-p**, **--period** *us*
+
+ Set the *osnoise* tracer period in microseconds.
+
+**-r**, **--runtime** *us*
+
+ Set the *osnoise* tracer runtime in microseconds.
+
+**-s**, **--stop** *us*
+
+ Stop the trace if a single sample is higher than the argument in microseconds.
+ If **-T** is set, it will also save the trace to the output.
+
+**-S**, **--stop-total** *us*
+
+ Stop the trace if the total sample is higher than the argument in microseconds.
+ If **-T** is set, it will also save the trace to the output.
diff --git a/Documentation/tools/rtla/common_timerlat_description.rst b/Documentation/tools/rtla/common_timerlat_description.rst
new file mode 100644
index 000000000000..321201cb8597
--- /dev/null
+++ b/Documentation/tools/rtla/common_timerlat_description.rst
@@ -0,0 +1,10 @@
+The **rtla timerlat** tool is an interface for the *timerlat* tracer. The
+*timerlat* tracer dispatches a kernel thread per-cpu. These threads
+set a periodic timer to wake themselves up and go back to sleep. After
+the wakeup, they collect and generate useful information for the
+debugging of operating system timer latency.
+
+The *timerlat* tracer outputs information in two ways. It periodically
+prints the timer latency at the timer *IRQ* handler and the *Thread*
+handler. It also enable the trace of the most relevant information via
+**osnoise:** tracepoints.
diff --git a/Documentation/tools/rtla/common_timerlat_options.rst b/Documentation/tools/rtla/common_timerlat_options.rst
new file mode 100644
index 000000000000..e9c1bfd55d48
--- /dev/null
+++ b/Documentation/tools/rtla/common_timerlat_options.rst
@@ -0,0 +1,16 @@
+**-p**, **--period** *us*
+
+ Set the *timerlat* tracer period in microseconds.
+
+**-i**, **--irq** *us*
+
+ Stop trace if the *IRQ* latency is higher than the argument in us.
+
+**-T**, **--thread** *us*
+
+ Stop trace if the *Thread* latency is higher than the argument in us.
+
+**-s**, **--stack** *us*
+
+ Save the stack trace at the *IRQ* if a *Thread* latency is higher than the
+ argument in us.
diff --git a/Documentation/tools/rtla/common_top_options.rst b/Documentation/tools/rtla/common_top_options.rst
new file mode 100644
index 000000000000..f48878938f84
--- /dev/null
+++ b/Documentation/tools/rtla/common_top_options.rst
@@ -0,0 +1,3 @@
+**-q**, **--quiet**
+
+ Print only a summary at the end of the session.
diff --git a/Documentation/tools/rtla/rtla-osnoise-hist.rst b/Documentation/tools/rtla/rtla-osnoise-hist.rst
new file mode 100644
index 000000000000..52298ddd8701
--- /dev/null
+++ b/Documentation/tools/rtla/rtla-osnoise-hist.rst
@@ -0,0 +1,66 @@
+===================
+rtla-osnoise-hist
+===================
+------------------------------------------------------
+Display a histogram of the osnoise tracer samples
+------------------------------------------------------
+
+:Manual section: 1
+
+SYNOPSIS
+========
+**rtla osnoise hist** [*OPTIONS*]
+
+DESCRIPTION
+===========
+.. include:: common_osnoise_description.rst
+
+The **rtla osnoise hist** tool collects all **osnoise:sample_threshold**
+occurrence in a histogram, displaying the results in a user-friendly way.
+The tool also allows many configurations of the *osnoise* tracer and the
+collection of the tracer output.
+
+OPTIONS
+=======
+.. include:: common_osnoise_options.rst
+
+.. include:: common_hist_options.rst
+
+.. include:: common_options.rst
+
+EXAMPLE
+=======
+In the example below, *osnoise* tracer threads are set to run with real-time
+priority *FIFO:1*, on CPUs *0-11*, for *900ms* at each period (*1s* by
+default). The reason for reducing the runtime is to avoid starving the
+**rtla** tool. The tool is also set to run for *one minute*. The output
+histogram is set to group outputs in buckets of *10us* and *25* entries::
+
+ [root@f34 ~/]# rtla osnoise hist -P F:1 -c 0-11 -r 900000 -d 1M -b 10 -e 25
+ # RTLA osnoise histogram
+ # Time unit is microseconds (us)
+ # Duration: 0 00:01:00
+ Index CPU-000 CPU-001 CPU-002 CPU-003 CPU-004 CPU-005 CPU-006 CPU-007 CPU-008 CPU-009 CPU-010 CPU-011
+ 0 42982 46287 51779 53740 52024 44817 49898 36500 50408 50128 49523 52377
+ 10 12224 8356 2912 878 2667 10155 4573 18894 4214 4836 5708 2413
+ 20 8 5 12 2 13 24 20 41 29 53 39 39
+ 30 1 1 0 0 10 3 6 19 15 31 30 38
+ 40 0 0 0 0 0 4 2 7 2 3 8 11
+ 50 0 0 0 0 0 0 0 0 0 1 1 2
+ over: 0 0 0 0 0 0 0 0 0 0 0 0
+ count: 55215 54649 54703 54620 54714 55003 54499 55461 54668 55052 55309 54880
+ min: 0 0 0 0 0 0 0 0 0 0 0 0
+ avg: 0 0 0 0 0 0 0 0 0 0 0 0
+ max: 30 30 20 20 30 40 40 40 40 50 50 50
+
+SEE ALSO
+========
+**rtla-osnoise**\(1), **rtla-osnoise-top**\(1)
+
+*osnoise* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
+
+AUTHOR
+======
+Written by Daniel Bristot de Oliveira <bristot@kernel.org>
+
+.. include:: common_appendix.rst
diff --git a/Documentation/tools/rtla/rtla-osnoise-top.rst b/Documentation/tools/rtla/rtla-osnoise-top.rst
new file mode 100644
index 000000000000..5d75d1394516
--- /dev/null
+++ b/Documentation/tools/rtla/rtla-osnoise-top.rst
@@ -0,0 +1,61 @@
+===================
+rtla-osnoise-top
+===================
+-----------------------------------------------
+Display a summary of the operating system noise
+-----------------------------------------------
+
+:Manual section: 1
+
+SYNOPSIS
+========
+**rtla osnoise top** [*OPTIONS*]
+
+DESCRIPTION
+===========
+.. include:: common_osnoise_description.rst
+
+**rtla osnoise top** collects the periodic summary from the *osnoise* tracer,
+including the counters of the occurrence of the interference source,
+displaying the results in a user-friendly format.
+
+The tool also allows many configurations of the *osnoise* tracer and the
+collection of the tracer output.
+
+OPTIONS
+=======
+.. include:: common_osnoise_options.rst
+
+.. include:: common_top_options.rst
+
+.. include:: common_options.rst
+
+EXAMPLE
+=======
+In the example below, the **rtla osnoise top** tool is set to run with a
+real-time priority *FIFO:1*, on CPUs *0-3*, for *900ms* at each period
+(*1s* by default). The reason for reducing the runtime is to avoid starving
+the rtla tool. The tool is also set to run for *one minute* and to display
+a summary of the report at the end of the session::
+
+ [root@f34 ~]# rtla osnoise top -P F:1 -c 0-3 -r 900000 -d 1M -q
+ Operating System Noise
+ duration: 0 00:01:00 | time is in us
+ CPU Period Runtime Noise % CPU Aval Max Noise Max Single HW NMI IRQ Softirq Thread
+ 0 #59 53100000 304896 99.42580 6978 56 549 0 53111 1590 13
+ 1 #59 53100000 338339 99.36282 8092 24 399 0 53130 1448 31
+ 2 #59 53100000 290842 99.45227 6582 39 855 0 53110 1406 12
+ 3 #59 53100000 204935 99.61405 6251 33 290 0 53156 1460 12
+
+SEE ALSO
+========
+
+**rtla-osnoise**\(1), **rtla-osnoise-hist**\(1)
+
+Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
+
+AUTHOR
+======
+Written by Daniel Bristot de Oliveira <bristot@kernel.org>
+
+.. include:: common_appendix.rst
diff --git a/Documentation/tools/rtla/rtla-osnoise.rst b/Documentation/tools/rtla/rtla-osnoise.rst
new file mode 100644
index 000000000000..c129b206ce34
--- /dev/null
+++ b/Documentation/tools/rtla/rtla-osnoise.rst
@@ -0,0 +1,59 @@
+===============
+rtla-osnoise
+===============
+------------------------------------------------------------------
+Measure the operating system noise
+------------------------------------------------------------------
+
+:Manual section: 1
+
+SYNOPSIS
+========
+**rtla osnoise** [*MODE*] ...
+
+DESCRIPTION
+===========
+
+.. include:: common_osnoise_description.rst
+
+The *osnoise* tracer outputs information in two ways. It periodically prints
+a summary of the noise of the operating system, including the counters of
+the occurrence of the source of interference. It also provides information
+for each noise via the **osnoise:** tracepoints. The **rtla osnoise top**
+mode displays information about the periodic summary from the *osnoise* tracer.
+The **rtla osnoise hist** mode displays information about the noise using
+the **osnoise:** tracepoints. For further details, please refer to the
+respective man page.
+
+MODES
+=====
+**top**
+
+ Prints the summary from osnoise tracer.
+
+**hist**
+
+ Prints a histogram of osnoise samples.
+
+If no MODE is given, the top mode is called, passing the arguments.
+
+OPTIONS
+=======
+
+**-h**, **--help**
+
+ Display the help text.
+
+For other options, see the man page for the corresponding mode.
+
+SEE ALSO
+========
+**rtla-osnoise-top**\(1), **rtla-osnoise-hist**\(1)
+
+Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
+
+AUTHOR
+======
+Written by Daniel Bristot de Oliveira <bristot@kernel.org>
+
+.. include:: common_appendix.rst
diff --git a/Documentation/tools/rtla/rtla-timerlat-hist.rst b/Documentation/tools/rtla/rtla-timerlat-hist.rst
new file mode 100644
index 000000000000..e12eae1f3301
--- /dev/null
+++ b/Documentation/tools/rtla/rtla-timerlat-hist.rst
@@ -0,0 +1,106 @@
+=====================
+rtla-timerlat-hist
+=====================
+------------------------------------------------
+Histograms of the operating system timer latency
+------------------------------------------------
+
+:Manual section: 1
+
+SYNOPSIS
+========
+**rtla timerlat hist** [*OPTIONS*] ...
+
+DESCRIPTION
+===========
+
+.. include:: common_timerlat_description.rst
+
+The **rtla timerlat hist** displays a histogram of each tracer event
+occurrence. This tool uses the periodic information, and the
+**osnoise:** tracepoints are enabled when using the **-T** option.
+
+OPTIONS
+=======
+
+.. include:: common_timerlat_options.rst
+
+.. include:: common_hist_options.rst
+
+.. include:: common_options.rst
+
+EXAMPLE
+=======
+In the example below, **rtla timerlat hist** is set to run for *10* minutes,
+in the cpus *0-4*, *skipping zero* only lines. Moreover, **rtla timerlat
+hist** will change the priority of the *timelat* threads to run under
+*SCHED_DEADLINE* priority, with a *10us* runtime every *1ms* period. The
+*1ms* period is also passed to the *timerlat* tracer::
+
+ [root@alien ~]# timerlat hist -d 10m -c 0-4 -P d:100us:1ms -p 1ms
+ # RTLA timerlat histogram
+ # Time unit is microseconds (us)
+ # Duration: 0 00:10:00
+ Index IRQ-000 Thr-000 IRQ-001 Thr-001 IRQ-002 Thr-002 IRQ-003 Thr-003 IRQ-004 Thr-004
+ 0 276489 0 206089 0 466018 0 481102 0 205546 0
+ 1 318327 35487 388149 30024 94531 48382 83082 71078 388026 55730
+ 2 3282 122584 4019 126527 28231 109012 23311 89309 4568 98739
+ 3 940 11815 837 9863 6209 16227 6895 17196 910 9780
+ 4 444 17287 424 11574 2097 38443 2169 36736 462 13476
+ 5 206 43291 255 25581 1223 101908 1304 101137 236 28913
+ 6 132 101501 96 64584 635 213774 757 215471 99 73453
+ 7 74 169347 65 124758 350 57466 441 53639 69 148573
+ 8 53 85183 31 156751 229 9052 306 9026 39 139907
+ 9 22 10387 12 42762 161 2554 225 2689 19 26192
+ 10 13 1898 8 5770 114 1247 128 1405 13 3772
+ 11 9 560 9 924 71 686 76 765 8 713
+ 12 4 256 2 360 50 411 64 474 3 278
+ 13 2 167 2 172 43 256 53 350 4 180
+ 14 1 88 1 116 15 198 42 223 0 115
+ 15 2 63 3 94 11 139 20 150 0 58
+ 16 2 37 0 56 5 78 10 102 0 39
+ 17 0 18 0 28 4 57 8 80 0 15
+ 18 0 8 0 17 2 50 6 56 0 12
+ 19 0 9 0 5 0 19 0 48 0 18
+ 20 0 4 0 8 0 11 2 27 0 4
+ 21 0 2 0 3 1 9 1 18 0 6
+ 22 0 1 0 3 1 7 0 3 0 5
+ 23 0 2 0 4 0 2 0 7 0 2
+ 24 0 2 0 2 1 3 0 3 0 5
+ 25 0 0 0 1 0 1 0 1 0 3
+ 26 0 1 0 0 0 2 0 2 0 0
+ 27 0 0 0 3 0 1 0 0 0 1
+ 28 0 0 0 3 0 0 0 1 0 0
+ 29 0 0 0 2 0 2 0 1 0 3
+ 30 0 1 0 0 0 0 0 0 0 0
+ 31 0 1 0 0 0 0 0 2 0 2
+ 32 0 0 0 1 0 2 0 0 0 0
+ 33 0 0 0 2 0 0 0 0 0 1
+ 34 0 0 0 0 0 0 0 0 0 2
+ 35 0 1 0 1 0 0 0 0 0 1
+ 36 0 1 0 0 0 1 0 1 0 0
+ 37 0 0 0 1 0 0 0 0 0 0
+ 40 0 0 0 0 0 1 0 1 0 0
+ 41 0 0 0 0 0 0 0 0 0 1
+ 42 0 0 0 0 0 0 0 0 0 1
+ 44 0 0 0 0 0 1 0 0 0 0
+ 46 0 0 0 0 0 0 0 1 0 0
+ 47 0 0 0 0 0 0 0 0 0 1
+ 50 0 0 0 0 0 0 0 0 0 1
+ 54 0 0 0 1 0 0 0 0 0 0
+ 58 0 0 0 1 0 0 0 0 0 0
+ over: 0 0 0 0 0 0 0 0 0 0
+ count: 600002 600002 600002 600002 600002 600002 600002 600002 600002 600002
+ min: 0 1 0 1 0 1 0 1 0 1
+ avg: 0 5 0 5 0 4 0 4 0 5
+ max: 16 36 15 58 24 44 21 46 13 50
+
+SEE ALSO
+========
+**rtla-timerlat**\(1), **rtla-timerlat-top**\(1)
+
+*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html>
+
+AUTHOR
+======
+Written by Daniel Bristot de Oliveira <bristot@kernel.org>
diff --git a/Documentation/tools/rtla/rtla-timerlat-top.rst b/Documentation/tools/rtla/rtla-timerlat-top.rst
new file mode 100644
index 000000000000..1c321de1c171
--- /dev/null
+++ b/Documentation/tools/rtla/rtla-timerlat-top.rst
@@ -0,0 +1,145 @@
+====================
+rtla-timerlat-top
+====================
+-------------------------------------------
+Measures the operating system timer latency
+-------------------------------------------
+
+:Manual section: 1
+
+SYNOPSIS
+========
+**rtla timerlat top** [*OPTIONS*] ...
+
+DESCRIPTION
+===========
+
+.. include:: common_timerlat_description.rst
+
+The **rtla timerlat top** displays a summary of the periodic output
+from the *timerlat* tracer. It also provides information for each
+operating system noise via the **osnoise:** tracepoints that can be
+seem with the option **-T**.
+
+OPTIONS
+=======
+
+.. include:: common_timerlat_options.rst
+
+.. include:: common_top_options.rst
+
+.. include:: common_options.rst
+
+EXAMPLE
+=======
+
+In the example below, the *timerlat* tracer is set to capture the stack trace at
+the IRQ handler, printing it to the buffer if the *Thread* timer latency is
+higher than *30 us*. It is also set to stop the session if a *Thread* timer
+latency higher than *30 us* is hit. Finally, it is set to save the trace
+buffer if the stop condition is hit::
+
+ [root@alien ~]# rtla timerlat top -s 30 -t 30 -T
+ Timer Latency
+ 0 00:00:59 | IRQ Timer Latency (us) | Thread Timer Latency (us)
+ CPU COUNT | cur min avg max | cur min avg max
+ 0 #58634 | 1 0 1 10 | 11 2 10 23
+ 1 #58634 | 1 0 1 9 | 12 2 9 23
+ 2 #58634 | 0 0 1 11 | 10 2 9 23
+ 3 #58634 | 1 0 1 11 | 11 2 9 24
+ 4 #58634 | 1 0 1 10 | 11 2 9 26
+ 5 #58634 | 1 0 1 8 | 10 2 9 25
+ 6 #58634 | 12 0 1 12 | 30 2 10 30 <--- CPU with spike
+ 7 #58634 | 1 0 1 9 | 11 2 9 23
+ 8 #58633 | 1 0 1 9 | 11 2 9 26
+ 9 #58633 | 1 0 1 9 | 10 2 9 26
+ 10 #58633 | 1 0 1 13 | 11 2 9 28
+ 11 #58633 | 1 0 1 13 | 12 2 9 24
+ 12 #58633 | 1 0 1 8 | 10 2 9 23
+ 13 #58633 | 1 0 1 10 | 10 2 9 22
+ 14 #58633 | 1 0 1 18 | 12 2 9 27
+ 15 #58633 | 1 0 1 10 | 11 2 9 28
+ 16 #58633 | 0 0 1 11 | 7 2 9 26
+ 17 #58633 | 1 0 1 13 | 10 2 9 24
+ 18 #58633 | 1 0 1 9 | 13 2 9 22
+ 19 #58633 | 1 0 1 10 | 11 2 9 23
+ 20 #58633 | 1 0 1 12 | 11 2 9 28
+ 21 #58633 | 1 0 1 14 | 11 2 9 24
+ 22 #58633 | 1 0 1 8 | 11 2 9 22
+ 23 #58633 | 1 0 1 10 | 11 2 9 27
+ timerlat hit stop tracing
+ saving trace to timerlat_trace.txt
+ [root@alien bristot]# tail -60 timerlat_trace.txt
+ [...]
+ timerlat/5-79755 [005] ....... 426.271226: #58634 context thread timer_latency 10823 ns
+ sh-109404 [006] dnLh213 426.271247: #58634 context irq timer_latency 12505 ns
+ sh-109404 [006] dNLh313 426.271258: irq_noise: local_timer:236 start 426.271245463 duration 12553 ns
+ sh-109404 [006] d...313 426.271263: thread_noise: sh:109404 start 426.271245853 duration 4769 ns
+ timerlat/6-79756 [006] ....... 426.271264: #58634 context thread timer_latency 30328 ns
+ timerlat/6-79756 [006] ....1.. 426.271265: <stack trace>
+ => timerlat_irq
+ => __hrtimer_run_queues
+ => hrtimer_interrupt
+ => __sysvec_apic_timer_interrupt
+ => sysvec_apic_timer_interrupt
+ => asm_sysvec_apic_timer_interrupt
+ => _raw_spin_unlock_irqrestore <---- spinlock that disabled interrupt.
+ => try_to_wake_up
+ => autoremove_wake_function
+ => __wake_up_common
+ => __wake_up_common_lock
+ => ep_poll_callback
+ => __wake_up_common
+ => __wake_up_common_lock
+ => fsnotify_add_event
+ => inotify_handle_inode_event
+ => fsnotify
+ => __fsnotify_parent
+ => __fput
+ => task_work_run
+ => exit_to_user_mode_prepare
+ => syscall_exit_to_user_mode
+ => do_syscall_64
+ => entry_SYSCALL_64_after_hwframe
+ => 0x7265000001378c
+ => 0x10000cea7
+ => 0x25a00000204a
+ => 0x12e302d00000000
+ => 0x19b51010901b6
+ => 0x283ce00726500
+ => 0x61ea308872
+ => 0x00000fe3
+ bash-109109 [007] d..h... 426.271265: #58634 context irq timer_latency 1211 ns
+ timerlat/6-79756 [006] ....... 426.271267: timerlat_main: stop tracing hit on cpu 6
+
+In the trace, it is possible the notice that the *IRQ* timer latency was
+already high, accounting *12505 ns*. The IRQ delay was caused by the
+*bash-109109* process that disabled IRQs in the wake-up path
+(*_try_to_wake_up()* function). The duration of the IRQ handler that woke
+up the timerlat thread, informed with the **osnoise:irq_noise** event, was
+also high and added more *12553 ns* to the Thread latency. Finally, the
+**osnoise:thread_noise** added by the currently running thread (including
+the scheduling overhead) added more *4769 ns*. Summing up these values,
+the *Thread* timer latency accounted for *30328 ns*.
+
+The primary reason for this high value is the wake-up path that was hit
+twice during this case: when the *bash-109109* was waking up a thread
+and then when the *timerlat* thread was awakened. This information can
+then be used as the starting point of a more fine-grained analysis.
+
+Note that **rtla timerlat** was dispatched without changing *timerlat* tracer
+threads' priority. That is generally not needed because these threads hava
+priority *FIFO:95* by default, which is a common priority used by real-time
+kernel developers to analyze scheduling delays.
+
+SEE ALSO
+--------
+**rtla-timerlat**\(1), **rtla-timerlat-hist**\(1)
+
+*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html>
+
+AUTHOR
+------
+Written by Daniel Bristot de Oliveira <bristot@kernel.org>
+
+.. include:: common_appendix.rst
diff --git a/Documentation/tools/rtla/rtla-timerlat.rst b/Documentation/tools/rtla/rtla-timerlat.rst
new file mode 100644
index 000000000000..44a49e6f302b
--- /dev/null
+++ b/Documentation/tools/rtla/rtla-timerlat.rst
@@ -0,0 +1,57 @@
+================
+rtla-timerlat
+================
+-------------------------------------------
+Measures the operating system timer latency
+-------------------------------------------
+
+:Manual section: 1
+
+SYNOPSIS
+========
+**rtla timerlat** [*MODE*] ...
+
+DESCRIPTION
+===========
+
+.. include:: common_timerlat_description.rst
+
+The *timerlat* tracer outputs information in two ways. It periodically
+prints the timer latency at the timer *IRQ* handler and the *Thread* handler.
+It also provides information for each noise via the **osnoise:** tracepoints.
+The **rtla timerlat top** mode displays a summary of the periodic output
+from the *timerlat* tracer. The **rtla hist hist** mode displays a histogram
+of each tracer event occurrence. For further details, please refer to the
+respective man page.
+
+MODES
+=====
+**top**
+
+ Prints the summary from *timerlat* tracer.
+
+**hist**
+
+ Prints a histogram of timerlat samples.
+
+If no *MODE* is given, the top mode is called, passing the arguments.
+
+OPTIONS
+=======
+**-h**, **--help**
+
+ Display the help text.
+
+For other options, see the man page for the corresponding mode.
+
+SEE ALSO
+========
+**rtla-timerlat-top**\(1), **rtla-timerlat-hist**\(1)
+
+*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html>
+
+AUTHOR
+======
+Written by Daniel Bristot de Oliveira <bristot@kernel.org>
+
+.. include:: common_appendix.rst
diff --git a/Documentation/tools/rtla/rtla.rst b/Documentation/tools/rtla/rtla.rst
new file mode 100644
index 000000000000..fc0d233efcd5
--- /dev/null
+++ b/Documentation/tools/rtla/rtla.rst
@@ -0,0 +1,48 @@
+=========
+rtla
+=========
+--------------------------------
+Real-time Linux Analysis tool
+--------------------------------
+
+:Manual section: 1
+
+SYNOPSIS
+========
+**rtla** *COMMAND* [*OPTIONS*]
+
+DESCRIPTION
+===========
+The **rtla** is a meta-tool that includes a set of commands that aims to
+analyze the real-time properties of Linux. But instead of testing Linux
+as a black box, **rtla** leverages kernel tracing capabilities to provide
+precise information about the properties and root causes of unexpected
+results.
+
+COMMANDS
+========
+**osnoise**
+
+ Gives information about the operating system noise (osnoise).
+
+**timerlat**
+
+ Measures the IRQ and thread timer latency.
+
+OPTIONS
+=======
+**-h**, **--help**
+
+ Display the help text.
+
+For other options, see the man page for the corresponding command.
+
+SEE ALSO
+========
+**rtla-osnoise**\(1), **rtla-timerlat**\(1)
+
+AUTHOR
+======
+Daniel Bristot de Oliveira <bristot@kernel.org>
+
+.. include:: common_appendix.rst
diff --git a/Documentation/trace/coresight/coresight-config.rst b/Documentation/trace/coresight/coresight-config.rst
index a4e3ef295240..6d5ffa6f7347 100644
--- a/Documentation/trace/coresight/coresight-config.rst
+++ b/Documentation/trace/coresight/coresight-config.rst
@@ -155,14 +155,14 @@ follows::
autofdo
$ cd autofdo/
$ ls
- description preset1 preset3 preset5 preset7 preset9
- feature_refs preset2 preset4 preset6 preset8
+ description feature_refs preset1 preset3 preset5 preset7 preset9
+ enable preset preset2 preset4 preset6 preset8
$ cat description
Setup ETMs with strobing for autofdo
$ cat feature_refs
strobing
-Each preset declared has a preset<n> subdirectory declared. The values for
+Each preset declared has a 'preset<n>' subdirectory declared. The values for
the preset can be examined::
$ cat preset1/values
@@ -170,6 +170,9 @@ the preset can be examined::
$ cat preset2/values
strobing.window = 0x1388 strobing.period = 0x4
+The 'enable' and 'preset' files allow the control of a configuration when
+using CoreSight with sysfs.
+
The features referenced by the configuration can be examined in the features
directory::
@@ -211,19 +214,13 @@ also declared in the perf 'cs_etm' event infrastructure so that they can
be selected when running trace under perf::
$ ls /sys/devices/cs_etm
- configurations format perf_event_mux_interval_ms sinks type
- events nr_addr_filters power
-
-Key directories here are 'configurations' - which lists the loaded
-configurations, and 'events' - a generic perf directory which allows
-selection on the perf command line.::
+ cpu0 cpu2 events nr_addr_filters power subsystem uevent
+ cpu1 cpu3 format perf_event_mux_interval_ms sinks type
- $ ls configurations/
- autofdo
- $ cat configurations/autofdo
- 0xa7c3dddd
+The key directory here is 'events' - a generic perf directory which allows
+selection on the perf command line. As with the sinks entries, this provides
+a hash of the configuration name.
-As with the sinks entries, this provides a hash of the configuration name.
The entry in the 'events' directory uses perfs built in syntax generator
to substitute the syntax for the name when evaluating the command::
@@ -242,3 +239,56 @@ A preset to override the current parameter values can also be selected::
When configurations are selected in this way, then the trace sink used is
automatically selected.
+
+Using Configurations in sysfs
+=============================
+
+Coresight can be controlled using sysfs. When this is in use then a configuration
+can be made active for the devices that are used in the sysfs session.
+
+In a configuration there are 'enable' and 'preset' files.
+
+To enable a configuration for use with sysfs::
+
+ $ cd configurations/autofdo
+ $ echo 1 > enable
+
+This will then use any default parameter values in the features - which can be
+adjusted as described above.
+
+To use a preset<n> set of parameter values::
+
+ $ echo 3 > preset
+
+This will select preset3 for the configuration.
+The valid values for preset are 0 - to deselect presets, and any value of
+<n> where a preset<n> sub-directory is present.
+
+Note that the active sysfs configuration is a global parameter, therefore
+only a single configuration can be active for sysfs at any one time.
+Attempting to enable a second configuration will result in an error.
+Additionally, attempting to disable the configuration while in use will
+also result in an error.
+
+The use of the active configuration by sysfs is independent of the configuration
+used in perf.
+
+
+Creating and Loading Custom Configurations
+==========================================
+
+Custom configurations and / or features can be dynamically loaded into the
+system by using a loadable module.
+
+An example of a custom configuration is found in ./samples/coresight.
+
+This creates a new configuration that uses the existing built in
+strobing feature, but provides a different set of presets.
+
+When the module is loaded, then the configuration appears in the configfs
+file system and is selectable in the same way as the built in configuration
+described above.
+
+Configurations can use previously loaded features. The system will ensure
+that it is not possible to unload a feature that is currently in use, by
+enforcing the unload order as the strict reverse of the load order.
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 8ddb9b09451c..c47f381d0c00 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes
prev_comm ~ "*sh*"
prev_comm ~ "ba*sh"
+If the field is a pointer that points into user space (for example
+"filename" from sys_enter_openat), then you have to append ".ustring" to the
+field name::
+
+ filename.ustring ~ "password"
+
+As the kernel will have to know how to retrieve the memory that the pointer
+is at from user space.
+
5.2 Setting filters
-------------------
@@ -230,6 +239,16 @@ Currently the caret ('^') for an error always appears at the beginning of
the filter string; the error message should still be useful though
even without more accurate position info.
+5.2.1 Filter limitations
+------------------------
+
+If a filter is placed on a string pointer ``(char *)`` that does not point
+to a string on the ring buffer, but instead points to kernel or user space
+memory, then, for safety reasons, at most 1024 bytes of the content is
+copied onto a temporary buffer to do the compare. If the copy of the memory
+faults (the pointer points to memory that should not be accessed), then the
+string compare will be treated as not matching.
+
5.3 Clearing filters
--------------------
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index b3166c4a7867..45b8c56af67a 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -3370,7 +3370,7 @@ one of the latency tracers, you will get the following results.
Instances
---------
-In the tracefs tracing directory is a directory called "instances".
+In the tracefs tracing directory, there is a directory called "instances".
This directory can have new directories created inside of it using
mkdir, and removing directories with rmdir. The directory created
with mkdir in this directory will already contain files and other
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index cfe6cccf0f44..687efcf245c1 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -367,6 +367,7 @@ Code Seq# Include File Comments
<mailto:aherrman@de.ibm.com>
0xE5 00-3F linux/fuse.h
0xEC 00-01 drivers/platform/chrome/cros_ec_dev.h ChromeOS EC driver
+0xEE 00-09 uapi/linux/pfrut.h Platform Firmware Runtime Update and Telemetry
0xF3 00-3F drivers/usb/misc/sisusbvga/sisusb.h sisfb (in development)
<mailto:thomas@winischhofer.net>
0xF6 all LTTng Linux Trace Toolkit Next Generation
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index aeeb071c7688..bb8cfddbb22d 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -371,6 +371,9 @@ The bits in the dirty bitmap are cleared before the ioctl returns, unless
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information,
see the description of the capability.
+Note that the Xen shared info page, if configured, shall always be assumed
+to be dirty. KVM will not explicitly mark it such.
+
4.9 KVM_SET_MEMORY_ALIAS
------------------------
@@ -1566,6 +1569,7 @@ otherwise it will return EBUSY error.
struct kvm_xsave {
__u32 region[1024];
+ __u32 extra[0];
};
This ioctl would copy current vcpu's xsave struct to the userspace.
@@ -1574,7 +1578,7 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
4.43 KVM_SET_XSAVE
------------------
-:Capability: KVM_CAP_XSAVE
+:Capability: KVM_CAP_XSAVE and KVM_CAP_XSAVE2
:Architectures: x86
:Type: vcpu ioctl
:Parameters: struct kvm_xsave (in)
@@ -1585,9 +1589,18 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
struct kvm_xsave {
__u32 region[1024];
+ __u32 extra[0];
};
-This ioctl would copy userspace's xsave struct to the kernel.
+This ioctl would copy userspace's xsave struct to the kernel. It copies
+as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2),
+when invoked on the vm file descriptor. The size value returned by
+KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
+Currently, it is only greater than 4096 if a dynamic feature has been
+enabled with ``arch_prctl()``, but this may change in the future.
+
+The offsets of the state save areas in struct kvm_xsave follow the
+contents of CPUID leaf 0xD on the host.
4.44 KVM_GET_XCRS
@@ -1684,6 +1697,10 @@ userspace capabilities, and with user requirements (for example, the
user may wish to constrain cpuid to emulate older hardware, or for
feature consistency across a cluster).
+Dynamically-enabled feature bits need to be requested with
+``arch_prctl()`` before calling this ioctl. Feature bits that have not
+been requested are excluded from the result.
+
Note that certain capabilities, such as KVM_CAP_X86_DISABLE_EXITS, may
expose cpuid features (e.g. MONITOR) which are not supported by kvm in
its default configuration. If userspace enables such capabilities, it
@@ -1796,6 +1813,7 @@ No flags are specified so far, the corresponding field must be set to zero.
struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint;
+ struct kvm_irq_routing_xen_evtchn xen_evtchn;
__u32 pad[8];
} u;
};
@@ -1805,6 +1823,7 @@ No flags are specified so far, the corresponding field must be set to zero.
#define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
+ #define KVM_IRQ_ROUTING_XEN_EVTCHN 5
flags:
@@ -1856,6 +1875,20 @@ address_hi must be zero.
__u32 sint;
};
+ struct kvm_irq_routing_xen_evtchn {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+ };
+
+
+When KVM_CAP_XEN_HVM includes the KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL bit
+in its indication of supported features, routing to Xen event channels
+is supported. Although the priority field is present, only the value
+KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL is supported, which means delivery by
+2 level event channels. FIFO event channel support may be added in
+the future.
+
4.55 KVM_SET_TSC_KHZ
--------------------
@@ -3701,7 +3734,7 @@ KVM with the currently defined set of flags.
:Architectures: s390
:Type: vm ioctl
:Parameters: struct kvm_s390_skeys
-:Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
+:Returns: 0 on success, KVM_S390_GET_SKEYS_NONE if guest is not using storage
keys, negative value on error
This ioctl is used to get guest storage key values on the s390
@@ -3720,7 +3753,7 @@ you want to get.
The count field is the number of consecutive frames (starting from start_gfn)
whose storage keys to get. The count field must be at least 1 and the maximum
-allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+allowed value is defined as KVM_S390_SKEYS_MAX. Values outside this range
will cause the ioctl to return -EINVAL.
The skeydata_addr field is the address to a buffer large enough to hold count
@@ -3744,7 +3777,7 @@ you want to set.
The count field is the number of consecutive frames (starting from start_gfn)
whose storage keys to get. The count field must be at least 1 and the maximum
-allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+allowed value is defined as KVM_S390_SKEYS_MAX. Values outside this range
will cause the ioctl to return -EINVAL.
The skeydata_addr field is the address to a buffer containing count bytes of
@@ -5134,6 +5167,15 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
not aware of the Xen CPU id which is used as the index into the
vcpu_info[] array, so cannot know the correct default location.
+ Note that the shared info page may be constantly written to by KVM;
+ it contains the event channel bitmap used to deliver interrupts to
+ a Xen guest, amongst other things. It is exempt from dirty tracking
+ mechanisms — KVM will not explicitly mark the page as dirty each
+ time an event channel interrupt is delivered to the guest! Thus,
+ userspace should always assume that the designated GFN is dirty if
+ any vCPU has been running or any event channel interrupts can be
+ routed to the guest.
+
KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
Sets the exception vector used to deliver Xen event channel upcalls.
@@ -5503,6 +5545,34 @@ the trailing ``'\0'``, is indicated by ``name_size`` in the header.
The Stats Data block contains an array of 64-bit values in the same order
as the descriptors in Descriptors block.
+4.134 KVM_GET_XSAVE2
+--------------------
+
+:Capability: KVM_CAP_XSAVE2
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_xsave (out)
+:Returns: 0 on success, -1 on error
+
+
+::
+
+ struct kvm_xsave {
+ __u32 region[1024];
+ __u32 extra[0];
+ };
+
+This ioctl would copy current vcpu's xsave struct to the userspace. It
+copies as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+when invoked on the vm file descriptor. The size value returned by
+KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
+Currently, it is only greater than 4096 if a dynamic feature has been
+enabled with ``arch_prctl()``, but this may change in the future.
+
+The offsets of the state save areas in struct kvm_xsave follow the contents
+of CPUID leaf 0xD on the host.
+
+
5. The kvm_run structure
========================
@@ -7293,7 +7363,7 @@ trap and emulate MSRs that are outside of the scope of KVM as well as
limit the attack surface on KVM's MSR emulation code.
8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
------------------------------
+-------------------------------------
Architectures: x86
@@ -7401,6 +7471,7 @@ PVHVM guests. Valid flags are::
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 2)
+ #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 3)
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
@@ -7420,6 +7491,10 @@ The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
+The KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL flag indicates that IRQ routing entries
+of the type KVM_IRQ_ROUTING_XEN_EVTCHN are supported, with the priority
+field set to indicate 2 level event channel delivery.
+
8.31 KVM_CAP_PPC_MULTITCE
-------------------------
diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst
index f60f5488e121..5b1ebad24c77 100644
--- a/Documentation/virt/kvm/mmu.rst
+++ b/Documentation/virt/kvm/mmu.rst
@@ -161,7 +161,7 @@ Shadow pages contain the following information:
If clear, this page corresponds to a guest page table denoted by the gfn
field.
role.quadrant:
- When role.gpte_is_8_bytes=0, the guest uses 32-bit gptes while the host uses 64-bit
+ When role.has_4_byte_gpte=1, the guest uses 32-bit gptes while the host uses 64-bit
sptes. That means a guest page table contains more ptes than the host,
so multiple shadow pages are needed to shadow one guest page.
For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
@@ -177,9 +177,9 @@ Shadow pages contain the following information:
The page is invalid and should not be used. It is a root page that is
currently pinned (by a cpu hardware register pointing to it); once it is
unpinned it will be destroyed.
- role.gpte_is_8_bytes:
- Reflects the size of the guest PTE for which the page is valid, i.e. '1'
- if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
+ role.has_4_byte_gpte:
+ Reflects the size of the guest PTE for which the page is valid, i.e. '0'
+ if direct map or 64-bit gptes are in use, '1' if 32-bit gptes are in use.
role.efer_nx:
Contains the value of efer.nx for which the page is valid.
role.cr0_wp:
diff --git a/Documentation/vm/arch_pgtable_helpers.rst b/Documentation/vm/arch_pgtable_helpers.rst
index 552567d863b8..f8b225fc9190 100644
--- a/Documentation/vm/arch_pgtable_helpers.rst
+++ b/Documentation/vm/arch_pgtable_helpers.rst
@@ -66,9 +66,11 @@ PTE Page Table Helpers
+---------------------------+--------------------------------------------------+
| pte_mknotpresent | Invalidates a mapped PTE |
+---------------------------+--------------------------------------------------+
-| ptep_get_and_clear | Clears a PTE |
+| ptep_clear | Clears a PTE |
+---------------------------+--------------------------------------------------+
-| ptep_get_and_clear_full | Clears a PTE |
+| ptep_get_and_clear | Clears and returns PTE |
++---------------------------+--------------------------------------------------+
+| ptep_get_and_clear_full | Clears and returns PTE (batched PTE unmap) |
+---------------------------+--------------------------------------------------+
| ptep_test_and_clear_young | Clears young from a PTE |
+---------------------------+--------------------------------------------------+
@@ -247,12 +249,12 @@ SWAP Page Table Helpers
| __swp_to_pmd_entry | Creates a mapped PMD from a swapped entry (arch) |
+---------------------------+--------------------------------------------------+
| is_migration_entry | Tests a migration (read or write) swapped entry |
-+---------------------------+--------------------------------------------------+
-| is_write_migration_entry | Tests a write migration swapped entry |
-+---------------------------+--------------------------------------------------+
-| make_migration_entry_read | Converts into read migration swapped entry |
-+---------------------------+--------------------------------------------------+
-| make_migration_entry | Creates a migration swapped entry (read or write)|
-+---------------------------+--------------------------------------------------+
++-------------------------------+----------------------------------------------+
+| is_writable_migration_entry | Tests a write migration swapped entry |
++-------------------------------+----------------------------------------------+
+| make_readable_migration_entry | Creates a read migration swapped entry |
++-------------------------------+----------------------------------------------+
+| make_writable_migration_entry | Creates a write migration swapped entry |
++-------------------------------+----------------------------------------------+
[1] https://lore.kernel.org/linux-mm/20181017020930.GN30832@redhat.com/
diff --git a/Documentation/vm/cleancache.rst b/Documentation/vm/cleancache.rst
deleted file mode 100644
index 68cba9131c31..000000000000
--- a/Documentation/vm/cleancache.rst
+++ /dev/null
@@ -1,296 +0,0 @@
-.. _cleancache:
-
-==========
-Cleancache
-==========
-
-Motivation
-==========
-
-Cleancache is a new optional feature provided by the VFS layer that
-potentially dramatically increases page cache effectiveness for
-many workloads in many environments at a negligible cost.
-
-Cleancache can be thought of as a page-granularity victim cache for clean
-pages that the kernel's pageframe replacement algorithm (PFRA) would like
-to keep around, but can't since there isn't enough memory. So when the
-PFRA "evicts" a page, it first attempts to use cleancache code to
-put the data contained in that page into "transcendent memory", memory
-that is not directly accessible or addressable by the kernel and is
-of unknown and possibly time-varying size.
-
-Later, when a cleancache-enabled filesystem wishes to access a page
-in a file on disk, it first checks cleancache to see if it already
-contains it; if it does, the page of data is copied into the kernel
-and a disk access is avoided.
-
-Transcendent memory "drivers" for cleancache are currently implemented
-in Xen (using hypervisor memory) and zcache (using in-kernel compressed
-memory) and other implementations are in development.
-
-:ref:`FAQs <faq>` are included below.
-
-Implementation Overview
-=======================
-
-A cleancache "backend" that provides transcendent memory registers itself
-to the kernel's cleancache "frontend" by calling cleancache_register_ops,
-passing a pointer to a cleancache_ops structure with funcs set appropriately.
-The functions provided must conform to certain semantics as follows:
-
-Most important, cleancache is "ephemeral". Pages which are copied into
-cleancache have an indefinite lifetime which is completely unknowable
-by the kernel and so may or may not still be in cleancache at any later time.
-Thus, as its name implies, cleancache is not suitable for dirty pages.
-Cleancache has complete discretion over what pages to preserve and what
-pages to discard and when.
-
-Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a
-pool id which, if positive, must be saved in the filesystem's superblock;
-a negative return value indicates failure. A "put_page" will copy a
-(presumably about-to-be-evicted) page into cleancache and associate it with
-the pool id, a file key, and a page index into the file. (The combination
-of a pool id, a file key, and an index is sometimes called a "handle".)
-A "get_page" will copy the page, if found, from cleancache into kernel memory.
-An "invalidate_page" will ensure the page no longer is present in cleancache;
-an "invalidate_inode" will invalidate all pages associated with the specified
-file; and, when a filesystem is unmounted, an "invalidate_fs" will invalidate
-all pages in all files specified by the given pool id and also surrender
-the pool id.
-
-An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache
-to treat the pool as shared using a 128-bit UUID as a key. On systems
-that may run multiple kernels (such as hard partitioned or virtualized
-systems) that may share a clustered filesystem, and where cleancache
-may be shared among those kernels, calls to init_shared_fs that specify the
-same UUID will receive the same pool id, thus allowing the pages to
-be shared. Note that any security requirements must be imposed outside
-of the kernel (e.g. by "tools" that control cleancache). Or a
-cleancache implementation can simply disable shared_init by always
-returning a negative value.
-
-If a get_page is successful on a non-shared pool, the page is invalidated
-(thus making cleancache an "exclusive" cache). On a shared pool, the page
-is NOT invalidated on a successful get_page so that it remains accessible to
-other sharers. The kernel is responsible for ensuring coherency between
-cleancache (shared or not), the page cache, and the filesystem, using
-cleancache invalidate operations as required.
-
-Note that cleancache must enforce put-put-get coherency and get-get
-coherency. For the former, if two puts are made to the same handle but
-with different data, say AAA by the first put and BBB by the second, a
-subsequent get can never return the stale data (AAA). For get-get coherency,
-if a get for a given handle fails, subsequent gets for that handle will
-never succeed unless preceded by a successful put with that handle.
-
-Last, cleancache provides no SMP serialization guarantees; if two
-different Linux threads are simultaneously putting and invalidating a page
-with the same handle, the results are indeterminate. Callers must
-lock the page to ensure serial behavior.
-
-Cleancache Performance Metrics
-==============================
-
-If properly configured, monitoring of cleancache is done via debugfs in
-the `/sys/kernel/debug/cleancache` directory. The effectiveness of cleancache
-can be measured (across all filesystems) with:
-
-``succ_gets``
- number of gets that were successful
-
-``failed_gets``
- number of gets that failed
-
-``puts``
- number of puts attempted (all "succeed")
-
-``invalidates``
- number of invalidates attempted
-
-A backend implementation may provide additional metrics.
-
-.. _faq:
-
-FAQ
-===
-
-* Where's the value? (Andrew Morton)
-
-Cleancache provides a significant performance benefit to many workloads
-in many environments with negligible overhead by improving the
-effectiveness of the pagecache. Clean pagecache pages are
-saved in transcendent memory (RAM that is otherwise not directly
-addressable to the kernel); fetching those pages later avoids "refaults"
-and thus disk reads.
-
-Cleancache (and its sister code "frontswap") provide interfaces for
-this transcendent memory (aka "tmem"), which conceptually lies between
-fast kernel-directly-addressable RAM and slower DMA/asynchronous devices.
-Disallowing direct kernel or userland reads/writes to tmem
-is ideal when data is transformed to a different form and size (such
-as with compression) or secretly moved (as might be useful for write-
-balancing for some RAM-like devices). Evicted page-cache pages (and
-swap pages) are a great use for this kind of slower-than-RAM-but-much-
-faster-than-disk transcendent memory, and the cleancache (and frontswap)
-"page-object-oriented" specification provides a nice way to read and
-write -- and indirectly "name" -- the pages.
-
-In the virtual case, the whole point of virtualization is to statistically
-multiplex physical resources across the varying demands of multiple
-virtual machines. This is really hard to do with RAM and efforts to
-do it well with no kernel change have essentially failed (except in some
-well-publicized special-case workloads). Cleancache -- and frontswap --
-with a fairly small impact on the kernel, provide a huge amount
-of flexibility for more dynamic, flexible RAM multiplexing.
-Specifically, the Xen Transcendent Memory backend allows otherwise
-"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
-virtual machines, but the pages can be compressed and deduplicated to
-optimize RAM utilization. And when guest OS's are induced to surrender
-underutilized RAM (e.g. with "self-ballooning"), page cache pages
-are the first to go, and cleancache allows those pages to be
-saved and reclaimed if overall host system memory conditions allow.
-
-And the identical interface used for cleancache can be used in
-physical systems as well. The zcache driver acts as a memory-hungry
-device that stores pages of data in a compressed state. And
-the proposed "RAMster" driver shares RAM across multiple physical
-systems.
-
-* Why does cleancache have its sticky fingers so deep inside the
- filesystems and VFS? (Andrew Morton and Christoph Hellwig)
-
-The core hooks for cleancache in VFS are in most cases a single line
-and the minimum set are placed precisely where needed to maintain
-coherency (via cleancache_invalidate operations) between cleancache,
-the page cache, and disk. All hooks compile into nothingness if
-cleancache is config'ed off and turn into a function-pointer-
-compare-to-NULL if config'ed on but no backend claims the ops
-functions, or to a compare-struct-element-to-negative if a
-backend claims the ops functions but a filesystem doesn't enable
-cleancache.
-
-Some filesystems are built entirely on top of VFS and the hooks
-in VFS are sufficient, so don't require an "init_fs" hook; the
-initial implementation of cleancache didn't provide this hook.
-But for some filesystems (such as btrfs), the VFS hooks are
-incomplete and one or more hooks in fs-specific code are required.
-And for some other filesystems, such as tmpfs, cleancache may
-be counterproductive. So it seemed prudent to require a filesystem
-to "opt in" to use cleancache, which requires adding a hook in
-each filesystem. Not all filesystems are supported by cleancache
-only because they haven't been tested. The existing set should
-be sufficient to validate the concept, the opt-in approach means
-that untested filesystems are not affected, and the hooks in the
-existing filesystems should make it very easy to add more
-filesystems in the future.
-
-The total impact of the hooks to existing fs and mm files is only
-about 40 lines added (not counting comments and blank lines).
-
-* Why not make cleancache asynchronous and batched so it can more
- easily interface with real devices with DMA instead of copying each
- individual page? (Minchan Kim)
-
-The one-page-at-a-time copy semantics simplifies the implementation
-on both the frontend and backend and also allows the backend to
-do fancy things on-the-fly like page compression and
-page deduplication. And since the data is "gone" (copied into/out
-of the pageframe) before the cleancache get/put call returns,
-a great deal of race conditions and potential coherency issues
-are avoided. While the interface seems odd for a "real device"
-or for real kernel-addressable RAM, it makes perfect sense for
-transcendent memory.
-
-* Why is non-shared cleancache "exclusive"? And where is the
- page "invalidated" after a "get"? (Minchan Kim)
-
-The main reason is to free up space in transcendent memory and
-to avoid unnecessary cleancache_invalidate calls. If you want inclusive,
-the page can be "put" immediately following the "get". If
-put-after-get for inclusive becomes common, the interface could
-be easily extended to add a "get_no_invalidate" call.
-
-The invalidate is done by the cleancache backend implementation.
-
-* What's the performance impact?
-
-Performance analysis has been presented at OLS'09 and LCA'10.
-Briefly, performance gains can be significant on most workloads,
-especially when memory pressure is high (e.g. when RAM is
-overcommitted in a virtual workload); and because the hooks are
-invoked primarily in place of or in addition to a disk read/write,
-overhead is negligible even in worst case workloads. Basically
-cleancache replaces I/O with memory-copy-CPU-overhead; on older
-single-core systems with slow memory-copy speeds, cleancache
-has little value, but in newer multicore machines, especially
-consolidated/virtualized machines, it has great value.
-
-* How do I add cleancache support for filesystem X? (Boaz Harrash)
-
-Filesystems that are well-behaved and conform to certain
-restrictions can utilize cleancache simply by making a call to
-cleancache_init_fs at mount time. Unusual, misbehaving, or
-poorly layered filesystems must either add additional hooks
-and/or undergo extensive additional testing... or should just
-not enable the optional cleancache.
-
-Some points for a filesystem to consider:
-
- - The FS should be block-device-based (e.g. a ram-based FS such
- as tmpfs should not enable cleancache)
- - To ensure coherency/correctness, the FS must ensure that all
- file removal or truncation operations either go through VFS or
- add hooks to do the equivalent cleancache "invalidate" operations
- - To ensure coherency/correctness, either inode numbers must
- be unique across the lifetime of the on-disk file OR the
- FS must provide an "encode_fh" function.
- - The FS must call the VFS superblock alloc and deactivate routines
- or add hooks to do the equivalent cleancache calls done there.
- - To maximize performance, all pages fetched from the FS should
- go through the do_mpag_readpage routine or the FS should add
- hooks to do the equivalent (cf. btrfs)
- - Currently, the FS blocksize must be the same as PAGESIZE. This
- is not an architectural restriction, but no backends currently
- support anything different.
- - A clustered FS should invoke the "shared_init_fs" cleancache
- hook to get best performance for some backends.
-
-* Why not use the KVA of the inode as the key? (Christoph Hellwig)
-
-If cleancache would use the inode virtual address instead of
-inode/filehandle, the pool id could be eliminated. But, this
-won't work because cleancache retains pagecache data pages
-persistently even when the inode has been pruned from the
-inode unused list, and only invalidates the data page if the file
-gets removed/truncated. So if cleancache used the inode kva,
-there would be potential coherency issues if/when the inode
-kva is reused for a different file. Alternately, if cleancache
-invalidated the pages when the inode kva was freed, much of the value
-of cleancache would be lost because the cache of pages in cleanache
-is potentially much larger than the kernel pagecache and is most
-useful if the pages survive inode cache removal.
-
-* Why is a global variable required?
-
-The cleancache_enabled flag is checked in all of the frequently-used
-cleancache hooks. The alternative is a function call to check a static
-variable. Since cleancache is enabled dynamically at runtime, systems
-that don't enable cleancache would suffer thousands (possibly
-tens-of-thousands) of unnecessary function calls per second. So the
-global variable allows cleancache to be enabled by default at compile
-time, but have insignificant performance impact when cleancache remains
-disabled at runtime.
-
-* Does cleanache work with KVM?
-
-The memory model of KVM is sufficiently different that a cleancache
-backend may have less value for KVM. This remains to be tested,
-especially in an overcommitted system.
-
-* Does cleancache work in userspace? It sounds useful for
- memory hungry caches like web browsers. (Jamie Lokier)
-
-No plans yet, though we agree it sounds useful, at least for
-apps that bypass the page cache (e.g. O_DIRECT).
-
-Last updated: Dan Magenheimer, April 13 2011
diff --git a/Documentation/vm/frontswap.rst b/Documentation/vm/frontswap.rst
index 1979f430c1c5..feecc5e24477 100644
--- a/Documentation/vm/frontswap.rst
+++ b/Documentation/vm/frontswap.rst
@@ -8,12 +8,6 @@ Frontswap provides a "transcendent memory" interface for swap pages.
In some environments, dramatic performance savings may be obtained because
swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk.
-(Note, frontswap -- and :ref:`cleancache` (merged at 3.0) -- are the "frontends"
-and the only necessary changes to the core kernel for transcendent memory;
-all other supporting code -- the "backends" -- is implemented as drivers.
-See the LWN.net article `Transcendent memory in a nutshell`_
-for a detailed overview of frontswap and related kernel parts)
-
.. _Transcendent memory in a nutshell: https://lwn.net/Articles/454795/
Frontswap is so named because it can be thought of as the opposite of
@@ -45,12 +39,6 @@ a disk write and, if the data is later read back, a disk read are avoided.
If a store returns failure, transcendent memory has rejected the data, and the
page can be written to swap as usual.
-If a backend chooses, frontswap can be configured as a "writethrough
-cache" by calling frontswap_writethrough(). In this mode, the reduction
-in swap device writes is lost (and also a non-trivial performance advantage)
-in order to allow the backend to arbitrarily "reclaim" space used to
-store frontswap pages to more completely manage its memory usage.
-
Note that if a page is stored and the page already exists in transcendent memory
(a "duplicate" store), either the store succeeds and the data is overwritten,
or the store fails AND the page is invalidated. This ensures stale data may
@@ -87,11 +75,9 @@ This interface is ideal when data is transformed to a different form
and size (such as with compression) or secretly moved (as might be
useful for write-balancing for some RAM-like devices). Swap pages (and
evicted page-cache pages) are a great use for this kind of slower-than-RAM-
-but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and
-cleancache) interface to transcendent memory provides a nice way to read
-and write -- and indirectly "name" -- the pages.
+but-much-faster-than-disk "pseudo-RAM device".
-Frontswap -- and cleancache -- with a fairly small impact on the kernel,
+Frontswap with a fairly small impact on the kernel,
provides a huge amount of flexibility for more dynamic, flexible RAM
utilization in various system configurations:
@@ -269,19 +255,6 @@ the old data and ensure that it is no longer accessible. Since the
swap subsystem then writes the new data to the read swap device,
this is the correct course of action to ensure coherency.
-* What is frontswap_shrink for?
-
-When the (non-frontswap) swap subsystem swaps out a page to a real
-swap device, that page is only taking up low-value pre-allocated disk
-space. But if frontswap has placed a page in transcendent memory, that
-page may be taking up valuable real estate. The frontswap_shrink
-routine allows code outside of the swap subsystem to force pages out
-of the memory managed by frontswap and back into kernel-addressable memory.
-For example, in RAMster, a "suction driver" thread will attempt
-to "repatriate" pages sent to a remote machine back to the local machine;
-this is driven using the frontswap_shrink mechanism when memory pressure
-subsides.
-
* Why does the frontswap patch create the new include file swapfile.h?
The frontswap code depends on some swap-subsystem-internal data
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
index 6f5ffef4b716..44365c4574a3 100644
--- a/Documentation/vm/index.rst
+++ b/Documentation/vm/index.rst
@@ -15,7 +15,6 @@ algorithms. If you are looking for advice on simply allocating memory, see the
active_mm
arch_pgtable_helpers
balance
- cleancache
damon/index
free_page_reporting
frontswap
@@ -31,10 +30,12 @@ algorithms. If you are looking for advice on simply allocating memory, see the
page_migration
page_frags
page_owner
+ page_table_check
remap_file_pages
slub
split_page_table_lock
transhuge
unevictable-lru
+ vmalloced-kernel-stacks
z3fold
zsmalloc
diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst
index 08810f549f70..8c5cb8147e55 100644
--- a/Documentation/vm/page_migration.rst
+++ b/Documentation/vm/page_migration.rst
@@ -263,15 +263,15 @@ Monitoring Migration
The following events (counters) can be used to monitor page migration.
1. PGMIGRATE_SUCCESS: Normal page migration success. Each count means that a
- page was migrated. If the page was a non-THP page, then this counter is
- increased by one. If the page was a THP, then this counter is increased by
- the number of THP subpages. For example, migration of a single 2MB THP that
- has 4KB-size base pages (subpages) will cause this counter to increase by
- 512.
+ page was migrated. If the page was a non-THP and non-hugetlb page, then
+ this counter is increased by one. If the page was a THP or hugetlb, then
+ this counter is increased by the number of THP or hugetlb subpages.
+ For example, migration of a single 2MB THP that has 4KB-size base pages
+ (subpages) will cause this counter to increase by 512.
2. PGMIGRATE_FAIL: Normal page migration failure. Same counting rules as for
PGMIGRATE_SUCCESS, above: this will be increased by the number of subpages,
- if it was a THP.
+ if it was a THP or hugetlb.
3. THP_MIGRATION_SUCCESS: A THP was migrated without being split.
diff --git a/Documentation/vm/page_table_check.rst b/Documentation/vm/page_table_check.rst
new file mode 100644
index 000000000000..81f521ff7ea7
--- /dev/null
+++ b/Documentation/vm/page_table_check.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _page_table_check:
+
+================
+Page Table Check
+================
+
+Introduction
+============
+
+Page table check allows to hardern the kernel by ensuring that some types of
+the memory corruptions are prevented.
+
+Page table check performs extra verifications at the time when new pages become
+accessible from the userspace by getting their page table entries (PTEs PMDs
+etc.) added into the table.
+
+In case of detected corruption, the kernel is crashed. There is a small
+performance and memory overhead associated with the page table check. Therefore,
+it is disabled by default, but can be optionally enabled on systems where the
+extra hardening outweighs the performance costs. Also, because page table check
+is synchronous, it can help with debugging double map memory corruption issues,
+by crashing kernel at the time wrong mapping occurs instead of later which is
+often the case with memory corruptions bugs.
+
+Double mapping detection logic
+==============================
+
++-------------------+-------------------+-------------------+------------------+
+| Current Mapping | New mapping | Permissions | Rule |
++===================+===================+===================+==================+
+| Anonymous | Anonymous | Read | Allow |
++-------------------+-------------------+-------------------+------------------+
+| Anonymous | Anonymous | Read / Write | Prohibit |
++-------------------+-------------------+-------------------+------------------+
+| Anonymous | Named | Any | Prohibit |
++-------------------+-------------------+-------------------+------------------+
+| Named | Anonymous | Any | Prohibit |
++-------------------+-------------------+-------------------+------------------+
+| Named | Named | Any | Allow |
++-------------------+-------------------+-------------------+------------------+
+
+Enabling Page Table Check
+=========================
+
+Build kernel with:
+
+- PAGE_TABLE_CHECK=y
+ Note, it can only be enabled on platforms where ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ is available.
+
+- Boot with 'page_table_check=on' kernel parameter.
+
+Optionally, build kernel with PAGE_TABLE_CHECK_ENFORCED in order to have page
+table support without extra kernel parameter.
diff --git a/Documentation/vm/vmalloced-kernel-stacks.rst b/Documentation/vm/vmalloced-kernel-stacks.rst
new file mode 100644
index 000000000000..fc8c67833af6
--- /dev/null
+++ b/Documentation/vm/vmalloced-kernel-stacks.rst
@@ -0,0 +1,153 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Virtually Mapped Kernel Stack Support
+=====================================
+
+:Author: Shuah Khan <skhan@linuxfoundation.org>
+
+.. contents:: :local:
+
+Overview
+--------
+
+This is a compilation of information from the code and original patch
+series that introduced the `Virtually Mapped Kernel Stacks feature
+<https://lwn.net/Articles/694348/>`
+
+Introduction
+------------
+
+Kernel stack overflows are often hard to debug and make the kernel
+susceptible to exploits. Problems could show up at a later time making
+it difficult to isolate and root-cause.
+
+Virtually-mapped kernel stacks with guard pages causes kernel stack
+overflows to be caught immediately rather than causing difficult to
+diagnose corruptions.
+
+HAVE_ARCH_VMAP_STACK and VMAP_STACK configuration options enable
+support for virtually mapped stacks with guard pages. This feature
+causes reliable faults when the stack overflows. The usability of
+the stack trace after overflow and response to the overflow itself
+is architecture dependent.
+
+.. note::
+ As of this writing, arm64, powerpc, riscv, s390, um, and x86 have
+ support for VMAP_STACK.
+
+HAVE_ARCH_VMAP_STACK
+--------------------
+
+Architectures that can support Virtually Mapped Kernel Stacks should
+enable this bool configuration option. The requirements are:
+
+- vmalloc space must be large enough to hold many kernel stacks. This
+ may rule out many 32-bit architectures.
+- Stacks in vmalloc space need to work reliably. For example, if
+ vmap page tables are created on demand, either this mechanism
+ needs to work while the stack points to a virtual address with
+ unpopulated page tables or arch code (switch_to() and switch_mm(),
+ most likely) needs to ensure that the stack's page table entries
+ are populated before running on a possibly unpopulated stack.
+- If the stack overflows into a guard page, something reasonable
+ should happen. The definition of "reasonable" is flexible, but
+ instantly rebooting without logging anything would be unfriendly.
+
+VMAP_STACK
+----------
+
+VMAP_STACK bool configuration option when enabled allocates virtually
+mapped task stacks. This option depends on HAVE_ARCH_VMAP_STACK.
+
+- Enable this if you want the use virtually-mapped kernel stacks
+ with guard pages. This causes kernel stack overflows to be caught
+ immediately rather than causing difficult-to-diagnose corruption.
+
+.. note::
+
+ Using this feature with KASAN requires architecture support
+ for backing virtual mappings with real shadow memory, and
+ KASAN_VMALLOC must be enabled.
+
+.. note::
+
+ VMAP_STACK is enabled, it is not possible to run DMA on stack
+ allocated data.
+
+Kernel configuration options and dependencies keep changing. Refer to
+the latest code base:
+
+`Kconfig <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/Kconfig>`
+
+Allocation
+-----------
+
+When a new kernel thread is created, thread stack is allocated from
+virtually contiguous memory pages from the page level allocator. These
+pages are mapped into contiguous kernel virtual space with PAGE_KERNEL
+protections.
+
+alloc_thread_stack_node() calls __vmalloc_node_range() to allocate stack
+with PAGE_KERNEL protections.
+
+- Allocated stacks are cached and later reused by new threads, so memcg
+ accounting is performed manually on assigning/releasing stacks to tasks.
+ Hence, __vmalloc_node_range is called without __GFP_ACCOUNT.
+- vm_struct is cached to be able to find when thread free is initiated
+ in interrupt context. free_thread_stack() can be called in interrupt
+ context.
+- On arm64, all VMAP's stacks need to have the same alignment to ensure
+ that VMAP'd stack overflow detection works correctly. Arch specific
+ vmap stack allocator takes care of this detail.
+- This does not address interrupt stacks - according to the original patch
+
+Thread stack allocation is initiated from clone(), fork(), vfork(),
+kernel_thread() via kernel_clone(). Leaving a few hints for searching
+the code base to understand when and how thread stack is allocated.
+
+Bulk of the code is in:
+`kernel/fork.c <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/fork.c>`.
+
+stack_vm_area pointer in task_struct keeps track of the virtually allocated
+stack and a non-null stack_vm_area pointer serves as a indication that the
+virtually mapped kernel stacks are enabled.
+
+::
+
+ struct vm_struct *stack_vm_area;
+
+Stack overflow handling
+-----------------------
+
+Leading and trailing guard pages help detect stack overflows. When stack
+overflows into the guard pages, handlers have to be careful not overflow
+the stack again. When handlers are called, it is likely that very little
+stack space is left.
+
+On x86, this is done by handling the page fault indicating the kernel
+stack overflow on the double-fault stack.
+
+Testing VMAP allocation with guard pages
+----------------------------------------
+
+How do we ensure that VMAP_STACK is actually allocating with a leading
+and trailing guard page? The following lkdtm tests can help detect any
+regressions.
+
+::
+
+ void lkdtm_STACK_GUARD_PAGE_LEADING()
+ void lkdtm_STACK_GUARD_PAGE_TRAILING()
+
+Conclusions
+-----------
+
+- A percpu cache of vmalloced stacks appears to be a bit faster than a
+ high-order stack allocation, at least when the cache hits.
+- THREAD_INFO_IN_TASK gets rid of arch-specific thread_info entirely and
+ simply embed the thread_info (containing only flags) and 'int cpu' into
+ task_struct.
+- The thread stack can be free'ed as soon as the task is dead (without
+ waiting for RCU) and then, if vmapped stacks are in use, cache the
+ entire stack for reuse on the same cpu.
diff --git a/MAINTAINERS b/MAINTAINERS
index 3b705e776cbc..ea3e6c914384 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -226,6 +226,7 @@ F: drivers/net/ethernet/8390/
M: Eric Van Hensbergen <ericvh@gmail.com>
M: Latchesar Ionkov <lucho@ionkov.net>
M: Dominique Martinet <asmadeus@codewreck.org>
+R: Christian Schoenebeck <linux_oss@crudebyte.com>
L: v9fs-developer@lists.sourceforge.net
S: Maintained
W: http://swik.net/v9fs
@@ -1077,6 +1078,15 @@ W: http://ez.analog.com/community/linux-device-drivers
F: Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml
F: drivers/iio/adc/ad7780.c
+ANALOG DEVICES INC AD74413R DRIVER
+M: Cosmin Tanislav <cosmin.tanislav@analog.com>
+L: linux-iio@vger.kernel.org
+S: Supported
+W: http://ez.analog.com/community/linux-device-drivers
+F: Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
+F: drivers/iio/addac/ad74413r.c
+F: include/dt-bindings/iio/addac/adi,ad74413r.h
+
ANALOG DEVICES INC AD9389B DRIVER
M: Hans Verkuil <hverkuil-cisco@xs4all.nl>
L: linux-media@vger.kernel.org
@@ -1903,6 +1913,7 @@ F: Documentation/trace/coresight/*
F: drivers/hwtracing/coresight/*
F: include/dt-bindings/arm/coresight-cti-dt.h
F: include/linux/coresight*
+F: samples/coresight/*
F: tools/perf/arch/arm/util/auxtrace.c
F: tools/perf/arch/arm/util/cs-etm.c
F: tools/perf/arch/arm/util/cs-etm.h
@@ -3399,14 +3410,14 @@ M: Yury Norov <yury.norov@gmail.com>
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
R: Rasmus Villemoes <linux@rasmusvillemoes.dk>
S: Maintained
-F: include/asm-generic/bitops/find.h
F: include/linux/bitmap.h
+F: include/linux/find.h
F: lib/bitmap.c
F: lib/find_bit.c
F: lib/find_bit_benchmark.c
F: lib/test_bitmap.c
-F: tools/include/asm-generic/bitops/find.h
F: tools/include/linux/bitmap.h
+F: tools/include/linux/find.h
F: tools/lib/bitmap.c
F: tools/lib/find_bit.c
@@ -4456,7 +4467,6 @@ L: keyrings@vger.kernel.org
S: Maintained
F: Documentation/admin-guide/module-signing.rst
F: certs/
-F: scripts/extract-cert.c
F: scripts/sign-file.c
CFAG12864B LCD DRIVER
@@ -4695,13 +4705,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/cla
F: include/linux/cfi.h
F: kernel/cfi.c
-CLEANCACHE API
-M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-L: linux-kernel@vger.kernel.org
-S: Maintained
-F: include/linux/cleancache.h
-F: mm/cleancache.c
-
CLK API
M: Russell King <linux@armlinux.org.uk>
L: linux-clk@vger.kernel.org
@@ -4734,7 +4737,6 @@ F: drivers/media/pci/cobalt/
COCCINELLE/Semantic Patches (SmPL)
M: Julia Lawall <Julia.Lawall@inria.fr>
-M: Gilles Muller <Gilles.Muller@inria.fr>
M: Nicolas Palix <nicolas.palix@imag.fr>
L: cocci@inria.fr (moderated for non-subscribers)
S: Supported
@@ -4773,6 +4775,8 @@ M: Ian Abbott <abbotti@mev.co.uk>
M: H Hartley Sweeten <hsweeten@visionengravers.com>
S: Odd Fixes
F: drivers/comedi/
+F: include/linux/comedi/
+F: include/uapi/linux/comedi.h
COMMON CLK FRAMEWORK
M: Michael Turquette <mturquette@baylibre.com>
@@ -7404,7 +7408,6 @@ F: include/uapi/scsi/fc/
FILE LOCKING (flock() and fcntl()/lockf())
M: Jeff Layton <jlayton@kernel.org>
-M: "J. Bruce Fields" <bfields@fieldses.org>
L: linux-fsdevel@vger.kernel.org
S: Maintained
F: fs/fcntl.c
@@ -7571,11 +7574,12 @@ W: http://floatingpoint.sourceforge.net/emulator/index.html
F: arch/x86/math-emu/
FRAMEBUFFER LAYER
-L: dri-devel@lists.freedesktop.org
+M: Helge Deller <deller@gmx.de>
L: linux-fbdev@vger.kernel.org
-S: Orphan
+L: dri-devel@lists.freedesktop.org
+S: Maintained
Q: http://patchwork.kernel.org/project/linux-fbdev/list/
-T: git git://anongit.freedesktop.org/drm/drm-misc
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git
F: Documentation/fb/
F: drivers/video/
F: include/linux/fb.h
@@ -9770,6 +9774,13 @@ F: drivers/crypto/keembay/keembay-ocs-hcu-core.c
F: drivers/crypto/keembay/ocs-hcu.c
F: drivers/crypto/keembay/ocs-hcu.h
+INTEL THUNDER BAY EMMC PHY DRIVER
+M: Nandhini Srikandan <nandhini.srikandan@intel.com>
+M: Rashmi A <rashmi.a@intel.com>
+S: Maintained
+F: Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml
+F: drivers/phy/intel/phy-intel-thunderbay-emmc.c
+
INTEL MANAGEMENT ENGINE (mei)
M: Tomas Winkler <tomas.winkler@intel.com>
L: linux-kernel@vger.kernel.org
@@ -10408,12 +10419,11 @@ S: Odd Fixes
W: http://kernelnewbies.org/KernelJanitors
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
-M: "J. Bruce Fields" <bfields@fieldses.org>
M: Chuck Lever <chuck.lever@oracle.com>
L: linux-nfs@vger.kernel.org
S: Supported
W: http://nfs.sourceforge.net/
-T: git git://linux-nfs.org/~bfields/linux.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
F: fs/lockd/
F: fs/nfs_common/
F: fs/nfsd/
@@ -10521,8 +10531,8 @@ F: arch/powerpc/kernel/kvm*
F: arch/powerpc/kvm/
KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv)
-M: Anup Patel <anup.patel@wdc.com>
-R: Atish Patra <atish.patra@wdc.com>
+M: Anup Patel <anup@brainfault.org>
+R: Atish Patra <atishp@atishpatra.org>
L: kvm@vger.kernel.org
L: kvm-riscv@lists.infradead.org
L: linux-riscv@lists.infradead.org
@@ -10687,6 +10697,7 @@ F: samples/kmemleak/kmemleak-test.c
KMOD KERNEL MODULE LOADER - USERMODE HELPER
M: Luis Chamberlain <mcgrof@kernel.org>
L: linux-kernel@vger.kernel.org
+L: linux-modules@vger.kernel.org
S: Maintained
F: include/linux/kmod.h
F: kernel/kmod.c
@@ -12976,9 +12987,10 @@ F: drivers/media/dvb-frontends/mn88473*
MODULE SUPPORT
M: Luis Chamberlain <mcgrof@kernel.org>
-M: Jessica Yu <jeyu@kernel.org>
+L: linux-modules@vger.kernel.org
+L: linux-kernel@vger.kernel.org
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git modules-next
F: include/linux/module.h
F: kernel/module.c
@@ -13817,12 +13829,24 @@ F: Documentation/devicetree/bindings/display/imx/nxp,imx8mq-dcss.yaml
F: drivers/gpu/drm/imx/dcss/
NXP i.MX 8QXP ADC DRIVER
-M: Cai Huoqing <caihuoqing@baidu.com>
+M: Cai Huoqing <cai.huoqing@linux.dev>
+M: Haibo Chen <haibo.chen@nxp.com>
+L: linux-imx@nxp.com
L: linux-iio@vger.kernel.org
-S: Supported
+S: Maintained
F: Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
F: drivers/iio/adc/imx8qxp-adc.c
+NXP i.MX 7D/6SX/6UL AND VF610 ADC DRIVER
+M: Haibo Chen <haibo.chen@nxp.com>
+L: linux-iio@vger.kernel.org
+L: linux-imx@nxp.com
+S: Maintained
+F: Documentation/devicetree/bindings/iio/adc/fsl,imx7d-adc.yaml
+F: Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml
+F: drivers/iio/adc/imx7d_adc.c
+F: drivers/iio/adc/vf610_adc.c
+
NXP PF8100/PF8121A/PF8200 PMIC REGULATOR DEVICE DRIVER
M: Jagan Teki <jagan@amarulasolutions.com>
S: Maintained
@@ -14510,6 +14534,15 @@ F: include/net/page_pool.h
F: include/trace/events/page_pool.h
F: net/core/page_pool.c
+PAGE TABLE CHECK
+M: Pasha Tatashin <pasha.tatashin@soleen.com>
+M: Andrew Morton <akpm@linux-foundation.org>
+L: linux-mm@kvack.org
+S: Maintained
+F: Documentation/vm/page_table_check.rst
+F: include/linux/page_table_check.h
+F: mm/page_table_check.c
+
PANASONIC LAPTOP ACPI EXTRAS DRIVER
M: Kenneth Chan <kenneth.t.chan@gmail.com>
L: platform-driver-x86@vger.kernel.org
@@ -14851,6 +14884,19 @@ L: linux-pci@vger.kernel.org
S: Supported
F: Documentation/PCI/pci-error-recovery.rst
+PCI PEER-TO-PEER DMA (P2PDMA)
+M: Bjorn Helgaas <bhelgaas@google.com>
+M: Logan Gunthorpe <logang@deltatee.com>
+L: linux-pci@vger.kernel.org
+S: Supported
+Q: https://patchwork.kernel.org/project/linux-pci/list/
+B: https://bugzilla.kernel.org
+C: irc://irc.oftc.net/linux-pci
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
+F: Documentation/driver-api/pci/p2pdma.rst
+F: drivers/pci/p2pdma.c
+F: include/linux/pci-p2pdma.h
+
PCI MSI DRIVER FOR ALTERA MSI IP
M: Joyce Ooi <joyce.ooi@intel.com>
L: linux-pci@vger.kernel.org
@@ -16290,6 +16336,13 @@ S: Maintained
F: include/sound/rt*.h
F: sound/soc/codecs/rt*
+REALTEK OTTO WATCHDOG
+M: Sander Vanheule <sander@svanheule.net>
+L: linux-watchdog@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml
+F: drivers/watchdog/realtek_otto_wdt.c
+
REALTEK RTL83xx SMI DSA ROUTER CHIPS
M: Linus Walleij <linus.walleij@linaro.org>
S: Maintained
@@ -16339,7 +16392,6 @@ S: Supported
F: fs/reiserfs/
REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM
-M: Ohad Ben-Cohen <ohad@wizery.com>
M: Bjorn Andersson <bjorn.andersson@linaro.org>
M: Mathieu Poirier <mathieu.poirier@linaro.org>
L: linux-remoteproc@vger.kernel.org
@@ -16353,7 +16405,6 @@ F: include/linux/remoteproc.h
F: include/linux/remoteproc/
REMOTE PROCESSOR MESSAGING (RPMSG) SUBSYSTEM
-M: Ohad Ben-Cohen <ohad@wizery.com>
M: Bjorn Andersson <bjorn.andersson@linaro.org>
M: Mathieu Poirier <mathieu.poirier@linaro.org>
L: linux-remoteproc@vger.kernel.org
@@ -16749,6 +16800,7 @@ M: Heiko Carstens <hca@linux.ibm.com>
M: Vasily Gorbik <gor@linux.ibm.com>
M: Christian Borntraeger <borntraeger@linux.ibm.com>
R: Alexander Gordeev <agordeev@linux.ibm.com>
+R: Sven Schnelle <svens@linux.ibm.com>
L: linux-s390@vger.kernel.org
S: Supported
W: http://www.ibm.com/developerworks/linux/linux390/
@@ -18432,6 +18484,13 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/dlink/sundance.c
+SUNPLUS RTC DRIVER
+M: Vincent Shih <vincent.sunplus@gmail.com>
+L: linux-rtc@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
+F: drivers/rtc/rtc-sunplus.c
+
SUPERH
M: Yoshinori Sato <ysato@users.sourceforge.jp>
M: Rich Felker <dalias@libc.org>
@@ -20318,6 +20377,7 @@ M: "Michael S. Tsirkin" <mst@redhat.com>
M: Jason Wang <jasowang@redhat.com>
L: virtualization@lists.linux-foundation.org
S: Maintained
+F: Documentation/ABI/testing/sysfs-bus-vdpa
F: Documentation/devicetree/bindings/virtio/
F: drivers/block/virtio_blk.c
F: drivers/crypto/virtio/
@@ -21094,6 +21154,13 @@ F: fs/xfs/
F: include/uapi/linux/dqblk_xfs.h
F: include/uapi/linux/fsmap.h
+XILINX AMS DRIVER
+M: Anand Ashok Dumbre <anand.ashok.dumbre@xilinx.com>
+L: linux-iio@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml
+F: drivers/iio/adc/xilinx-ams.c
+
XILINX AXI ETHERNET DRIVER
M: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
S: Maintained
@@ -21162,6 +21229,12 @@ T: git https://github.com/Xilinx/linux-xlnx.git
F: Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml
F: drivers/phy/xilinx/phy-zynqmp.c
+XILINX EVENT MANAGEMENT DRIVER
+M: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
+S: Maintained
+F: drivers/soc/xilinx/xlnx_event_manager.c
+F: include/linux/firmware/xlnx-event-manager.h
+
XILLYBUS DRIVER
M: Eli Billauer <eli.billauer@gmail.com>
L: linux-kernel@vger.kernel.org
diff --git a/Makefile b/Makefile
index 0fc511aac61c..0fb4f94a6885 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
-PATCHLEVEL = 16
+PATCHLEVEL = 17
SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
NAME = Gobble Gobble
# *DOCUMENTATION*
@@ -778,7 +778,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong
KBUILD_CFLAGS += $(stackp-flags-y)
KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror
-KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH:"%"=%)
+KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
ifdef CONFIG_CC_IS_CLANG
KBUILD_CPPFLAGS += -Qunused-arguments
@@ -1278,15 +1278,6 @@ headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts
$(Q)$(MAKE) $(hdr-inst)=include/uapi
$(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi
-# Deprecated. It is no-op now.
-PHONY += headers_check
-headers_check:
- @echo >&2 "=================== WARNING ==================="
- @echo >&2 "Since Linux 5.5, 'make headers_check' is no-op,"
- @echo >&2 "and will be removed after Linux 5.15 release."
- @echo >&2 "Please remove headers_check from your scripts."
- @echo >&2 "==============================================="
-
ifdef CONFIG_HEADERS_INSTALL
prepare: headers
endif
@@ -1497,7 +1488,7 @@ MRPROPER_FILES += include/config include/generated \
debian snap tar-install \
.config .config.old .version \
Module.symvers \
- certs/signing_key.pem certs/signing_key.x509 \
+ certs/signing_key.pem \
certs/x509.genkey \
vmlinux-gdb.py \
*.spec
@@ -1723,9 +1714,9 @@ PHONY += prepare
# now expand this into a simple variable to reduce the cost of shell evaluations
prepare: CC_VERSION_TEXT := $(CC_VERSION_TEXT)
prepare:
- @if [ "$(CC_VERSION_TEXT)" != $(CONFIG_CC_VERSION_TEXT) ]; then \
+ @if [ "$(CC_VERSION_TEXT)" != "$(CONFIG_CC_VERSION_TEXT)" ]; then \
echo >&2 "warning: the compiler differs from the one used to build the kernel"; \
- echo >&2 " The kernel was built by: "$(CONFIG_CC_VERSION_TEXT); \
+ echo >&2 " The kernel was built by: $(CONFIG_CC_VERSION_TEXT)"; \
echo >&2 " You are using: $(CC_VERSION_TEXT)"; \
fi
diff --git a/arch/Kconfig b/arch/Kconfig
index 847fde3d22cd..678a80713b21 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -648,8 +648,7 @@ config ARCH_SUPPORTS_LTO_CLANG_THIN
config HAS_LTO_CLANG
def_bool y
- # Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510
- depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD && AS_IS_LLVM
+ depends on CC_IS_CLANG && LD_IS_LLD && AS_IS_LLVM
depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm)
depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm)
depends on ARCH_SUPPORTS_LTO_CLANG
@@ -998,6 +997,10 @@ config PAGE_SIZE_LESS_THAN_64KB
depends on !PAGE_SIZE_64KB
depends on !PARISC_PAGE_SIZE_64KB
depends on !PPC_64K_PAGES
+ depends on PAGE_SIZE_LESS_THAN_256KB
+
+config PAGE_SIZE_LESS_THAN_256KB
+ def_bool y
depends on !PPC_256K_PAGES
depends on !PAGE_SIZE_256KB
@@ -1297,6 +1300,9 @@ config HAVE_ARCH_PFN_VALID
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
bool
+config ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ bool
+
config ARCH_SPLIT_ARG64
bool
help
diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h
index 5adca78830b5..e1d8483a45f2 100644
--- a/arch/alpha/include/asm/bitops.h
+++ b/arch/alpha/include/asm/bitops.h
@@ -430,8 +430,6 @@ static inline unsigned int __arch_hweight8(unsigned int w)
#endif /* __KERNEL__ */
-#include <asm-generic/bitops/find.h>
-
#ifdef __KERNEL__
/*
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
index ce3077946e1d..fb3025396ac9 100644
--- a/arch/alpha/kernel/rtc.c
+++ b/arch/alpha/kernel/rtc.c
@@ -80,7 +80,12 @@ init_rtc_epoch(void)
static int
alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
- mc146818_get_time(tm);
+ int ret = mc146818_get_time(tm);
+
+ if (ret < 0) {
+ dev_err_ratelimited(dev, "unable to read current time\n");
+ return ret;
+ }
/* Adjust for non-default epochs. It's easier to depend on the
generic __get_rtc_time and adjust the epoch here than create
diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c
index 528d2be58182..217b4dca51dd 100644
--- a/arch/alpha/kernel/srm_env.c
+++ b/arch/alpha/kernel/srm_env.c
@@ -83,14 +83,14 @@ static int srm_env_proc_show(struct seq_file *m, void *v)
static int srm_env_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, srm_env_proc_show, PDE_DATA(inode));
+ return single_open(file, srm_env_proc_show, pde_data(inode));
}
static ssize_t srm_env_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *pos)
{
int res;
- unsigned long id = (unsigned long)PDE_DATA(file_inode(file));
+ unsigned long id = (unsigned long)pde_data(file_inode(file));
char *buf = (char *) __get_free_page(GFP_USER);
unsigned long ret1, ret2;
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index ca5a32228cd6..3515bc4f16a4 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -489,3 +489,4 @@
# 557 reserved for memfd_secret
558 common process_mrelease sys_process_mrelease
559 common futex_waitv sys_futex_waitv
+560 common set_mempolicy_home_node sys_ni_syscall
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 2ae34702456c..8a66fe544c69 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -190,7 +190,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15)
local_irq_enable();
while (1);
}
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
#ifndef CONFIG_MATHEMU
@@ -575,7 +575,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg,
printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n",
pc, va, opcode, reg);
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
got_exception:
/* Ok, we caught the exception, but we don't want it. Is there
@@ -630,7 +630,7 @@ got_exception:
local_irq_enable();
while (1);
}
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
/*
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index eee5102c3d88..ec20c1004abf 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -165,17 +165,15 @@ retry:
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- /* No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
+ /* No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
- goto retry;
- }
+ goto retry;
}
mmap_read_unlock(mm);
@@ -204,7 +202,7 @@ retry:
printk(KERN_ALERT "Unable to handle kernel paging request at "
"virtual address %016lx\n", address);
die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16);
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
/* We ran out of memory, or some other thing happened to us that
made us unable to handle the page fault gracefully. */
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index f74d9860a442..3c2a4753d09b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -20,7 +20,6 @@ config ARC
select COMMON_CLK
select DMA_DIRECT_REMAP
select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
- select GENERIC_FIND_FIRST_BIT
# for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
select GENERIC_IRQ_SHOW
select GENERIC_PCI_IOMAP
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index f252e7b924e9..efc54f3e35e0 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -14,10 +14,10 @@ cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
tune-mcpu-def-$(CONFIG_ISA_ARCOMPACT) := -mcpu=arc700
tune-mcpu-def-$(CONFIG_ISA_ARCV2) := -mcpu=hs38
-ifeq ($(CONFIG_ARC_TUNE_MCPU),"")
+ifeq ($(CONFIG_ARC_TUNE_MCPU),)
cflags-y += $(tune-mcpu-def-y)
else
-tune-mcpu := $(shell echo $(CONFIG_ARC_TUNE_MCPU))
+tune-mcpu := $(CONFIG_ARC_TUNE_MCPU)
ifneq ($(call cc-option,$(tune-mcpu)),)
cflags-y += $(tune-mcpu)
else
diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile
index 8483a86c743d..4237aa5de3a3 100644
--- a/arch/arc/boot/dts/Makefile
+++ b/arch/arc/boot/dts/Makefile
@@ -2,8 +2,8 @@
# Built-in dtb
builtindtb-y := nsim_700
-ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),"")
- builtindtb-y := $(patsubst "%",%,$(CONFIG_ARC_BUILTIN_DTB_NAME))
+ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),)
+ builtindtb-y := $(CONFIG_ARC_BUILTIN_DTB_NAME)
endif
obj-y += $(builtindtb-y).dtb.o
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index a7daaf64ae34..bdb7e190a294 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -189,7 +189,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
#include <asm-generic/bitops/atomic.h>
#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index 863d63ad18d6..0d63e568d64c 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -50,8 +50,12 @@
* are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
*
* Noted at the time of Abilis Timer List corruption
- * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67
- * Reasoning : https://lkml.org/lkml/2013/4/8/15
+ *
+ * Orig Bug + Rejected solution:
+ * https://lore.kernel.org/lkml/1364553218-31255-1-git-send-email-vgupta@synopsys.com
+ *
+ * Reasoning:
+ * https://lore.kernel.org/lkml/CA+55aFyFWjpSVQM6M266tKrG_ZXJzZ-nYejpmXYQXbrr42mGPQ@mail.gmail.com
*
******************************************************************/
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index e1971d34ef30..4c919c0f4b30 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -63,166 +63,4 @@ struct arc_reg_cc_build {
#define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 8)
-/*
- * Some ARC pct quirks:
- *
- * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
- * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
- * The ARC 700 can either measure stalls per pipeline stage, or all stalls
- * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
- * and all pipeline flushes (e.g. caused by mispredicts, etc.) to
- * STALLED_CYCLES_FRONTEND.
- *
- * We could start multiple performance counters and combine everything
- * afterwards, but that makes it complicated.
- *
- * Note that I$ cache misses aren't counted by either of the two!
- */
-
-/*
- * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
- * (based on a specific RTL build)
- * Below is the static map between perf generic/arc specific event_id and
- * h/w condition names.
- * At the time of probe, we loop thru each index and find it's name to
- * complete the mapping of perf event_id to h/w index as latter is needed
- * to program the counter really
- */
-static const char * const arc_pmu_ev_hw_map[] = {
- /* count cycles */
- [PERF_COUNT_HW_CPU_CYCLES] = "crun",
- [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
- [PERF_COUNT_HW_BUS_CYCLES] = "crun",
-
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
-
- /* counts condition */
- [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
- /* All jump instructions that are taken */
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
-#ifdef CONFIG_ISA_ARCV2
- [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
-#else
- [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
- [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
-#endif
- [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */
- [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */
-
- [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */
- [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */
- [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */
- [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */
- [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */
-
- [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */
- [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */
-};
-
-#define C(_x) PERF_COUNT_HW_CACHE_##_x
-#define CACHE_OP_UNSUPPORTED 0xffff
-
-static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB,
- },
- /* DTLB LD/ST Miss not segregated by h/w*/
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
- [C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
-};
-
#endif /* __ASM_PERF_EVENT_H */
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index c0942c24d401..d36863e34bfc 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -99,8 +99,8 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
/*
* _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it.
- * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a
- * syscall, so all that reamins to be tested is _TIF_WORK_MASK
+ * SYSCALL_TRACE is anyway separately/unconditionally tested right after a
+ * syscall, so all that remains to be tested is _TIF_WORK_MASK
*/
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 145722f80c9b..adff957962da 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -17,6 +17,168 @@
/* HW holds 8 symbols + one for null terminator */
#define ARCPMU_EVENT_NAME_LEN 9
+/*
+ * Some ARC pct quirks:
+ *
+ * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+ * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+ * The ARC 700 can either measure stalls per pipeline stage, or all stalls
+ * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
+ * and all pipeline flushes (e.g. caused by mispredicts, etc.) to
+ * STALLED_CYCLES_FRONTEND.
+ *
+ * We could start multiple performance counters and combine everything
+ * afterwards, but that makes it complicated.
+ *
+ * Note that I$ cache misses aren't counted by either of the two!
+ */
+
+/*
+ * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
+ * (based on a specific RTL build)
+ * Below is the static map between perf generic/arc specific event_id and
+ * h/w condition names.
+ * At the time of probe, we loop thru each index and find it's name to
+ * complete the mapping of perf event_id to h/w index as latter is needed
+ * to program the counter really
+ */
+static const char * const arc_pmu_ev_hw_map[] = {
+ /* count cycles */
+ [PERF_COUNT_HW_CPU_CYCLES] = "crun",
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
+ [PERF_COUNT_HW_BUS_CYCLES] = "crun",
+
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
+
+ /* counts condition */
+ [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
+ /* All jump instructions that are taken */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
+#ifdef CONFIG_ISA_ARCV2
+ [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
+#else
+ [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
+ [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+#endif
+ [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */
+ [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */
+
+ [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */
+ [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */
+ [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */
+ [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */
+ [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */
+
+ [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */
+ [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */
+};
+
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED 0xffff
+
+static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB,
+ },
+ /* DTLB LD/ST Miss not segregated by h/w*/
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+ [C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+};
+
enum arc_pmu_attr_groups {
ARCPMU_ATTR_GR_EVENTS,
ARCPMU_ATTR_GR_FORMATS,
@@ -328,7 +490,7 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
}
if (!(event->hw.state & PERF_HES_STOPPED)) {
- /* stop ARC pmu here */
+ /* stop hw counter here */
write_aux_reg(ARC_REG_PCT_INDEX, idx);
/* condition code #0 is always "never" */
@@ -361,7 +523,7 @@ static int arc_pmu_add(struct perf_event *event, int flags)
{
struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
+ int idx;
idx = ffz(pmu_cpu->used_mask[0]);
if (idx == arc_pmu->n_counters)
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 9e28058cdba8..200270a94558 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -245,14 +245,9 @@ static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
{
struct eh_frame_hdr_table_entry *e1 = p1;
struct eh_frame_hdr_table_entry *e2 = p2;
- unsigned long v;
-
- v = e1->start;
- e1->start = e2->start;
- e2->start = v;
- v = e1->fde;
- e1->fde = e2->fde;
- e2->fde = v;
+
+ swap(e1->start, e2->start);
+ swap(e1->fde, e2->fde);
}
static void init_unwind_hdr(struct unwind_table *table,
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 517988e60cfc..2a7fbbb83b70 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -32,7 +32,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
/*
* Cache operations depending on function and direction argument, inspired by
- * https://lkml.org/lkml/2018/5/18/979
+ * https://lore.kernel.org/lkml/20180518175004.GF17671@n2100.armlinux.org.uk
* "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
* dma-mapping: provide a generic dma-noncoherent implementation)"
*
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 5787c261c9a4..dad27e4d69ff 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -149,8 +149,7 @@ retry:
/*
* Fault retry nuances, mmap_lock already relinquished by core mm
*/
- if (unlikely((fault & VM_FAULT_RETRY) &&
- (flags & FAULT_FLAG_ALLOW_RETRY))) {
+ if (unlikely(fault & VM_FAULT_RETRY)) {
flags |= FAULT_FLAG_TRIED;
goto retry;
}
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index 63ea5a606ecd..b821df7b0089 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -50,7 +50,7 @@ static void __init axs10x_enable_gpio_intc_wire(void)
* Current implementation of "irq-dw-apb-ictl" driver doesn't work well
* with stacked INTCs. In particular problem happens if its master INTC
* not yet instantiated. See discussion here -
- * https://lkml.org/lkml/2015/3/4/755
+ * https://lore.kernel.org/lkml/54F6FE2C.7020309@synopsys.com
*
* So setup the first gpio block as a passive pass thru and hide it from
* DT hardware topology - connect MB intc directly to cpu intc
diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c
index b3ea1fa11f87..c4a875b22352 100644
--- a/arch/arc/plat-hsdk/platform.c
+++ b/arch/arc/plat-hsdk/platform.c
@@ -52,7 +52,7 @@ static void __init hsdk_enable_gpio_intc_wire(void)
* Current implementation of "irq-dw-apb-ictl" driver doesn't work well
* with stacked INTCs. In particular problem happens if its master INTC
* not yet instantiated. See discussion here -
- * https://lkml.org/lkml/2015/3/4/755
+ * https://lore.kernel.org/lkml/54F6FE2C.7020309@synopsys.com
*
* So setup the first gpio block as a passive pass thru and hide it from
* DT hardware topology - connect intc directly to cpu intc
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index cb9e48dcba88..976315dea958 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -66,8 +66,6 @@ config UNWINDER_FRAME_POINTER
config UNWINDER_ARM
bool "ARM EABI stack unwinder"
depends on AEABI && !FUNCTION_GRAPH_TRACER
- # https://github.com/ClangBuiltLinux/linux/issues/732
- depends on !LD_IS_LLD || LLD_VERSION >= 110000
select ARM_UNWIND
help
This option enables stack unwinding support in the kernel
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 74d2f1401acb..954eee8a785a 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -76,10 +76,10 @@ CPPFLAGS_vmlinux.lds += -DTEXT_OFFSET="$(TEXT_OFFSET)"
CPPFLAGS_vmlinux.lds += -DMALLOC_SIZE="$(MALLOC_SIZE)"
compress-$(CONFIG_KERNEL_GZIP) = gzip
-compress-$(CONFIG_KERNEL_LZO) = lzo
-compress-$(CONFIG_KERNEL_LZMA) = lzma
-compress-$(CONFIG_KERNEL_XZ) = xzkern
-compress-$(CONFIG_KERNEL_LZ4) = lz4
+compress-$(CONFIG_KERNEL_LZO) = lzo_with_size
+compress-$(CONFIG_KERNEL_LZMA) = lzma_with_size
+compress-$(CONFIG_KERNEL_XZ) = xzkern_with_size
+compress-$(CONFIG_KERNEL_LZ4) = lz4_with_size
libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o
diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig
index 383c632eba7b..a9ed79b7f871 100644
--- a/arch/arm/configs/bcm2835_defconfig
+++ b/arch/arm/configs/bcm2835_defconfig
@@ -31,7 +31,6 @@ CONFIG_ARCH_BCM2835=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_AEABI=y
CONFIG_KSM=y
-CONFIG_CLEANCACHE=y
CONFIG_CMA=y
CONFIG_SECCOMP=y
CONFIG_KEXEC=y
diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig
index 0daa9c0d298e..9981566f2096 100644
--- a/arch/arm/configs/qcom_defconfig
+++ b/arch/arm/configs/qcom_defconfig
@@ -27,7 +27,6 @@ CONFIG_PCIE_QCOM=y
CONFIG_SMP=y
CONFIG_PREEMPT=y
CONFIG_HIGHMEM=y
-CONFIG_CLEANCACHE=y
CONFIG_ARM_APPENDED_DTB=y
CONFIG_ARM_ATAG_DTB_COMPAT=y
CONFIG_CPU_IDLE=y
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index c92e42a5c8f7..8e94fe7ab5eb 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -264,7 +264,6 @@ static inline int find_next_bit_le(const void *p, int size, int offset)
#endif
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
/*
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index c576fa7d9bf8..0c70eb688a00 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -180,7 +180,10 @@ void pci_ioremap_set_mem_type(int mem_type);
static inline void pci_ioremap_set_mem_type(int mem_type) {}
#endif
-extern int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr);
+struct resource;
+
+#define pci_remap_iospace pci_remap_iospace
+int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);
/*
* PCI configuration space mapping function.
diff --git a/arch/arm/kernel/atags_proc.c b/arch/arm/kernel/atags_proc.c
index 3c2faf2bd124..3ec2afe78423 100644
--- a/arch/arm/kernel/atags_proc.c
+++ b/arch/arm/kernel/atags_proc.c
@@ -13,7 +13,7 @@ struct buffer {
static ssize_t atags_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct buffer *b = PDE_DATA(file_inode(file));
+ struct buffer *b = pde_data(file_inode(file));
return simple_read_from_buffer(buf, count, ppos, b->data, b->size);
}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index c5e25cf7219b..da04ed85855a 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -335,7 +335,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
if (panic_on_oops)
panic("Fatal exception");
if (signr)
- do_exit(signr);
+ make_task_dead(signr);
}
/*
diff --git a/arch/arm/mach-dove/pcie.c b/arch/arm/mach-dove/pcie.c
index ee91ac6b5ebf..2a493bdfffc6 100644
--- a/arch/arm/mach-dove/pcie.c
+++ b/arch/arm/mach-dove/pcie.c
@@ -38,6 +38,7 @@ static int num_pcie_ports;
static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys)
{
struct pcie_port *pp;
+ struct resource realio;
if (nr >= num_pcie_ports)
return 0;
@@ -53,10 +54,10 @@ static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys)
orion_pcie_setup(pp->base);
- if (pp->index == 0)
- pci_ioremap_io(sys->busnr * SZ_64K, DOVE_PCIE0_IO_PHYS_BASE);
- else
- pci_ioremap_io(sys->busnr * SZ_64K, DOVE_PCIE1_IO_PHYS_BASE);
+ realio.start = sys->busnr * SZ_64K;
+ realio.end = realio.start + SZ_64K - 1;
+ pci_remap_iospace(&realio, pp->index == 0 ? DOVE_PCIE0_IO_PHYS_BASE :
+ DOVE_PCIE1_IO_PHYS_BASE);
/*
* IORESOURCE_MEM
diff --git a/arch/arm/mach-iop32x/pci.c b/arch/arm/mach-iop32x/pci.c
index ab0010dc3145..7a215d2ee7e2 100644
--- a/arch/arm/mach-iop32x/pci.c
+++ b/arch/arm/mach-iop32x/pci.c
@@ -185,6 +185,7 @@ iop3xx_pci_abort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
int iop3xx_pci_setup(int nr, struct pci_sys_data *sys)
{
struct resource *res;
+ struct resource realio;
if (nr != 0)
return 0;
@@ -206,7 +207,9 @@ int iop3xx_pci_setup(int nr, struct pci_sys_data *sys)
pci_add_resource_offset(&sys->resources, res, sys->mem_offset);
- pci_ioremap_io(0, IOP3XX_PCI_LOWER_IO_PA);
+ realio.start = 0;
+ realio.end = realio.start + SZ_64K - 1;
+ pci_remap_iospace(&realio, IOP3XX_PCI_LOWER_IO_PA);
return 1;
}
diff --git a/arch/arm/mach-mv78xx0/pcie.c b/arch/arm/mach-mv78xx0/pcie.c
index 636d84b40466..e15646af7f26 100644
--- a/arch/arm/mach-mv78xx0/pcie.c
+++ b/arch/arm/mach-mv78xx0/pcie.c
@@ -101,6 +101,7 @@ static void __init mv78xx0_pcie_preinit(void)
static int __init mv78xx0_pcie_setup(int nr, struct pci_sys_data *sys)
{
struct pcie_port *pp;
+ struct resource realio;
if (nr >= num_pcie_ports)
return 0;
@@ -115,7 +116,9 @@ static int __init mv78xx0_pcie_setup(int nr, struct pci_sys_data *sys)
orion_pcie_set_local_bus_nr(pp->base, sys->busnr);
orion_pcie_setup(pp->base);
- pci_ioremap_io(nr * SZ_64K, MV78XX0_PCIE_IO_PHYS_BASE(nr));
+ realio.start = nr * SZ_64K;
+ realio.end = realio.start + SZ_64K - 1;
+ pci_remap_iospace(&realio, MV78XX0_PCIE_IO_PHYS_BASE(nr));
pci_add_resource_offset(&sys->resources, &pp->res, sys->mem_offset);
diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c
index 76951bfbacf5..92e938bba20d 100644
--- a/arch/arm/mach-orion5x/pci.c
+++ b/arch/arm/mach-orion5x/pci.c
@@ -142,6 +142,7 @@ static struct pci_ops pcie_ops = {
static int __init pcie_setup(struct pci_sys_data *sys)
{
struct resource *res;
+ struct resource realio;
int dev;
/*
@@ -164,7 +165,9 @@ static int __init pcie_setup(struct pci_sys_data *sys)
pcie_ops.read = pcie_rd_conf_wa;
}
- pci_ioremap_io(sys->busnr * SZ_64K, ORION5X_PCIE_IO_PHYS_BASE);
+ realio.start = sys->busnr * SZ_64K;
+ realio.end = realio.start + SZ_64K - 1;
+ pci_remap_iospace(&realio, ORION5X_PCIE_IO_PHYS_BASE);
/*
* Request resources.
@@ -466,6 +469,7 @@ static void __init orion5x_setup_pci_wins(void)
static int __init pci_setup(struct pci_sys_data *sys)
{
struct resource *res;
+ struct resource realio;
/*
* Point PCI unit MBUS decode windows to DRAM space.
@@ -482,7 +486,9 @@ static int __init pci_setup(struct pci_sys_data *sys)
*/
orion5x_setbits(PCI_CMD, PCI_CMD_HOST_REORDER);
- pci_ioremap_io(sys->busnr * SZ_64K, ORION5X_PCI_IO_PHYS_BASE);
+ realio.start = sys->busnr * SZ_64K;
+ realio.end = realio.start + SZ_64K - 1;
+ pci_remap_iospace(&realio, ORION5X_PCI_IO_PHYS_BASE);
/*
* Request resources
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index adbb3817d0be..6f499559d193 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -1005,7 +1005,7 @@ static int __init noalign_setup(char *__unused)
__setup("noalign", noalign_setup);
/*
- * This needs to be done after sysctl_init, otherwise sys/ will be
+ * This needs to be done after sysctl_init_bases(), otherwise sys/ will be
* overwritten. Actually, this shouldn't be in sys/ at all since
* it isn't a sysctl, and it doesn't contain sysctl information.
* We now locate it in /proc/cpu/alignment instead.
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index a1cebe363ed5..a062e07516dd 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -117,7 +117,7 @@ static void die_kernel_fault(const char *msg, struct mm_struct *mm,
show_pte(KERN_ALERT, mm, addr);
die("Oops", regs, fsr);
bust_spinlocks(0);
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
}
/*
@@ -322,7 +322,7 @@ retry:
return 0;
}
- if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (!(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
goto retry;
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 6e830b9418c9..197f8eb3a775 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -459,16 +459,20 @@ void pci_ioremap_set_mem_type(int mem_type)
pci_ioremap_mem_type = mem_type;
}
-int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
+int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
{
- BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
+ unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;
- return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
- PCI_IO_VIRT_BASE + offset + SZ_64K,
- phys_addr,
+ if (!(res->flags & IORESOURCE_IO))
+ return -EINVAL;
+
+ if (res->end > IO_SPACE_LIMIT)
+ return -EINVAL;
+
+ return ioremap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
__pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
}
-EXPORT_SYMBOL_GPL(pci_ioremap_io);
+EXPORT_SYMBOL(pci_remap_iospace);
void __iomem *pci_remap_cfgspace(resource_size_t res_cookie, size_t size)
{
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 543100151f2b..ac964612d8b0 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -463,3 +463,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f6e333b59314..6978140edfa4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -120,7 +120,6 @@ config ARM64
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
- select GENERIC_FIND_FIRST_BIT
select GENERIC_IDLE_POLL_SETUP
select GENERIC_IRQ_IPI
select GENERIC_IRQ_PROBE
@@ -1136,6 +1135,10 @@ config NUMA
select GENERIC_ARCH_NUMA
select ACPI_NUMA if ACPI
select OF_NUMA
+ select HAVE_SETUP_PER_CPU_AREA
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK
+ select USE_PERCPU_NUMA_NODE_ID
help
Enable NUMA (Non-Uniform Memory Access) support.
@@ -1152,22 +1155,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
source "kernel/Kconfig.hz"
config ARCH_SPARSEMEM_ENABLE
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index d955ade5df7c..5d460f6b7675 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -249,7 +249,7 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \
" mov %" #w "[tmp], %" #w "[old]\n" \
" cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \
" mov %" #w "[ret], %" #w "[tmp]" \
- : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr), \
+ : [ret] "+r" (x0), [v] "+Q" (*(u##sz *)ptr), \
[tmp] "=&r" (tmp) \
: [old] "r" (x1), [new] "r" (x2) \
: cl); \
diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h
index 81a3e519b07d..9b3c787132d2 100644
--- a/arch/arm64/include/asm/bitops.h
+++ b/arch/arm64/include/asm/bitops.h
@@ -18,7 +18,6 @@
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index f9bef42c1411..497acf134d99 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -243,7 +243,7 @@ static inline void __cmpwait_case_##sz(volatile void *ptr, \
" cbnz %" #w "[tmp], 1f\n" \
" wfe\n" \
"1:" \
- : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \
+ : [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr) \
: [val] "r" (val)); \
}
diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h
index 4d964a7f02ee..bc6c7ac934a1 100644
--- a/arch/arm64/include/asm/hyperv-tlfs.h
+++ b/arch/arm64/include/asm/hyperv-tlfs.h
@@ -64,6 +64,15 @@
#define HV_REGISTER_STIMER0_CONFIG 0x000B0000
#define HV_REGISTER_STIMER0_COUNT 0x000B0001
+union hv_msi_entry {
+ u64 as_uint64[2];
+ struct {
+ u64 address;
+ u32 data;
+ u32 reserved;
+ } __packed;
+};
+
#include <asm-generic/hyperv-tlfs.h>
#endif
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 50d5e4de244c..d5b0386ef765 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -63,6 +63,7 @@ enum __kvm_host_smccc_func {
/* Hypercalls available after pKVM finalisation */
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f4871e47b2d0..d62405ce3e6d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -41,6 +41,8 @@ void kvm_inject_vabt(struct kvm_vcpu *vcpu);
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
+
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.hcr_el2 & HCR_RW);
@@ -386,7 +388,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
*vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT;
} else {
u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
- sctlr |= (1 << 25);
+ sctlr |= SCTLR_ELx_EE;
vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
}
}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 541e7a813eb8..5bc01e62c08a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -26,7 +26,6 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
-#include <asm/thread_info.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -298,9 +297,6 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* State of various workarounds, see kvm_asm.h for bit assignment */
- u64 workaround_flags;
-
/* Miscellaneous vcpu state flags */
u64 flags;
@@ -321,8 +317,8 @@ struct kvm_vcpu_arch {
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
- struct thread_info *host_thread_info; /* hyp VA */
struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
+ struct task_struct *parent_task;
struct {
/* {Break,watch}point registers */
@@ -367,9 +363,6 @@ struct kvm_vcpu_arch {
int target;
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
- /* Detect first run of a vcpu */
- bool has_run_once;
-
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
@@ -411,20 +404,17 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
-#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */
#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
+/*
+ * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
+ * set together with an exception...
+ */
+#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
-#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
-#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
-
-#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
- KVM_GUESTDBG_USE_SW_BP | \
- KVM_GUESTDBG_USE_HW | \
- KVM_GUESTDBG_SINGLESTEP)
/*
* When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
* take the following values:
@@ -442,11 +432,14 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11)
#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11)
-/*
- * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
- * set together with an exception...
- */
-#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
+#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
+#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
+#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
+
+#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
+ KVM_GUESTDBG_USE_SW_BP | \
+ KVM_GUESTDBG_USE_HW | \
+ KVM_GUESTDBG_SINGLESTEP)
#define vcpu_has_sve(vcpu) (system_supports_sve() && \
((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
@@ -606,6 +599,8 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
+#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid)
+
#ifndef __KVM_NVHE_HYPERVISOR__
#define kvm_call_hyp_nvhe(f, ...) \
({ \
@@ -724,7 +719,6 @@ void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
void kvm_arm_init_debug(void);
void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
@@ -744,8 +738,10 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu);
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
{
@@ -756,12 +752,7 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
-#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
-static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
-{
- return kvm_arch_vcpu_run_map_fp(vcpu);
-}
-
+#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 5afd14ab15b9..462882f356c7 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -90,7 +90,6 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-void __sve_save_state(void *sve_pffr, u32 *fpsr);
void __sve_restore_state(void *sve_pffr, u32 *fpsr);
#ifndef __KVM_NVHE_HYPERVISOR__
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 02d378887743..81839e9a8a24 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -150,6 +150,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
+int kvm_share_hyp(void *from, void *to);
+void kvm_unshare_hyp(void *from, void *to);
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 027783829584..9f339dffbc1a 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -252,6 +252,27 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot);
/**
+ * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
+ * @addr: Virtual address from which to remove the mapping.
+ * @size: Size of the mapping.
+ *
+ * The offset of @addr within a page is ignored, @size is rounded-up to
+ * the next page boundary and @phys is rounded-down to the previous page
+ * boundary.
+ *
+ * TLB invalidation is performed for each page-table entry cleared during the
+ * unmapping operation and the reference count for the page-table page
+ * containing the cleared entry is decremented, with unreferenced pages being
+ * freed. The unmapping operation will stop early if it encounters either an
+ * invalid page-table entry or a valid block mapping which maps beyond the range
+ * being unmapped.
+ *
+ * Return: Number of bytes unmapped, which may be 0.
+ */
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
+/**
* kvm_get_vtcr() - Helper to construct VTCR_EL2
* @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
* @mmfr1: Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
@@ -270,8 +291,7 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
/**
* __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
* @pgt: Uninitialised page-table structure to initialise.
- * @arch: Arch-specific KVM structure representing the guest virtual
- * machine.
+ * @mmu: S2 MMU context for this S2 translation
* @mm_ops: Memory management callbacks.
* @flags: Stage-2 configuration flags.
* @force_pte_cb: Function that returns true if page level mappings must
@@ -279,13 +299,13 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
*
* Return: 0 on success, negative error code on failure.
*/
-int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb);
-#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
- __kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL)
+#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
+ __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
new file mode 100644
index 000000000000..9f4ad2a8df59
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+#ifndef __ARM64_KVM_PKVM_H__
+#define __ARM64_KVM_PKVM_H__
+
+#include <linux/memblock.h>
+#include <asm/kvm_pgtable.h>
+
+#define HYP_MEMBLOCK_REGIONS 128
+
+extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
+extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
+
+static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
+{
+ unsigned long total = 0, i;
+
+ /* Provision the worst case scenario */
+ for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
+ nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
+ total += nr_pages;
+ }
+
+ return total;
+}
+
+static inline unsigned long __hyp_pgtable_total_pages(void)
+{
+ unsigned long res = 0, i;
+
+ /* Cover all of memory with page-granularity */
+ for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
+ struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
+ res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
+ }
+
+ return res;
+}
+
+static inline unsigned long hyp_s1_pgtable_pages(void)
+{
+ unsigned long res;
+
+ res = __hyp_pgtable_total_pages();
+
+ /* Allow 1 GiB for private mappings */
+ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+ return res;
+}
+
+static inline unsigned long host_s2_pgtable_pages(void)
+{
+ unsigned long res;
+
+ /*
+ * Include an extra 16 pages to safely upper-bound the worst case of
+ * concatenated pgds.
+ */
+ res = __hyp_pgtable_total_pages() + 16;
+
+ /* Allow 1 GiB for MMIO mappings */
+ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+ return res;
+}
+
+#endif /* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index e9c30859f80c..48f8466a4be9 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -15,6 +15,7 @@
#ifndef __ASSEMBLY__
#include <linux/refcount.h>
+#include <asm/cpufeature.h>
typedef struct {
atomic64_t id;
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4704f5893458..898bee0004ae 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -953,6 +953,7 @@
#define ID_AA64DFR0_PMUVER_8_1 0x4
#define ID_AA64DFR0_PMUVER_8_4 0x5
#define ID_AA64DFR0_PMUVER_8_5 0x6
+#define ID_AA64DFR0_PMUVER_8_7 0x7
#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
#define ID_AA64DFR0_PMSVER_8_2 0x1
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 6bdb5f5db438..4e65da3445c7 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 450
+#define __NR_compat_syscalls 451
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 41ea1195e44b..604a2053d006 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -905,6 +905,8 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
#define __NR_futex_waitv 449
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+#define __NR_set_mempolicy_home_node 450
+__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 6d0c3afd36b8..1197e7679882 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -111,7 +111,6 @@ int main(void)
#ifdef CONFIG_KVM
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
- DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index f2307d6631eb..5280e098cfb5 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -79,7 +79,11 @@
* indicate whether or not the userland FPSIMD state of the current task is
* present in the registers. The flag is set unless the FPSIMD registers of this
* CPU currently contain the most recent userland FPSIMD state of the current
- * task.
+ * task. If the task is behaving as a VMM, then this is will be managed by
+ * KVM which will clear it to indicate that the vcpu FPSIMD state is currently
+ * loaded on the CPU, allowing the state to be saved if a FPSIMD-aware
+ * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
+ * flag the register state as invalid.
*
* In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
* save the task's FPSIMD context back to task_struct from softirq context.
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index b5ec010c481f..309a27553c87 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -36,7 +36,7 @@ void *module_alloc(unsigned long size)
module_alloc_end = MODULES_END;
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
- module_alloc_end, gfp_mask, PAGE_KERNEL, 0,
+ module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
NUMA_NO_NODE, __builtin_return_address(0));
if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
- if (p && (kasan_module_alloc(p, size) < 0)) {
+ if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
vfree(p);
return NULL;
}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index e8986e6067a9..70fc42470f13 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -235,7 +235,7 @@ void die(const char *str, struct pt_regs *regs, int err)
raw_spin_unlock_irqrestore(&die_lock, flags);
if (ret != NOTIFY_STOP)
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
static void arm64_show_signal(int signo, const char *str)
diff --git a/arch/arm64/kvm/.gitignore b/arch/arm64/kvm/.gitignore
new file mode 100644
index 000000000000..6182aefb8302
--- /dev/null
+++ b/arch/arm64/kvm/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+hyp_constants.h
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index e9761d84f982..8a5fbbf084df 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -40,6 +40,7 @@ menuconfig KVM
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
+ select INTERVAL_TREE
help
Support hosting virtualized guest machines.
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 0bcc378b7961..91861fd8b897 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -5,17 +5,15 @@
ccflags-y += -I $(srctree)/$(src)
-KVM=../../../virt/kvm
+include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM) += hyp/
-kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
- $(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
- arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
+kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
- vgic-sys-reg-v3.o fpsimd.o pmu.o \
+ vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \
arch_timer.o trng.o\
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
@@ -25,3 +23,19 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
vgic/vgic-its.o vgic/vgic-debug.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o
+
+always-y := hyp_constants.h hyp-constants.s
+
+define rule_gen_hyp_constants
+ $(call filechk,offsets,__HYP_CONSTANTS_H__)
+endef
+
+CFLAGS_hyp-constants.o = -I $(srctree)/$(src)/hyp/include
+$(obj)/hyp-constants.s: $(src)/hyp/hyp-constants.c FORCE
+ $(call if_changed_dep,cc_s_c)
+
+$(obj)/hyp_constants.h: $(obj)/hyp-constants.s FORCE
+ $(call if_changed_rule,gen_hyp_constants)
+
+obj-kvm := $(addprefix $(obj)/, $(kvm-y))
+$(obj-kvm): $(obj)/hyp_constants.h
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 3df67c127489..6e542e2eae32 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -467,7 +467,7 @@ out:
}
/*
- * Schedule the background timer before calling kvm_vcpu_block, so that this
+ * Schedule the background timer before calling kvm_vcpu_halt, so that this
* thread is removed from its waitqueue and made runnable when there's a timer
* interrupt to handle.
*/
@@ -649,7 +649,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct timer_map map;
- struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
if (unlikely(!timer->enabled))
return;
@@ -672,7 +671,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
- if (rcuwait_active(wait))
+ if (kvm_vcpu_is_blocking(vcpu))
kvm_timer_blocking(vcpu);
/*
@@ -750,7 +749,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
/* Make the updates of cntvoff for all vtimer contexts atomic */
static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
{
- int i;
+ unsigned long i;
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tmp;
@@ -1189,8 +1188,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
- int vtimer_irq, ptimer_irq;
- int i, ret;
+ int vtimer_irq, ptimer_irq, ret;
+ unsigned long i;
vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
@@ -1297,7 +1296,7 @@ void kvm_timer_init_vhe(void)
static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index f026fd01bf7b..a4a0063df456 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -146,7 +146,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
return ret;
- ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
+ ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
goto out_free_stage2_pgd;
@@ -175,19 +175,13 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
*/
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- int i;
-
bitmap_free(kvm->arch.pmu_filter);
kvm_vgic_destroy(kvm);
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- if (kvm->vcpus[i]) {
- kvm_vcpu_destroy(kvm->vcpus[i]);
- kvm->vcpus[i] = NULL;
- }
- }
- atomic_set(&kvm->online_vcpus, 0);
+ kvm_destroy_vcpus(kvm);
+
+ kvm_unshare_hyp(kvm, kvm + 1);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -342,7 +336,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
return err;
- return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
+ return kvm_share_hyp(vcpu, vcpu + 1);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -351,7 +345,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
@@ -368,27 +362,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- /*
- * If we're about to block (most likely because we've just hit a
- * WFI), we need to sync back the state of the GIC CPU interface
- * so that we have the latest PMR and group enables. This ensures
- * that kvm_arch_vcpu_runnable has up-to-date data to decide
- * whether we have pending interrupts.
- *
- * For the same reason, we want to tell GICv4 that we need
- * doorbells to be signalled, should an interrupt become pending.
- */
- preempt_disable();
- kvm_vgic_vmcr_sync(vcpu);
- vgic_v4_put(vcpu, true);
- preempt_enable();
+
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- preempt_disable();
- vgic_v4_load(vcpu);
- preempt_enable();
+
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -591,18 +570,33 @@ static void update_vmid(struct kvm_vmid *vmid)
spin_unlock(&kvm_vmid_lock);
}
-static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.target >= 0;
+}
+
+/*
+ * Handle both the initialisation that is being done when the vcpu is
+ * run for the first time, as well as the updates that must be
+ * performed each time we get a new thread dealing with this vcpu.
+ */
+int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- int ret = 0;
+ int ret;
- if (likely(vcpu->arch.has_run_once))
- return 0;
+ if (!kvm_vcpu_initialized(vcpu))
+ return -ENOEXEC;
if (!kvm_arm_vcpu_is_finalized(vcpu))
return -EPERM;
- vcpu->arch.has_run_once = true;
+ ret = kvm_arch_vcpu_run_map_fp(vcpu);
+ if (ret)
+ return ret;
+
+ if (likely(vcpu_has_run_once(vcpu)))
+ return 0;
kvm_arm_vcpu_init_debug(vcpu);
@@ -614,12 +608,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_resources(kvm);
if (ret)
return ret;
- } else {
- /*
- * Tell the rest of the code that there are userspace irqchip
- * VMs in the wild.
- */
- static_branch_inc(&userspace_irqchip_in_use);
}
ret = kvm_timer_enable(vcpu);
@@ -627,6 +615,16 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
return ret;
ret = kvm_arm_pmu_v3_enable(vcpu);
+ if (ret)
+ return ret;
+
+ if (!irqchip_in_kernel(kvm)) {
+ /*
+ * Tell the rest of the code that there are userspace irqchip
+ * VMs in the wild.
+ */
+ static_branch_inc(&userspace_irqchip_in_use);
+ }
/*
* Initialize traps for protected VMs.
@@ -646,7 +644,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
void kvm_arm_halt_guest(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -656,12 +654,12 @@ void kvm_arm_halt_guest(struct kvm *kvm)
void kvm_arm_resume_guest(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.pause = false;
- rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
+ __kvm_vcpu_wake_up(vcpu);
}
}
@@ -686,9 +684,37 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
smp_rmb();
}
-static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
+/**
+ * kvm_vcpu_wfi - emulate Wait-For-Interrupt behavior
+ * @vcpu: The VCPU pointer
+ *
+ * Suspend execution of a vCPU until a valid wake event is detected, i.e. until
+ * the vCPU is runnable. The vCPU may or may not be scheduled out, depending
+ * on when a wake event arrives, e.g. there may already be a pending wake event.
+ */
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.target >= 0;
+ /*
+ * Sync back the state of the GIC CPU interface so that we have
+ * the latest PMR and group enables. This ensures that
+ * kvm_arch_vcpu_runnable has up-to-date data to decide whether
+ * we have pending interrupts, e.g. when determining if the
+ * vCPU should block.
+ *
+ * For the same reason, we want to tell GICv4 that we need
+ * doorbells to be signalled, should an interrupt become pending.
+ */
+ preempt_disable();
+ kvm_vgic_vmcr_sync(vcpu);
+ vgic_v4_put(vcpu, true);
+ preempt_enable();
+
+ kvm_vcpu_halt(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+
+ preempt_disable();
+ vgic_v4_load(vcpu);
+ preempt_enable();
}
static void check_vcpu_requests(struct kvm_vcpu *vcpu)
@@ -786,13 +812,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
int ret;
- if (unlikely(!kvm_vcpu_initialized(vcpu)))
- return -ENOEXEC;
-
- ret = kvm_vcpu_first_run_init(vcpu);
- if (ret)
- return ret;
-
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu);
if (ret)
@@ -856,6 +875,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
}
kvm_arm_setup_debug(vcpu);
+ kvm_arch_vcpu_ctxflush_fp(vcpu);
/**************************************************************
* Enter the guest
@@ -1130,7 +1150,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* need to invalidate the I-cache though, as FWB does *not*
* imply CTR_EL0.DIC.
*/
- if (vcpu->arch.has_run_once) {
+ if (vcpu_has_run_once(vcpu)) {
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
stage2_unmap_vm(vcpu->kvm);
else
@@ -2043,7 +2063,7 @@ static int finalize_hyp_mode(void)
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
mpidr &= MPIDR_HWID_BITMASK;
kvm_for_each_vcpu(i, vcpu, kvm) {
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 5621020b28de..2f48fd362a8c 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -7,7 +7,6 @@
*/
#include <linux/irqflags.h>
#include <linux/sched.h>
-#include <linux/thread_info.h>
#include <linux/kvm_host.h>
#include <asm/fpsimd.h>
#include <asm/kvm_asm.h>
@@ -15,6 +14,19 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
+void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
+{
+ struct task_struct *p = vcpu->arch.parent_task;
+ struct user_fpsimd_state *fpsimd;
+
+ if (!is_protected_kvm_enabled() || !p)
+ return;
+
+ fpsimd = &p->thread.uw.fpsimd_state;
+ kvm_unshare_hyp(fpsimd, fpsimd + 1);
+ put_task_struct(p);
+}
+
/*
* Called on entry to KVM_RUN unless this vcpu previously ran at least
* once and the most recent prior KVM_RUN for this vcpu was called from
@@ -28,36 +40,29 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
{
int ret;
- struct thread_info *ti = &current->thread_info;
struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
- /*
- * Make sure the host task thread flags and fpsimd state are
- * visible to hyp:
- */
- ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP);
- if (ret)
- goto error;
+ kvm_vcpu_unshare_task_fp(vcpu);
- ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
+ /* Make sure the host task fpsimd state is visible to hyp: */
+ ret = kvm_share_hyp(fpsimd, fpsimd + 1);
if (ret)
- goto error;
-
- if (vcpu->arch.sve_state) {
- void *sve_end;
+ return ret;
- sve_end = vcpu->arch.sve_state + vcpu_sve_state_size(vcpu);
+ vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
- ret = create_hyp_mappings(vcpu->arch.sve_state, sve_end,
- PAGE_HYP);
- if (ret)
- goto error;
+ /*
+ * We need to keep current's task_struct pinned until its data has been
+ * unshared with the hypervisor to make sure it is not re-used by the
+ * kernel and donated to someone else while already shared -- see
+ * kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
+ */
+ if (is_protected_kvm_enabled()) {
+ get_task_struct(current);
+ vcpu->arch.parent_task = current;
}
- vcpu->arch.host_thread_info = kern_hyp_va(ti);
- vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
-error:
- return ret;
+ return 0;
}
/*
@@ -66,26 +71,27 @@ error:
*
* Here, we just set the correct metadata to indicate that the FPSIMD
* state in the cpu regs (if any) belongs to current on the host.
- *
- * TIF_SVE is backed up here, since it may get clobbered with guest state.
- * This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
*/
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
{
BUG_ON(!current->mm);
+ BUG_ON(test_thread_flag(TIF_SVE));
- vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
- KVM_ARM64_HOST_SVE_IN_USE |
- KVM_ARM64_HOST_SVE_ENABLED);
+ vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
- if (test_thread_flag(TIF_SVE))
- vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
-
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
}
+void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
+{
+ if (test_thread_flag(TIF_FOREIGN_FPSTATE))
+ vcpu->arch.flags |= KVM_ARM64_FP_FOREIGN_FPSTATE;
+ else
+ vcpu->arch.flags &= ~KVM_ARM64_FP_FOREIGN_FPSTATE;
+}
+
/*
* If the guest FPSIMD state was loaded, update the host's context
* tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
@@ -115,13 +121,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
{
unsigned long flags;
- bool host_has_sve = system_supports_sve();
- bool guest_has_sve = vcpu_has_sve(vcpu);
local_irq_save(flags);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
- if (guest_has_sve) {
+ if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
/* Restore the VL that was saved when bound to the CPU */
@@ -131,7 +135,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
}
fpsimd_save_and_flush_cpu_state();
- } else if (has_vhe() && host_has_sve) {
+ } else if (has_vhe() && system_supports_sve()) {
/*
* The FPSIMD/SVE state in the CPU has not been touched, and we
* have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
@@ -145,8 +149,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
}
- update_thread_flag(TIF_SVE,
- vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+ update_thread_flag(TIF_SVE, 0);
local_irq_restore(flags);
}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 5abe0617f2af..fd2dd26caf91 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -82,7 +82,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
*
* WFE: Yield the CPU and come back to this vcpu when the scheduler
* decides to.
- * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * WFI: Simply call kvm_vcpu_halt(), which will halt execution of
* world-switches and schedule other host processes until there is an
* incoming IRQ or FIQ to the VM.
*/
@@ -95,8 +95,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
} else {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
- kvm_vcpu_block(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ kvm_vcpu_wfi(vcpu);
}
kvm_incr_pc(vcpu);
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index b726332eec49..687598e41b21 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
-obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o
+obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index e950875e31ce..61e6f3ba7b7d 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,9 +25,3 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
-
-SYM_FUNC_START(__sve_save_state)
- mov x2, #1
- sve_save 0, x1, x2, 3
- ret
-SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c
new file mode 100644
index 000000000000..b3742a6691e8
--- /dev/null
+++ b/arch/arm64/kvm/hyp/hyp-constants.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/kbuild.h>
+#include <nvhe/memory.h>
+
+int main(void)
+{
+ DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
+ return 0;
+}
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 96c5f3fb7838..58e14f8ead23 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -29,7 +29,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
struct kvm_exception_table_entry {
int insn, fixup;
@@ -49,7 +48,7 @@ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
* trap the accesses.
*/
if (!system_supports_fpsimd() ||
- vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE)
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
KVM_ARM64_FP_HOST);
@@ -143,16 +142,6 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
-static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
-{
- struct thread_struct *thread;
-
- thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct,
- uw.fpsimd_state);
-
- __sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr);
-}
-
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
@@ -169,21 +158,14 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
*/
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- bool sve_guest, sve_host;
+ bool sve_guest;
u8 esr_ec;
u64 reg;
if (!system_supports_fpsimd())
return false;
- if (system_supports_sve()) {
- sve_guest = vcpu_has_sve(vcpu);
- sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
- } else {
- sve_guest = false;
- sve_host = false;
- }
-
+ sve_guest = vcpu_has_sve(vcpu);
esr_ec = kvm_vcpu_trap_get_class(vcpu);
/* Don't handle SVE traps for non-SVE vcpus here: */
@@ -207,11 +189,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
isb();
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
- if (sve_host)
- __hyp_sve_save_host(vcpu);
- else
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-
+ __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
}
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index b58c910babaf..80e99836eac7 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -24,6 +24,11 @@ enum pkvm_page_state {
PKVM_PAGE_OWNED = 0ULL,
PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
+ __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
+ KVM_PGTABLE_PROT_SW1,
+
+ /* Meta-states which aren't encoded directly in the PTE's SW bits */
+ PKVM_NOPAGE,
};
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
@@ -50,6 +55,7 @@ extern const u8 pkvm_hyp_id;
int __pkvm_prot_finalize(void);
int __pkvm_host_share_hyp(u64 pfn);
+int __pkvm_host_unshare_hyp(u64 pfn);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index c9a8f535212e..2d08510c6cc1 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -10,13 +10,8 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
-#define HYP_MEMBLOCK_REGIONS 128
-extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
-extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;
-extern struct hyp_pool hpool;
-extern u64 __io_map_base;
int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
@@ -39,58 +34,4 @@ static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
*end = ALIGN(*end, PAGE_SIZE);
}
-static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
-{
- unsigned long total = 0, i;
-
- /* Provision the worst case scenario */
- for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
- nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
- total += nr_pages;
- }
-
- return total;
-}
-
-static inline unsigned long __hyp_pgtable_total_pages(void)
-{
- unsigned long res = 0, i;
-
- /* Cover all of memory with page-granularity */
- for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
- struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
- res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
- }
-
- return res;
-}
-
-static inline unsigned long hyp_s1_pgtable_pages(void)
-{
- unsigned long res;
-
- res = __hyp_pgtable_total_pages();
-
- /* Allow 1 GiB for private mappings */
- res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
-
- return res;
-}
-
-static inline unsigned long host_s2_pgtable_pages(void)
-{
- unsigned long res;
-
- /*
- * Include an extra 16 pages to safely upper-bound the worst case of
- * concatenated pgds.
- */
- res = __hyp_pgtable_total_pages() + 16;
-
- /* Allow 1 GiB for MMIO mappings */
- res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
-
- return res;
-}
-
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
index 1306c430ab87..00de04153cc6 100644
--- a/arch/arm64/kvm/hyp/nvhe/early_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
@@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg)
return hyp_early_alloc_contig(1);
}
+static void hyp_early_alloc_get_page(void *addr) { }
+static void hyp_early_alloc_put_page(void *addr) { }
+
void hyp_early_alloc_init(void *virt, unsigned long size)
{
base = cur = (unsigned long)virt;
@@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size)
hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
+ hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page;
+ hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index b096bf009144..5e2197db0d32 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -147,6 +147,13 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
}
+static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, pfn, host_ctxt, 1);
+
+ cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
+}
+
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
@@ -184,6 +191,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_prot_finalize),
HANDLE_FUNC(__pkvm_host_share_hyp),
+ HANDLE_FUNC(__pkvm_host_unshare_hyp),
HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index c1a90dd022b8..674f10564373 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -9,6 +9,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>
#include <hyp/fault.h>
@@ -27,6 +28,26 @@ static struct hyp_pool host_s2_pool;
const u8 pkvm_hyp_id = 1;
+static void host_lock_component(void)
+{
+ hyp_spin_lock(&host_kvm.lock);
+}
+
+static void host_unlock_component(void)
+{
+ hyp_spin_unlock(&host_kvm.lock);
+}
+
+static void hyp_lock_component(void)
+{
+ hyp_spin_lock(&pkvm_pgd_lock);
+}
+
+static void hyp_unlock_component(void)
+{
+ hyp_spin_unlock(&pkvm_pgd_lock);
+}
+
static void *host_s2_zalloc_pages_exact(size_t size)
{
void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
@@ -103,19 +124,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
prepare_host_vtcr();
hyp_spin_lock_init(&host_kvm.lock);
+ mmu->arch = &host_kvm.arch;
ret = prepare_s2_pool(pgt_pool_base);
if (ret)
return ret;
- ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
+ ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu,
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
host_stage2_force_pte_cb);
if (ret)
return ret;
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
- mmu->arch = &host_kvm.arch;
mmu->pgt = &host_kvm.pgt;
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
WRITE_ONCE(mmu->vmid.vmid, 0);
@@ -338,116 +359,446 @@ static int host_stage2_idmap(u64 addr)
prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
- hyp_spin_lock(&host_kvm.lock);
+ host_lock_component();
ret = host_stage2_adjust_range(addr, &range);
if (ret)
goto unlock;
ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
- hyp_spin_unlock(&host_kvm.lock);
+ host_unlock_component();
return ret;
}
-static inline bool check_prot(enum kvm_pgtable_prot prot,
- enum kvm_pgtable_prot required,
- enum kvm_pgtable_prot denied)
+void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
- return (prot & (required | denied)) == required;
+ struct kvm_vcpu_fault_info fault;
+ u64 esr, addr;
+ int ret = 0;
+
+ esr = read_sysreg_el2(SYS_ESR);
+ BUG_ON(!__get_fault_info(esr, &fault));
+
+ addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
+ ret = host_stage2_idmap(addr);
+ BUG_ON(ret && ret != -EAGAIN);
}
-int __pkvm_host_share_hyp(u64 pfn)
+/* This corresponds to locking order */
+enum pkvm_component_id {
+ PKVM_ID_HOST,
+ PKVM_ID_HYP,
+};
+
+struct pkvm_mem_transition {
+ u64 nr_pages;
+
+ struct {
+ enum pkvm_component_id id;
+ /* Address in the initiator's address space */
+ u64 addr;
+
+ union {
+ struct {
+ /* Address in the completer's address space */
+ u64 completer_addr;
+ } host;
+ };
+ } initiator;
+
+ struct {
+ enum pkvm_component_id id;
+ } completer;
+};
+
+struct pkvm_mem_share {
+ const struct pkvm_mem_transition tx;
+ const enum kvm_pgtable_prot completer_prot;
+};
+
+struct check_walk_data {
+ enum pkvm_page_state desired;
+ enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
+};
+
+static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
+ kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag,
+ void * const arg)
{
- phys_addr_t addr = hyp_pfn_to_phys(pfn);
- enum kvm_pgtable_prot prot, cur;
- void *virt = __hyp_va(addr);
- enum pkvm_page_state state;
- kvm_pte_t pte;
- int ret;
+ struct check_walk_data *d = arg;
+ kvm_pte_t pte = *ptep;
- if (!addr_is_memory(addr))
+ if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
return -EINVAL;
- hyp_spin_lock(&host_kvm.lock);
- hyp_spin_lock(&pkvm_pgd_lock);
+ return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
+}
+
+static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct check_walk_data *data)
+{
+ struct kvm_pgtable_walker walker = {
+ .cb = __check_page_state_visitor,
+ .arg = data,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ return kvm_pgtable_walk(pgt, addr, size, &walker);
+}
+
+static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
+{
+ if (!kvm_pte_valid(pte) && pte)
+ return PKVM_NOPAGE;
+
+ return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+}
+
+static int __host_check_page_state_range(u64 addr, u64 size,
+ enum pkvm_page_state state)
+{
+ struct check_walk_data d = {
+ .desired = state,
+ .get_page_state = host_get_page_state,
+ };
+
+ hyp_assert_lock_held(&host_kvm.lock);
+ return check_page_state_range(&host_kvm.pgt, addr, size, &d);
+}
+
+static int __host_set_page_state_range(u64 addr, u64 size,
+ enum pkvm_page_state state)
+{
+ enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
+
+ return host_stage2_idmap_locked(addr, size, prot);
+}
+
+static int host_request_owned_transition(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static int host_request_unshare(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
+}
+
+static int host_initiate_share(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
+}
+
+static int host_initiate_unshare(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
+{
+ if (!kvm_pte_valid(pte))
+ return PKVM_NOPAGE;
+
+ return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+}
+
+static int __hyp_check_page_state_range(u64 addr, u64 size,
+ enum pkvm_page_state state)
+{
+ struct check_walk_data d = {
+ .desired = state,
+ .get_page_state = hyp_get_page_state,
+ };
+
+ hyp_assert_lock_held(&pkvm_pgd_lock);
+ return check_page_state_range(&pkvm_pgtable, addr, size, &d);
+}
+
+static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+{
+ return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
+ tx->initiator.id != PKVM_ID_HOST);
+}
+
+static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
+ enum kvm_pgtable_prot perms)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+
+ if (perms != PAGE_HYP)
+ return -EPERM;
+
+ if (__hyp_ack_skip_pgtable_check(tx))
+ return 0;
+
+ return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+}
+
+static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+
+ if (__hyp_ack_skip_pgtable_check(tx))
+ return 0;
+
+ return __hyp_check_page_state_range(addr, size,
+ PKVM_PAGE_SHARED_BORROWED);
+}
+
+static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
+ enum kvm_pgtable_prot perms)
+{
+ void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
+ enum kvm_pgtable_prot prot;
+
+ prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
+ return pkvm_create_mappings_locked(start, end, prot);
+}
+
+static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);
+
+ return (ret != size) ? -EFAULT : 0;
+}
+
+static int check_share(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_request_owned_transition(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
if (ret)
- goto unlock;
- if (!pte)
- goto map_shared;
+ return ret;
- /*
- * Check attributes in the host stage-2 PTE. We need the page to be:
- * - mapped RWX as we're sharing memory;
- * - not borrowed, as that implies absence of ownership.
- * Otherwise, we can't let it got through
- */
- cur = kvm_pgtable_stage2_pte_prot(pte);
- prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
- if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
- ret = -EPERM;
- goto unlock;
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
+ break;
+ default:
+ ret = -EINVAL;
}
- state = pkvm_getstate(cur);
- if (state == PKVM_PAGE_OWNED)
- goto map_shared;
+ return ret;
+}
- /*
- * Tolerate double-sharing the same page, but this requires
- * cross-checking the hypervisor stage-1.
- */
- if (state != PKVM_PAGE_SHARED_OWNED) {
- ret = -EPERM;
- goto unlock;
+static int __do_share(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_initiate_share(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
}
- ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
if (ret)
- goto unlock;
+ return ret;
- /*
- * If the page has been shared with the hypervisor, it must be
- * already mapped as SHARED_BORROWED in its stage-1.
- */
- cur = kvm_pgtable_hyp_pte_prot(pte);
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
- if (!check_prot(cur, prot, ~prot))
- ret = -EPERM;
- goto unlock;
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
+ break;
+ default:
+ ret = -EINVAL;
+ }
-map_shared:
- /*
- * If the page is not yet shared, adjust mappings in both page-tables
- * while both locks are held.
- */
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
- ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
- BUG_ON(ret);
+ return ret;
+}
+
+/*
+ * do_share():
+ *
+ * The page owner grants access to another component with a given set
+ * of permissions.
+ *
+ * Initiator: OWNED => SHARED_OWNED
+ * Completer: NOPAGE => SHARED_BORROWED
+ */
+static int do_share(struct pkvm_mem_share *share)
+{
+ int ret;
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
- ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
- BUG_ON(ret);
+ ret = check_share(share);
+ if (ret)
+ return ret;
-unlock:
- hyp_spin_unlock(&pkvm_pgd_lock);
- hyp_spin_unlock(&host_kvm.lock);
+ return WARN_ON(__do_share(share));
+}
+
+static int check_unshare(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_request_unshare(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_ack_unshare(completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
return ret;
}
-void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
+static int __do_unshare(struct pkvm_mem_share *share)
{
- struct kvm_vcpu_fault_info fault;
- u64 esr, addr;
- int ret = 0;
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
- esr = read_sysreg_el2(SYS_ESR);
- BUG_ON(!__get_fault_info(esr, &fault));
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_initiate_unshare(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
- ret = host_stage2_idmap(addr);
- BUG_ON(ret && ret != -EAGAIN);
+ if (ret)
+ return ret;
+
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_complete_unshare(completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * do_unshare():
+ *
+ * The page owner revokes access from another component for a range of
+ * pages which were previously shared using do_share().
+ *
+ * Initiator: SHARED_OWNED => OWNED
+ * Completer: SHARED_BORROWED => NOPAGE
+ */
+static int do_unshare(struct pkvm_mem_share *share)
+{
+ int ret;
+
+ ret = check_unshare(share);
+ if (ret)
+ return ret;
+
+ return WARN_ON(__do_unshare(share));
+}
+
+int __pkvm_host_share_hyp(u64 pfn)
+{
+ int ret;
+ u64 host_addr = hyp_pfn_to_phys(pfn);
+ u64 hyp_addr = (u64)__hyp_va(host_addr);
+ struct pkvm_mem_share share = {
+ .tx = {
+ .nr_pages = 1,
+ .initiator = {
+ .id = PKVM_ID_HOST,
+ .addr = host_addr,
+ .host = {
+ .completer_addr = hyp_addr,
+ },
+ },
+ .completer = {
+ .id = PKVM_ID_HYP,
+ },
+ },
+ .completer_prot = PAGE_HYP,
+ };
+
+ host_lock_component();
+ hyp_lock_component();
+
+ ret = do_share(&share);
+
+ hyp_unlock_component();
+ host_unlock_component();
+
+ return ret;
+}
+
+int __pkvm_host_unshare_hyp(u64 pfn)
+{
+ int ret;
+ u64 host_addr = hyp_pfn_to_phys(pfn);
+ u64 hyp_addr = (u64)__hyp_va(host_addr);
+ struct pkvm_mem_share share = {
+ .tx = {
+ .nr_pages = 1,
+ .initiator = {
+ .id = PKVM_ID_HOST,
+ .addr = host_addr,
+ .host = {
+ .completer_addr = hyp_addr,
+ },
+ },
+ .completer = {
+ .id = PKVM_ID_HYP,
+ },
+ },
+ .completer_prot = PAGE_HYP,
+ };
+
+ host_lock_component();
+ hyp_lock_component();
+
+ ret = do_unshare(&share);
+
+ hyp_unlock_component();
+ host_unlock_component();
+
+ return ret;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 2fabeceb889a..526a7d6fa86f 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -8,6 +8,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <asm/spectre.h>
#include <nvhe/early_alloc.h>
@@ -18,11 +19,12 @@
struct kvm_pgtable pkvm_pgtable;
hyp_spinlock_t pkvm_pgd_lock;
-u64 __io_map_base;
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
unsigned int hyp_memblock_nr;
+static u64 __io_map_base;
+
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
unsigned long phys, enum kvm_pgtable_prot prot)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 0bd7701ad1df..543cad6c376a 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -241,7 +241,7 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
int i;
hyp_spin_lock_init(&pool->lock);
- pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
+ pool->max_order = min(MAX_ORDER, get_order((nr_pages + 1) << PAGE_SHIFT));
for (i = 0; i < pool->max_order; i++)
INIT_LIST_HEAD(&pool->free_area[i]);
pool->range_start = phys;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 578f71798c2e..27af337f9fea 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -8,6 +8,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <nvhe/early_alloc.h>
#include <nvhe/fixed_config.h>
@@ -17,7 +18,6 @@
#include <nvhe/mm.h>
#include <nvhe/trap_handler.h>
-struct hyp_pool hpool;
unsigned long hyp_nr_cpus;
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
@@ -27,6 +27,7 @@ static void *vmemmap_base;
static void *hyp_pgt_base;
static void *host_s2_pgt_base;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
+static struct hyp_pool hpool;
static int divide_memory_pool(void *virt, unsigned long size)
{
@@ -165,6 +166,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
+ struct kvm_pgtable_mm_ops *mm_ops = arg;
enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
kvm_pte_t pte = *ptep;
@@ -173,6 +175,15 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
if (!kvm_pte_valid(pte))
return 0;
+ /*
+ * Fix-up the refcount for the page-table pages as the early allocator
+ * was unable to access the hyp_vmemmap and so the buddy allocator has
+ * initialised the refcount to '1'.
+ */
+ mm_ops->get_page(ptep);
+ if (flag != KVM_PGTABLE_WALK_LEAF)
+ return 0;
+
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
@@ -205,7 +216,8 @@ static int finalize_host_mappings(void)
{
struct kvm_pgtable_walker walker = {
.cb = finalize_host_mappings_walker,
- .flags = KVM_PGTABLE_WALK_LEAF,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+ .arg = pkvm_pgtable.mm_ops,
};
int i, ret;
@@ -240,19 +252,20 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = finalize_host_mappings();
- if (ret)
- goto out;
-
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.get_page = hpool_get_page,
.put_page = hpool_put_page,
+ .page_count = hyp_page_count,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
+ ret = finalize_host_mappings();
+ if (ret)
+ goto out;
+
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index d13115a12434..6410d21d8695 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -25,7 +25,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f8ceebe4982e..844a6f003fd5 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
return prot;
}
-static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
-{
- /*
- * Tolerate KVM recreating the exact same mapping, or changing software
- * bits if the existing mapping was valid.
- */
- if (old == new)
- return false;
-
- if (!kvm_pte_valid(old))
- return true;
-
- return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
-}
-
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, struct hyp_map_data *data)
{
@@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (!kvm_block_mapping_supported(addr, end, phys, level))
return false;
+ data->phys += granule;
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
- if (hyp_pte_needs_update(old, new))
- smp_store_release(ptep, new);
+ if (old == new)
+ return true;
+ if (!kvm_pte_valid(old))
+ data->mm_ops->get_page(ptep);
+ else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
+ return false;
- data->phys += granule;
+ smp_store_release(ptep, new);
return true;
}
@@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
return -ENOMEM;
kvm_set_table_pte(ptep, childp, mm_ops);
+ mm_ops->get_page(ptep);
return 0;
}
@@ -460,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
return ret;
}
+struct hyp_unmap_data {
+ u64 unmapped;
+ struct kvm_pgtable_mm_ops *mm_ops;
+};
+
+static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+ kvm_pte_t pte = *ptep, *childp = NULL;
+ u64 granule = kvm_granule_size(level);
+ struct hyp_unmap_data *data = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+
+ if (!kvm_pte_valid(pte))
+ return -EINVAL;
+
+ if (kvm_pte_table(pte, level)) {
+ childp = kvm_pte_follow(pte, mm_ops);
+
+ if (mm_ops->page_count(childp) != 1)
+ return 0;
+
+ kvm_clear_pte(ptep);
+ dsb(ishst);
+ __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
+ } else {
+ if (end - addr < granule)
+ return -EINVAL;
+
+ kvm_clear_pte(ptep);
+ dsb(ishst);
+ __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
+ data->unmapped += granule;
+ }
+
+ dsb(ish);
+ isb();
+ mm_ops->put_page(ptep);
+
+ if (childp)
+ mm_ops->put_page(childp);
+
+ return 0;
+}
+
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+ struct hyp_unmap_data unmap_data = {
+ .mm_ops = pgt->mm_ops,
+ };
+ struct kvm_pgtable_walker walker = {
+ .cb = hyp_unmap_walker,
+ .arg = &unmap_data,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+ };
+
+ if (!pgt->mm_ops->page_count)
+ return 0;
+
+ kvm_pgtable_walk(pgt, addr, size, &walker);
+ return unmap_data.unmapped;
+}
+
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
struct kvm_pgtable_mm_ops *mm_ops)
{
@@ -482,8 +536,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
+ kvm_pte_t pte = *ptep;
+
+ if (!kvm_pte_valid(pte))
+ return 0;
+
+ mm_ops->put_page(ptep);
+
+ if (kvm_pte_table(pte, level))
+ mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
- mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
return 0;
}
@@ -491,7 +553,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
struct kvm_pgtable_walker walker = {
.cb = hyp_free_walker,
- .flags = KVM_PGTABLE_WALK_TABLE_POST,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pgt->mm_ops,
};
@@ -1116,13 +1178,13 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
-int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb)
{
size_t pgd_sz;
- u64 vtcr = arch->vtcr;
+ u64 vtcr = mmu->arch->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
@@ -1135,7 +1197,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
pgt->ia_bits = ia_bits;
pgt->start_level = start_level;
pgt->mm_ops = mm_ops;
- pgt->mmu = &arch->mmu;
+ pgt->mmu = mmu;
pgt->flags = flags;
pgt->force_pte_cb = force_pte_cb;
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index fbb26b93c347..11d053fdd604 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -24,7 +24,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
/* VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 326cdfec74a1..bc2aba953299 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -210,13 +210,13 @@ static void stage2_flush_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int idx;
+ int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots)
+ kvm_for_each_memslot(memslot, bkt, slots)
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -239,6 +239,9 @@ void free_hyp_pgds(void)
static bool kvm_host_owns_hyp_mappings(void)
{
+ if (is_kernel_in_hyp_mode())
+ return false;
+
if (static_branch_likely(&kvm_protected_mode_initialized))
return false;
@@ -281,14 +284,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
}
}
-static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
+struct hyp_shared_pfn {
+ u64 pfn;
+ int count;
+ struct rb_node node;
+};
+
+static DEFINE_MUTEX(hyp_shared_pfns_lock);
+static struct rb_root hyp_shared_pfns = RB_ROOT;
+
+static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
+ struct rb_node **parent)
{
- phys_addr_t addr;
+ struct hyp_shared_pfn *this;
+
+ *node = &hyp_shared_pfns.rb_node;
+ *parent = NULL;
+ while (**node) {
+ this = container_of(**node, struct hyp_shared_pfn, node);
+ *parent = **node;
+ if (this->pfn < pfn)
+ *node = &((**node)->rb_left);
+ else if (this->pfn > pfn)
+ *node = &((**node)->rb_right);
+ else
+ return this;
+ }
+
+ return NULL;
+}
+
+static int share_pfn_hyp(u64 pfn)
+{
+ struct rb_node **node, *parent;
+ struct hyp_shared_pfn *this;
+ int ret = 0;
+
+ mutex_lock(&hyp_shared_pfns_lock);
+ this = find_shared_pfn(pfn, &node, &parent);
+ if (this) {
+ this->count++;
+ goto unlock;
+ }
+
+ this = kzalloc(sizeof(*this), GFP_KERNEL);
+ if (!this) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ this->pfn = pfn;
+ this->count = 1;
+ rb_link_node(&this->node, parent, node);
+ rb_insert_color(&this->node, &hyp_shared_pfns);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
+unlock:
+ mutex_unlock(&hyp_shared_pfns_lock);
+
+ return ret;
+}
+
+static int unshare_pfn_hyp(u64 pfn)
+{
+ struct rb_node **node, *parent;
+ struct hyp_shared_pfn *this;
+ int ret = 0;
+
+ mutex_lock(&hyp_shared_pfns_lock);
+ this = find_shared_pfn(pfn, &node, &parent);
+ if (WARN_ON(!this)) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ this->count--;
+ if (this->count)
+ goto unlock;
+
+ rb_erase(&this->node, &hyp_shared_pfns);
+ kfree(this);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
+unlock:
+ mutex_unlock(&hyp_shared_pfns_lock);
+
+ return ret;
+}
+
+int kvm_share_hyp(void *from, void *to)
+{
+ phys_addr_t start, end, cur;
+ u64 pfn;
int ret;
- for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
- ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
- __phys_to_pfn(addr));
+ if (is_kernel_in_hyp_mode())
+ return 0;
+
+ /*
+ * The share hcall maps things in the 'fixed-offset' region of the hyp
+ * VA space, so we can only share physically contiguous data-structures
+ * for now.
+ */
+ if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
+ return -EINVAL;
+
+ if (kvm_host_owns_hyp_mappings())
+ return create_hyp_mappings(from, to, PAGE_HYP);
+
+ start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+ end = PAGE_ALIGN(__pa(to));
+ for (cur = start; cur < end; cur += PAGE_SIZE) {
+ pfn = __phys_to_pfn(cur);
+ ret = share_pfn_hyp(pfn);
if (ret)
return ret;
}
@@ -296,6 +402,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
return 0;
}
+void kvm_unshare_hyp(void *from, void *to)
+{
+ phys_addr_t start, end, cur;
+ u64 pfn;
+
+ if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
+ return;
+
+ start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+ end = PAGE_ALIGN(__pa(to));
+ for (cur = start; cur < end; cur += PAGE_SIZE) {
+ pfn = __phys_to_pfn(cur);
+ WARN_ON(unshare_pfn_hyp(pfn));
+ }
+}
+
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
@@ -316,12 +438,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
if (is_kernel_in_hyp_mode())
return 0;
- if (!kvm_host_owns_hyp_mappings()) {
- if (WARN_ON(prot != PAGE_HYP))
- return -EPERM;
- return pkvm_share_hyp(kvm_kaddr_to_phys(from),
- kvm_kaddr_to_phys(to));
- }
+ if (!kvm_host_owns_hyp_mappings())
+ return -EPERM;
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
@@ -407,6 +525,9 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
unsigned long addr;
int ret;
+ if (is_protected_kvm_enabled())
+ return -EPERM;
+
*kaddr = ioremap(phys_addr, size);
if (!*kaddr)
return -ENOMEM;
@@ -516,7 +637,8 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
if (!pgt)
return -ENOMEM;
- err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
+ mmu->arch = &kvm->arch;
+ err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
if (err)
goto out_free_pgtable;
@@ -529,7 +651,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
for_each_possible_cpu(cpu)
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
- mmu->arch = &kvm->arch;
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
@@ -595,14 +716,14 @@ void stage2_unmap_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int idx;
+ int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
mmap_read_lock(current->mm);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots)
+ kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -650,6 +771,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
KVM_PGTABLE_PROT_R |
(writable ? KVM_PGTABLE_PROT_W : 0);
+ if (is_protected_kvm_enabled())
+ return -EPERM;
+
size += offset_in_page(guest_ipa);
guest_ipa &= PAGE_MASK;
@@ -1463,7 +1587,6 @@ out:
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -1473,25 +1596,24 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* allocated dirty_bitmap[], dirty pages will be tracked while the
* memory slot is write protected.
*/
- if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
/*
* If we're with initial-all-set, we don't need to write
* protect any pages because they're all reported as dirty.
* Huge pages and normal pages will be write protect gradually.
*/
if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
- kvm_mmu_wp_memory_region(kvm, mem->slot);
+ kvm_mmu_wp_memory_region(kvm, new->id);
}
}
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- hva_t hva = mem->userspace_addr;
- hva_t reg_end = hva + mem->memory_size;
+ hva_t hva, reg_end;
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1502,9 +1624,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the IPA
* space addressable by the KVM guest IPA space.
*/
- if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
+ if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
return -EFAULT;
+ hva = new->userspace_addr;
+ reg_end = hva + (new->npages << PAGE_SHIFT);
+
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and any holes
@@ -1536,7 +1661,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
- if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
break;
}
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/pkvm.c
index 578670e3f608..ebecb7c045f4 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -8,10 +8,9 @@
#include <linux/memblock.h>
#include <linux/sort.h>
-#include <asm/kvm_host.h>
+#include <asm/kvm_pkvm.h>
-#include <nvhe/memory.h>
-#include <nvhe/mm.h>
+#include "hyp_constants.h"
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
@@ -82,7 +81,8 @@ void __init kvm_hyp_reserve(void)
do {
prev = nr_pages;
nr_pages = hyp_mem_pages + prev;
- nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE);
+ nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE,
+ PAGE_SIZE);
nr_pages += __hyp_pgtable_max_pages(nr_pages);
} while (nr_pages != prev);
hyp_mem_pages += nr_pages;
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 3308ceefa129..fbcfd4ec6f92 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -30,6 +30,7 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
case ID_AA64DFR0_PMUVER_8_1:
case ID_AA64DFR0_PMUVER_8_4:
case ID_AA64DFR0_PMUVER_8_5:
+ case ID_AA64DFR0_PMUVER_8_7:
return GENMASK(15, 0);
default: /* Shouldn't be here, just for sanity */
WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
@@ -902,7 +903,7 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
*/
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 74c47d420253..3eae32876897 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -46,7 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
* specification (ARM DEN 0022A). This means all suspend states
* for KVM will preserve the register state.
*/
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
return PSCI_RET_SUCCESS;
@@ -109,7 +109,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
/*
* Make sure the reset request is observed if the change to
- * power_state is observed.
+ * power_off is observed.
*/
smp_wmb();
@@ -121,8 +121,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
{
- int i, matching_cpus = 0;
- unsigned long mpidr;
+ int matching_cpus = 0;
+ unsigned long i, mpidr;
unsigned long target_affinity;
unsigned long target_affinity_mask;
unsigned long lowest_affinity_level;
@@ -164,7 +164,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *tmp;
/*
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 27386f0d81e4..ecc40c8cd6f6 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -94,22 +94,31 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
{
void *buf;
unsigned int vl;
+ size_t reg_sz;
+ int ret;
vl = vcpu->arch.sve_max_vl;
/*
* Responsibility for these properties is shared between
- * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
+ * kvm_arm_init_sve(), kvm_vcpu_enable_sve() and
* set_sve_vls(). Double-check here just to be sure:
*/
if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl() ||
vl > VL_ARCH_MAX))
return -EIO;
- buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT);
+ reg_sz = vcpu_sve_state_size(vcpu);
+ buf = kzalloc(reg_sz, GFP_KERNEL_ACCOUNT);
if (!buf)
return -ENOMEM;
+ ret = kvm_share_hyp(buf, buf + reg_sz);
+ if (ret) {
+ kfree(buf);
+ return ret;
+ }
+
vcpu->arch.sve_state = buf;
vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
return 0;
@@ -141,7 +150,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- kfree(vcpu->arch.sve_state);
+ void *sve_state = vcpu->arch.sve_state;
+
+ kvm_vcpu_unshare_task_fp(vcpu);
+ kvm_unshare_hyp(vcpu, vcpu + 1);
+ if (sve_state)
+ kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
+ kfree(sve_state);
}
static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
@@ -170,7 +185,7 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu *tmp;
bool is32bit;
- int i;
+ unsigned long i;
is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
@@ -193,10 +208,9 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
* @vcpu: The VCPU pointer
*
- * This function finds the right table above and sets the registers on
- * the virtual CPU struct to their architecturally defined reset
- * values, except for registers whose reset is deferred until
- * kvm_arm_vcpu_finalize().
+ * This function sets the registers on the virtual CPU struct to their
+ * architecturally defined reset values, except for registers whose reset is
+ * deferred until kvm_arm_vcpu_finalize().
*
* Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
* ioctl or as part of handling a request issued by another VCPU in the PSCI
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 0a06d0648970..fc00304fe7d8 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -70,8 +70,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
- int i, ret;
struct kvm_vcpu *vcpu;
+ unsigned long i;
+ int ret;
if (irqchip_in_kernel(kvm))
return -EEXIST;
@@ -91,7 +92,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
return ret;
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (vcpu->arch.has_run_once)
+ if (vcpu_has_run_once(vcpu))
goto out_unlock;
}
ret = 0;
@@ -255,7 +256,8 @@ int vgic_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int ret = 0, i, idx;
+ int ret = 0, i;
+ unsigned long idx;
if (vgic_initialized(kvm))
return 0;
@@ -308,7 +310,7 @@ int vgic_init(struct kvm *kvm)
goto out;
}
- kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_for_each_vcpu(idx, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
ret = kvm_vgic_setup_default_irq_routing(kvm);
@@ -370,7 +372,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
static void __kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
vgic_debug_destroy(kvm);
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 0d000d2fe8d2..c6d52a1fd9c8 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -325,7 +325,7 @@ void unlock_all_vcpus(struct kvm *kvm)
bool lock_all_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *tmp_vcpu;
- int c;
+ unsigned long c;
/*
* Any time a vcpu is run, vcpu_load is called which tries to grab the
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index 5f9014ae595b..12e4c223e6b8 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
@@ -113,9 +113,8 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
int intid = val & 0xf;
int targets = (val >> 16) & 0xff;
int mode = (val >> 24) & 0x03;
- int c;
struct kvm_vcpu *vcpu;
- unsigned long flags;
+ unsigned long flags, c;
switch (mode) {
case 0x0: /* as specified by targets */
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index bf7ec4a78497..58e40b4874f8 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -754,7 +754,8 @@ static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
static int vgic_register_all_redist_iodevs(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int c, ret = 0;
+ unsigned long c;
+ int ret = 0;
kvm_for_each_vcpu(c, vcpu, kvm) {
ret = vgic_register_redist_iodev(vcpu);
@@ -763,10 +764,12 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
}
if (ret) {
- /* The current c failed, so we start with the previous one. */
+ /* The current c failed, so iterate over the previous ones. */
+ int i;
+
mutex_lock(&kvm->slots_lock);
- for (c--; c >= 0; c--) {
- vcpu = kvm_get_vcpu(kvm, c);
+ for (i = 0; i < c; i++) {
+ vcpu = kvm_get_vcpu(kvm, i);
vgic_unregister_redist_iodev(vcpu);
}
mutex_unlock(&kvm->slots_lock);
@@ -995,10 +998,10 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
struct kvm_vcpu *c_vcpu;
u16 target_cpus;
u64 mpidr;
- int sgi, c;
+ int sgi;
int vcpu_id = vcpu->vcpu_id;
bool broadcast;
- unsigned long flags;
+ unsigned long c, flags;
sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index 48c6067fc5ec..7068da080799 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -1050,7 +1050,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
return 0;
}
-struct kvm_io_device_ops kvm_io_gic_ops = {
+const struct kvm_io_device_ops kvm_io_gic_ops = {
.read = dispatch_mmio_read,
.write = dispatch_mmio_write,
};
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h
index fefcca2b14dc..3fa696f198a3 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.h
+++ b/arch/arm64/kvm/vgic/vgic-mmio.h
@@ -34,7 +34,7 @@ struct vgic_register_region {
};
};
-extern struct kvm_io_device_ops kvm_io_gic_ops;
+extern const struct kvm_io_device_ops kvm_io_gic_ops;
#define VGIC_ACCESS_8bit 1
#define VGIC_ACCESS_32bit 2
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 95a18cec14a3..645648349c99 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -293,12 +293,12 @@ int vgic_v2_map_resources(struct kvm *kvm)
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
- kvm_err("Need to set vgic cpu and dist addresses first\n");
+ kvm_debug("Need to set vgic cpu and dist addresses first\n");
return -ENXIO;
}
if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
- kvm_err("VGIC CPU and dist frames overlap\n");
+ kvm_debug("VGIC CPU and dist frames overlap\n");
return -EINVAL;
}
@@ -345,6 +345,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
int ret;
u32 vtr;
+ if (is_protected_kvm_enabled()) {
+ kvm_err("GICv2 not supported in protected mode\n");
+ return -ENXIO;
+ }
+
if (!info->vctrl.start) {
kvm_err("GICH not present in the firmware table\n");
return -ENXIO;
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 04f62c4b07fb..a33d4366b326 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -542,24 +542,24 @@ int vgic_v3_map_resources(struct kvm *kvm)
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int ret = 0;
- int c;
+ unsigned long c;
kvm_for_each_vcpu(c, vcpu, kvm) {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
- kvm_debug("vcpu %d redistributor base not set\n", c);
+ kvm_debug("vcpu %ld redistributor base not set\n", c);
return -ENXIO;
}
}
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
- kvm_err("Need to set vgic distributor addresses first\n");
+ kvm_debug("Need to set vgic distributor addresses first\n");
return -ENXIO;
}
if (!vgic_v3_check_base(kvm)) {
- kvm_err("VGIC redist and dist frames overlap\n");
+ kvm_debug("VGIC redist and dist frames overlap\n");
return -EINVAL;
}
@@ -651,7 +651,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
} else if (!PAGE_ALIGNED(info->vcpu.start)) {
pr_warn("GICV physical address 0x%llx not page aligned\n",
(unsigned long long)info->vcpu.start);
- } else {
+ } else if (kvm_get_mode() != KVM_MODE_PROTECTED) {
kvm_vgic_global_state.vcpu_base = info->vcpu.start;
kvm_vgic_global_state.can_emulate_gicv2 = true;
ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 772dd15a22c7..ad06ba6c9b00 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -189,7 +189,7 @@ void vgic_v4_configure_vsgis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_arm_halt_guest(kvm);
@@ -235,7 +235,8 @@ int vgic_v4_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int i, nr_vcpus, ret;
+ int nr_vcpus, ret;
+ unsigned long i;
if (!kvm_vgic_global_state.has_gicv4)
return 0; /* Nothing to see here... move along. */
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 5dad4996cfb2..9b98876a8a93 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -990,7 +990,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
void vgic_kick_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int c;
+ unsigned long c;
/*
* We've injected an interrupt, time to find out who deserves
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9a9e7675b187..77341b160aca 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -304,7 +304,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
}
#ifdef CONFIG_KASAN_HW_TAGS
@@ -608,10 +608,8 @@ retry:
}
if (fault & VM_FAULT_RETRY) {
- if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
- mm_flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ mm_flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a8834434af99..db63cc885771 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -172,7 +172,7 @@ int pfn_is_map_memory(unsigned long pfn)
}
EXPORT_SYMBOL(pfn_is_map_memory);
-static phys_addr_t memory_limit = PHYS_ADDR_MAX;
+static phys_addr_t memory_limit __ro_after_init = PHYS_ADDR_MAX;
/*
* Limit the memory size that was specified via FDT.
diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c
index cb2a0d94a144..2df115d0e210 100644
--- a/arch/csky/abiv1/alignment.c
+++ b/arch/csky/abiv1/alignment.c
@@ -294,7 +294,7 @@ bad_area:
__func__, opcode, rz, rx, imm, addr);
show_regs(regs);
bust_spinlocks(0);
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
}
force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr);
diff --git a/arch/csky/include/asm/bitops.h b/arch/csky/include/asm/bitops.h
index 02b72a000767..72e1b2aa29a0 100644
--- a/arch/csky/include/asm/bitops.h
+++ b/arch/csky/include/asm/bitops.h
@@ -59,7 +59,6 @@ static __always_inline unsigned long __fls(unsigned long x)
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c
index 2020af88b636..6e426fba0119 100644
--- a/arch/csky/kernel/traps.c
+++ b/arch/csky/kernel/traps.c
@@ -109,7 +109,7 @@ void die(struct pt_regs *regs, const char *str)
if (panic_on_oops)
panic("Fatal exception");
if (ret != NOTIFY_STOP)
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index 466ad949818a..7215a46b6b8e 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -67,7 +67,7 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr)
pr_alert("Unable to handle kernel paging request at virtual "
"addr 0x%08lx, pc: 0x%08lx\n", addr, regs->pc);
die(regs, "Oops");
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
}
static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
diff --git a/arch/h8300/boot/compressed/Makefile b/arch/h8300/boot/compressed/Makefile
index 5942793f77a0..6ab2fa5ba105 100644
--- a/arch/h8300/boot/compressed/Makefile
+++ b/arch/h8300/boot/compressed/Makefile
@@ -30,9 +30,11 @@ $(obj)/vmlinux.bin: vmlinux FORCE
suffix-$(CONFIG_KERNEL_GZIP) := gzip
suffix-$(CONFIG_KERNEL_LZO) := lzo
+compress-$(CONFIG_KERNEL_GZIP) := gzip
+compress-$(CONFIG_KERNEL_LZO) := lzo_with_size
$(obj)/vmlinux.bin.$(suffix-y): $(obj)/vmlinux.bin FORCE
- $(call if_changed,$(suffix-y))
+ $(call if_changed,$(compress-y))
LDFLAGS_piggy.o := -r --format binary --oformat elf32-h8300-linux -T
OBJCOPYFLAGS := -O binary
diff --git a/arch/h8300/boot/dts/Makefile b/arch/h8300/boot/dts/Makefile
index 69fcd817892c..c36bbd1f2592 100644
--- a/arch/h8300/boot/dts/Makefile
+++ b/arch/h8300/boot/dts/Makefile
@@ -1,9 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-ifneq '$(CONFIG_H8300_BUILTIN_DTB)' '""'
-BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_H8300_BUILTIN_DTB)).dtb.o
-endif
-
-obj-y += $(BUILTIN_DTB)
+obj-y += $(addsuffix .dtb.o, $(CONFIG_H8300_BUILTIN_DTB))
dtb-$(CONFIG_H8300H_SIM) := h8300h_sim.dtb
dtb-$(CONFIG_H8S_SIM) := h8s_sim.dtb
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index c867a80cab5b..4489e3d6edd3 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -168,7 +168,6 @@ static inline unsigned long __ffs(unsigned long word)
return result;
}
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c
index bdbe988d8dbc..a92c39e03802 100644
--- a/arch/h8300/kernel/traps.c
+++ b/arch/h8300/kernel/traps.c
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
+#include <linux/sched/task.h>
#include <linux/mm_types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -106,7 +107,7 @@ void die(const char *str, struct pt_regs *fp, unsigned long err)
dump(fp);
spin_unlock_irq(&die_lock);
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
static int kstack_depth_to_print = 24;
diff --git a/arch/h8300/mm/fault.c b/arch/h8300/mm/fault.c
index d4bc9c16f2df..b465441f490d 100644
--- a/arch/h8300/mm/fault.c
+++ b/arch/h8300/mm/fault.c
@@ -51,7 +51,7 @@ asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
printk(" at virtual address %08lx\n", address);
if (!user_mode(regs))
die("Oops", regs, error_code);
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
return 1;
}
diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index 71429f756af0..75d6ba3643b8 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -271,7 +271,6 @@ static inline unsigned long __fls(unsigned long word)
}
#include <asm-generic/bitops/lock.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index edfc35dafeb1..1240f038cce0 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -214,7 +214,7 @@ int die(const char *str, struct pt_regs *regs, long err)
panic("Fatal exception");
oops_exit();
- do_exit(err);
+ make_task_dead(err);
return 0;
}
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index ef32c5a84ff3..4fac4b9eb316 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -98,11 +98,9 @@ good_area:
/* The most common case -- we are done. */
if (likely(!(fault & VM_FAULT_ERROR))) {
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 1e33666fa679..703952819e10 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -32,6 +32,7 @@ config IA64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
select HAVE_FUNCTION_TRACER
+ select HAVE_SETUP_PER_CPU_AREA
select TTY
select HAVE_ARCH_TRACEHOOK
select HAVE_VIRT_CPU_ACCOUNTING
@@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY
bool
default y
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
-
config DMI
bool
default y
@@ -292,6 +290,7 @@ config NUMA
bool "NUMA support"
depends on !FLATMEM
select SMP
+ select USE_PERCPU_NUMA_NODE_ID
help
Say Y to compile the kernel to support NUMA (Non-Uniform Memory
Access). This option is for configuring high-end multiprocessor
@@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION
def_bool y
depends on NUMA
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
config HAVE_MEMORYLESS_NODES
def_bool NUMA
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index 2f24ee6459d2..577be93c0818 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -441,8 +441,6 @@ static __inline__ unsigned long __arch_hweight64(unsigned long x)
#endif /* __KERNEL__ */
-#include <asm-generic/bitops/find.h>
-
#ifdef __KERNEL__
#include <asm-generic/bitops/le.h>
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 5bfc79be4cef..23c203639a96 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -176,7 +176,7 @@ mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
spin_unlock(&mca_bh_lock);
/* This process is about to be killed itself */
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
}
/**
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index 2cba53c1da82..360f36b0eb3f 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -848,7 +848,7 @@ register_unwind_table (struct module *mod)
{
struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr;
struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start);
- struct unw_table_entry tmp, *e1, *e2, *core, *init;
+ struct unw_table_entry *e1, *e2, *core, *init;
unsigned long num_init = 0, num_core = 0;
/* First, count how many init and core unwind-table entries there are. */
@@ -865,9 +865,7 @@ register_unwind_table (struct module *mod)
for (e1 = start; e1 < end; ++e1) {
for (e2 = e1 + 1; e2 < end; ++e2) {
if (e2->start_offset < e1->start_offset) {
- tmp = *e1;
- *e1 = *e2;
- *e2 = tmp;
+ swap(*e1, *e2);
}
}
}
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index a25ab9b37953..bd3ba276e69c 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -282,7 +282,7 @@ salinfo_event_open(struct inode *inode, struct file *file)
static ssize_t
salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
- struct salinfo_data *data = PDE_DATA(file_inode(file));
+ struct salinfo_data *data = pde_data(file_inode(file));
char cmd[32];
size_t size;
int i, n, cpu = -1;
@@ -340,7 +340,7 @@ static const struct proc_ops salinfo_event_proc_ops = {
static int
salinfo_log_open(struct inode *inode, struct file *file)
{
- struct salinfo_data *data = PDE_DATA(inode);
+ struct salinfo_data *data = pde_data(inode);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -365,7 +365,7 @@ salinfo_log_open(struct inode *inode, struct file *file)
static int
salinfo_log_release(struct inode *inode, struct file *file)
{
- struct salinfo_data *data = PDE_DATA(inode);
+ struct salinfo_data *data = pde_data(inode);
if (data->state == STATE_NO_DATA) {
vfree(data->log_buffer);
@@ -433,7 +433,7 @@ retry:
static ssize_t
salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
- struct salinfo_data *data = PDE_DATA(file_inode(file));
+ struct salinfo_data *data = pde_data(file_inode(file));
u8 *buf;
u64 bufsize;
@@ -494,7 +494,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu)
static ssize_t
salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
{
- struct salinfo_data *data = PDE_DATA(file_inode(file));
+ struct salinfo_data *data = pde_data(file_inode(file));
char cmd[32];
size_t size;
u32 offset;
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 31fb84de2d21..5010348fa21b 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -208,10 +208,7 @@ sort_regions (struct rsvd_region *rsvd_region, int max)
while (max--) {
for (j = 0; j < max; ++j) {
if (rsvd_region[j].start > rsvd_region[j+1].start) {
- struct rsvd_region tmp;
- tmp = rsvd_region[j];
- rsvd_region[j] = rsvd_region[j + 1];
- rsvd_region[j + 1] = tmp;
+ swap(rsvd_region[j], rsvd_region[j + 1]);
}
}
}
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 707ae121f6d3..78b1d03e86e1 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -370,3 +370,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 3639e0a7cb3b..e4992917a24b 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -264,6 +264,7 @@ static struct attribute * cache_default_attrs[] = {
&shared_cpu_map.attr,
NULL
};
+ATTRIBUTE_GROUPS(cache_default);
#define to_object(k) container_of(k, struct cache_info, kobj)
#define to_attr(a) container_of(a, struct cache_attr, attr)
@@ -284,7 +285,7 @@ static const struct sysfs_ops cache_sysfs_ops = {
static struct kobj_type cache_ktype = {
.sysfs_ops = &cache_sysfs_ops,
- .default_attrs = cache_default_attrs,
+ .default_groups = cache_default_groups,
};
static struct kobj_type cache_ktype_percpu_entry = {
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index e13cb905930f..753642366e12 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -85,7 +85,7 @@ die (const char *str, struct pt_regs *regs, long err)
if (panic_on_oops)
panic("Fatal exception");
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
return 0;
}
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 51883a66aeb5..816803636a75 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -171,7 +171,7 @@ failed:
* @n_pages: number of contiguous pages to allocate
*
* Allocate the specified number of contiguous uncached pages on the
- * the requested node. If not enough contiguous uncached pages are available
+ * requested node. If not enough contiguous uncached pages are available
* on the requested node, roundrobin starting with the next higher node.
*/
unsigned long uncached_alloc_page(int starting_nid, int n_pages)
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 02de2e70c587..07379d1a227f 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -156,17 +156,15 @@ retry:
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- /* No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
+ /* No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
- goto retry;
- }
+ goto retry;
}
mmap_read_unlock(mm);
@@ -259,7 +257,7 @@ retry:
regs = NULL;
bust_spinlocks(0);
if (regs)
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
return;
out_of_memory:
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index a4b6c7108465..bc9952f8be66 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -45,7 +45,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 2db721965520..a77269c6e5ba 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -41,7 +41,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index c266a704eecd..7a74efa6b9a1 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -48,7 +48,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index f644f08dd6ed..a5323bf2eb33 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -38,7 +38,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index e4924650b687..5e80aa0869d5 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -40,7 +40,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 24113871ea76..e84326a3f62d 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -39,7 +39,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 6a7e4be70eea..337552f43339 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -59,7 +59,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 1d223247aff0..7b688f7d272a 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -37,7 +37,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 961f789f96c9..7c2cb31d63dd 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -38,7 +38,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index ff4b5e469390..ca43897af26d 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -39,7 +39,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 5f228621d0cc..e3d515f37144 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -35,7 +35,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index a600cb9e68c2..d601606c969b 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -35,7 +35,6 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
-CONFIG_CLEANCACHE=y
CONFIG_ZPOOL=m
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index 7b93e1fd8ffa..51283db53667 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -529,6 +529,4 @@ static inline int __fls(int x)
#include <asm-generic/bitops/le.h>
#endif /* __KERNEL__ */
-#include <asm-generic/bitops/find.h>
-
#endif /* _M68K_BITOPS_H */
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index 94b3b274186d..aa3a0b8d07e9 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -273,17 +273,7 @@ out_eio:
asmlinkage void syscall_trace(void)
{
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
- ? 0x80 : 0));
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
+ ptrace_report_syscall(0);
}
#if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 45bc32a41b90..b1f3940bc298 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -449,3 +449,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 34d6458340b0..59fc63feb0dc 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -1131,7 +1131,7 @@ void die_if_kernel (char *str, struct pt_regs *fp, int nr)
pr_crit("%s: %08x\n", str, nr);
show_registers(fp);
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
asmlinkage void set_esp0(unsigned long ssp)
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index ef46e77e97a5..1493cf5eac1e 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -48,7 +48,7 @@ int send_fault_sig(struct pt_regs *regs)
pr_alert("Unable to handle kernel access");
pr_cont(" at virtual address %p\n", addr);
die_if_kernel("Oops", regs, 0 /*error_code*/);
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
}
return 1;
@@ -153,18 +153,16 @@ good_area:
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- /*
- * No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
- goto retry;
- }
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index e775a696aa6f..1826d9ce4459 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -5,10 +5,10 @@ UTS_SYSNAME = -DUTS_SYSNAME=\"Linux\"
# What CPU version are we building for, and crack it open
# as major.minor.rev
-CPU_VER := $(shell echo $(CONFIG_XILINX_MICROBLAZE0_HW_VER))
-CPU_MAJOR := $(shell echo $(CPU_VER) | cut -d '.' -f 1)
-CPU_MINOR := $(shell echo $(CPU_VER) | cut -d '.' -f 2)
-CPU_REV := $(shell echo $(CPU_VER) | cut -d '.' -f 3)
+CPU_VER := $(CONFIG_XILINX_MICROBLAZE0_HW_VER)
+CPU_MAJOR := $(word 1, $(subst ., , $(CPU_VER)))
+CPU_MINOR := $(word 2, $(subst ., , $(CPU_VER)))
+CPU_REV := $(word 3, $(subst ., , $(CPU_VER)))
export CPU_VER CPU_MAJOR CPU_MINOR CPU_REV
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index 908788497b28..fd153d5fab98 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -44,10 +44,10 @@ void die(const char *str, struct pt_regs *fp, long err)
pr_warn("Oops: %s, sig: %ld\n", str, err);
show_regs(fp);
spin_unlock_irq(&die_lock);
- /* do_exit() should take care of panic'ing from an interrupt
+ /* make_task_dead() should take care of panic'ing from an interrupt
* context so we don't handle it here
*/
- do_exit(err);
+ make_task_dead(err);
}
/* for user application debugging */
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 2204bde3ce4a..820145e47350 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -455,3 +455,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index b3fed2cecf84..a9626e6a68af 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -232,18 +232,16 @@ good_area:
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
-
- /*
- * No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
-
- goto retry;
- }
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
+
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 3dd8c4618293..058446f01487 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -32,7 +32,6 @@ config MIPS
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
- select GENERIC_FIND_FIRST_BIT
select GENERIC_GETTIMEOFDAY
select GENERIC_IOMAP
select GENERIC_IRQ_PROBE
@@ -2674,6 +2673,8 @@ config NUMA
bool "NUMA Support"
depends on SYS_SUPPORTS_NUMA
select SMP
+ select HAVE_SETUP_PER_CPU_AREA
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
help
Say Y to compile the kernel to support NUMA (Non-Uniform Memory
Access). This option improves performance on systems with more
@@ -2684,14 +2685,6 @@ config NUMA
config SYS_SUPPORTS_NUMA
bool
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
config RELOCATABLE
bool "Relocatable kernel"
depends on SYS_SUPPORTS_RELOCATABLE
diff --git a/arch/mips/bcm63xx/dev-wdt.c b/arch/mips/bcm63xx/dev-wdt.c
index 2a2346a99bcb..42130914a3c2 100644
--- a/arch/mips/bcm63xx/dev-wdt.c
+++ b/arch/mips/bcm63xx/dev-wdt.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
+#include <linux/platform_data/bcm7038_wdt.h>
#include <bcm63xx_cpu.h>
static struct resource wdt_resources[] = {
@@ -19,11 +20,18 @@ static struct resource wdt_resources[] = {
},
};
+static struct bcm7038_wdt_platform_data bcm63xx_wdt_pdata = {
+ .clk_name = "periph",
+};
+
static struct platform_device bcm63xx_wdt_device = {
.name = "bcm63xx-wdt",
.id = -1,
.num_resources = ARRAY_SIZE(wdt_resources),
.resource = wdt_resources,
+ .dev = {
+ .platform_data = &bcm63xx_wdt_pdata,
+ },
};
int __init bcm63xx_wdt_register(void)
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 8b03ef13133a..5a15d51e8884 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -64,12 +64,12 @@ $(obj)/vmlinux.bin: $(KBUILD_IMAGE) FORCE
$(call if_changed,objcopy)
tool_$(CONFIG_KERNEL_GZIP) = gzip
-tool_$(CONFIG_KERNEL_BZIP2) = bzip2
-tool_$(CONFIG_KERNEL_LZ4) = lz4
-tool_$(CONFIG_KERNEL_LZMA) = lzma
-tool_$(CONFIG_KERNEL_LZO) = lzo
-tool_$(CONFIG_KERNEL_XZ) = xzkern
-tool_$(CONFIG_KERNEL_ZSTD) = zstd22
+tool_$(CONFIG_KERNEL_BZIP2) = bzip2_with_size
+tool_$(CONFIG_KERNEL_LZ4) = lz4_with_size
+tool_$(CONFIG_KERNEL_LZMA) = lzma_with_size
+tool_$(CONFIG_KERNEL_LZO) = lzo_with_size
+tool_$(CONFIG_KERNEL_XZ) = xzkern_with_size
+tool_$(CONFIG_KERNEL_ZSTD) = zstd22_with_size
targets += vmlinux.bin.z
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 3812082b8295..b4bf754f7db3 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -444,7 +444,6 @@ static inline int ffs(int word)
}
#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/find.h>
#ifdef __KERNEL__
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 999bdd4f25b4..717716cc51c5 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -898,7 +898,6 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
int kvm_arch_flush_remote_tlb(struct kvm *kvm);
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 72d02d363f36..253ff994ed2e 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -388,3 +388,4 @@
# 447 reserved for memfd_secret
448 n32 process_mrelease sys_process_mrelease
449 n32 futex_waitv sys_futex_waitv
+450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index e2c481fcede6..3f1886ad9d80 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -364,3 +364,4 @@
# 447 reserved for memfd_secret
448 n64 process_mrelease sys_process_mrelease
449 n64 futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 3714c97b2643..8f243e35a7b2 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -437,3 +437,4 @@
# 447 reserved for memfd_secret
448 o32 process_mrelease sys_process_mrelease
449 o32 futex_waitv sys_futex_waitv
+450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index d26b0fb8ea06..a486486b2355 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -422,7 +422,7 @@ void __noreturn die(const char *str, struct pt_regs *regs)
if (regs && kexec_should_crash(current))
crash_kexec(regs);
- do_exit(sig);
+ make_task_dead(sig);
}
extern struct exception_table_entry __start___dbe_table[];
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index a77297480f56..91d197bee9c0 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -27,6 +27,7 @@ config KVM
select KVM_MMIO
select MMU_NOTIFIER
select SRCU
+ select INTERVAL_TREE
help
Support for hosting Guest kernels.
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index d3710959da55..21ff75bcdbc4 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -2,9 +2,10 @@
# Makefile for KVM support for MIPS
#
+include $(srctree)/virt/kvm/Makefile.kvm
+
ccflags-y += -Ivirt/kvm -Iarch/mips/kvm
-kvm-y := $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
kvm-$(CONFIG_CPU_HAS_MSA) += msa.o
kvm-y += mips.o emulate.o entry.o \
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 22e745e49b0a..b494d8d39290 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -952,7 +952,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
if (!vcpu->arch.pending_exceptions) {
kvm_vz_lose_htimer(vcpu);
vcpu->arch.wait = 1;
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
/*
* We we are runnable, then definitely go off to user space to
diff --git a/arch/mips/kvm/loongson_ipi.c b/arch/mips/kvm/loongson_ipi.c
index 3681fc8fba38..5d53f32d837c 100644
--- a/arch/mips/kvm/loongson_ipi.c
+++ b/arch/mips/kvm/loongson_ipi.c
@@ -120,7 +120,7 @@ static int loongson_vipi_write(struct loongson_kvm_ipi *ipi,
s->status |= data;
irq.cpu = id;
irq.irq = 6;
- kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
+ kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq);
break;
case CORE0_CLEAR_OFF:
@@ -128,7 +128,7 @@ static int loongson_vipi_write(struct loongson_kvm_ipi *ipi,
if (!s->status) {
irq.cpu = id;
irq.irq = -6;
- kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
+ kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq);
}
break;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index aa20d074d388..e59cb6246f76 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -171,25 +171,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
}
-void kvm_mips_free_vcpus(struct kvm *kvm)
-{
- unsigned int i;
- struct kvm_vcpu *vcpu;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- kvm_vcpu_destroy(vcpu);
- }
-
- mutex_lock(&kvm->lock);
-
- for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
- kvm->vcpus[i] = NULL;
-
- atomic_set(&kvm->online_vcpus, 0);
-
- mutex_unlock(&kvm->lock);
-}
-
static void kvm_mips_free_gpa_pt(struct kvm *kvm)
{
/* It should always be safe to remove after flushing the whole range */
@@ -199,7 +180,7 @@ static void kvm_mips_free_gpa_pt(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- kvm_mips_free_vcpus(kvm);
+ kvm_destroy_vcpus(kvm);
kvm_mips_free_gpa_pt(kvm);
}
@@ -233,25 +214,20 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
int needs_flush;
- kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n",
- __func__, kvm, mem->slot, mem->guest_phys_addr,
- mem->memory_size, mem->userspace_addr);
-
/*
* If dirty page logging is enabled, write protect all pages in the slot
* ready for dirty logging.
@@ -498,7 +474,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
if (irq->cpu == -1)
dvcpu = vcpu;
else
- dvcpu = vcpu->kvm->vcpus[irq->cpu];
+ dvcpu = kvm_get_vcpu(vcpu->kvm, irq->cpu);
if (intr == 2 || intr == 3 || intr == 4 || intr == 6) {
kvm_mips_callbacks->queue_io_int(dvcpu, irq);
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index e7abda9c013f..44f98100e84e 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -171,18 +171,17 @@ good_area:
goto do_sigbus;
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
- /*
- * No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 325e1552cbea..5a8002839550 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -519,17 +519,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return node_distance(cpu_to_node(from), cpu_to_node(to));
}
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
+static int __init pcpu_cpu_to_node(int cpu)
{
- return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
- MEMBLOCK_ALLOC_ACCESSIBLE,
- cpu_to_node(cpu));
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
+ return cpu_to_node(cpu);
}
void __init setup_per_cpu_areas(void)
@@ -545,7 +537,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("Failed to initialize percpu areas.");
diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
index bd71f5b14238..d6efffd4dd20 100644
--- a/arch/mips/ralink/mt7621.c
+++ b/arch/mips/ralink/mt7621.c
@@ -10,6 +10,8 @@
#include <linux/slab.h>
#include <linux/sys_soc.h>
#include <linux/memblock.h>
+#include <linux/pci.h>
+#include <linux/bug.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
@@ -22,6 +24,35 @@
static void *detect_magic __initdata = detect_memory_region;
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+ struct resource_entry *entry;
+ resource_size_t mask;
+
+ entry = resource_list_first_type(&bridge->windows, IORESOURCE_MEM);
+ if (!entry) {
+ pr_err("Cannot get memory resource\n");
+ return -EINVAL;
+ }
+
+ if (mips_cps_numiocu(0)) {
+ /*
+ * Hardware doesn't accept mask values with 1s after
+ * 0s (e.g. 0xffef), so warn if that's happen
+ */
+ mask = ~(entry->res->end - entry->res->start) & CM_GCR_REGn_MASK_ADDRMASK;
+ WARN_ON(mask && BIT(ffz(~mask)) - 1 != ~mask);
+
+ write_gcr_reg1_base(entry->res->start);
+ write_gcr_reg1_mask(mask | CM_GCR_REGn_MASK_CMTGT_IOCU0);
+ pr_info("PCI coherence region base: 0x%08llx, mask/settings: 0x%08llx\n",
+ (unsigned long long)read_gcr_reg1_base(),
+ (unsigned long long)read_gcr_reg1_mask());
+ }
+
+ return 0;
+}
+
phys_addr_t mips_cpc_default_phys_base(void)
{
panic("Cannot detect cpc address");
diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile
index 797ad9b450af..b33d5d81b6ae 100644
--- a/arch/nds32/Makefile
+++ b/arch/nds32/Makefile
@@ -31,12 +31,6 @@ core-y += arch/nds32/kernel/ arch/nds32/mm/
core-$(CONFIG_FPU) += arch/nds32/math-emu/
libs-y += arch/nds32/lib/
-ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'
-BUILTIN_DTB := y
-else
-BUILTIN_DTB := n
-endif
-
ifdef CONFIG_CPU_LITTLE_ENDIAN
KBUILD_CFLAGS += $(call cc-option, -EL)
KBUILD_AFLAGS += $(call cc-option, -EL)
diff --git a/arch/nds32/boot/dts/Makefile b/arch/nds32/boot/dts/Makefile
index f84bd529b6fd..4fc69562eae8 100644
--- a/arch/nds32/boot/dts/Makefile
+++ b/arch/nds32/boot/dts/Makefile
@@ -1,7 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'
-BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_NDS32_BUILTIN_DTB)).dtb.o
-else
-BUILTIN_DTB :=
-endif
-obj-$(CONFIG_OF) += $(BUILTIN_DTB)
+obj-$(CONFIG_OF) += $(addsuffix .dtb.o, $(CONFIG_NDS32_BUILTIN_DTB))
diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c
index 9edd7ed7d7bf..701c09a668de 100644
--- a/arch/nds32/kernel/fpu.c
+++ b/arch/nds32/kernel/fpu.c
@@ -223,7 +223,7 @@ inline void handle_fpu_exception(struct pt_regs *regs)
}
} else if (fpcsr & FPCSR_mskRIT) {
if (!user_mode(regs))
- do_exit(SIGILL);
+ make_task_dead(SIGILL);
si_signo = SIGILL;
}
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index ca75d475eda4..c0a8f3344fb9 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -141,7 +141,7 @@ void __noreturn die(const char *str, struct pt_regs *regs, int err)
bust_spinlocks(0);
spin_unlock_irq(&die_lock);
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
EXPORT_SYMBOL(die);
@@ -240,7 +240,7 @@ void unhandled_interruption(struct pt_regs *regs)
pr_emerg("unhandled_interruption\n");
show_regs(regs);
if (!user_mode(regs))
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
force_sig(SIGKILL);
}
@@ -251,7 +251,7 @@ void unhandled_exceptions(unsigned long entry, unsigned long addr,
addr, type);
show_regs(regs);
if (!user_mode(regs))
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
force_sig(SIGKILL);
}
@@ -278,7 +278,7 @@ void do_revinsn(struct pt_regs *regs)
pr_emerg("Reserved Instruction\n");
show_regs(regs);
if (!user_mode(regs))
- do_exit(SIGILL);
+ make_task_dead(SIGILL);
force_sig(SIGILL);
}
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 1d139b117168..636977a1c8b9 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -230,16 +230,14 @@ good_area:
goto bad_area;
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
-
- /* No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
- goto retry;
- }
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
+
+ /* No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/nios2/boot/dts/Makefile b/arch/nios2/boot/dts/Makefile
index a91a0b09be63..e9e31bb40df8 100644
--- a/arch/nios2/boot/dts/Makefile
+++ b/arch/nios2/boot/dts/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y := $(patsubst "%.dts",%.dtb.o,$(CONFIG_NIOS2_DTB_SOURCE))
+obj-y := $(patsubst %.dts,%.dtb.o,$(CONFIG_NIOS2_DTB_SOURCE))
dtstree := $(srctree)/$(src)
dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c
index 596986a74a26..85ac49d64cf7 100644
--- a/arch/nios2/kernel/traps.c
+++ b/arch/nios2/kernel/traps.c
@@ -37,10 +37,10 @@ void die(const char *str, struct pt_regs *regs, long err)
show_regs(regs);
spin_unlock_irq(&die_lock);
/*
- * do_exit() should take care of panic'ing from an interrupt
+ * make_task_dead() should take care of panic'ing from an interrupt
* context so we don't handle it here
*/
- do_exit(err);
+ make_task_dead(err);
}
void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr)
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
index 9476feecf512..a32f14cd72f2 100644
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -149,18 +149,16 @@ good_area:
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- /*
- * No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
- goto retry;
- }
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index c2491b295d60..f724b3f1aeed 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -10,6 +10,7 @@ config OPENRISC
select ARCH_HAS_DMA_SET_UNCACHED
select ARCH_HAS_DMA_CLEAR_UNCACHED
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select COMMON_CLK
select OF
select OF_EARLY_FLATTREE
select IRQ_DOMAIN
diff --git a/arch/openrisc/boot/dts/Makefile b/arch/openrisc/boot/dts/Makefile
index 17dd791a833f..13db5a2aab52 100644
--- a/arch/openrisc/boot/dts/Makefile
+++ b/arch/openrisc/boot/dts/Makefile
@@ -1,9 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-ifneq '$(CONFIG_OPENRISC_BUILTIN_DTB)' '""'
-BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_OPENRISC_BUILTIN_DTB)).dtb.o
-else
-BUILTIN_DTB :=
-endif
-obj-y += $(BUILTIN_DTB)
+obj-y += $(addsuffix .dtb.o, $(CONFIG_OPENRISC_BUILTIN_DTB))
#DTC_FLAGS ?= -p 1024
diff --git a/arch/openrisc/include/asm/bitops.h b/arch/openrisc/include/asm/bitops.h
index 7f1ca35213d8..d773ed938acb 100644
--- a/arch/openrisc/include/asm/bitops.h
+++ b/arch/openrisc/include/asm/bitops.h
@@ -30,7 +30,6 @@
#include <asm/bitops/fls.h>
#include <asm/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
diff --git a/arch/openrisc/include/asm/syscalls.h b/arch/openrisc/include/asm/syscalls.h
index 3a7eeae6f56a..aa1c7e98722e 100644
--- a/arch/openrisc/include/asm/syscalls.h
+++ b/arch/openrisc/include/asm/syscalls.h
@@ -22,9 +22,11 @@ asmlinkage long sys_or1k_atomic(unsigned long type, unsigned long *v1,
asmlinkage long __sys_clone(unsigned long clone_flags, unsigned long newsp,
void __user *parent_tid, void __user *child_tid, int tls);
+asmlinkage long __sys_clone3(struct clone_args __user *uargs, size_t size);
asmlinkage long __sys_fork(void);
#define sys_clone __sys_clone
+#define sys_clone3 __sys_clone3
#define sys_fork __sys_fork
#endif /* __ASM_OPENRISC_SYSCALLS_H */
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index 59c6d3aa7081..3ca1b1f490b9 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -1001,11 +1001,10 @@ ENTRY(ret_from_fork)
l.lwz r11,PT_GPR11(r1)
/* The syscall fast path return expects call-saved registers
- * r12-r28 to be untouched, so we restore them here as they
+ * r14-r28 to be untouched, so we restore them here as they
* will have been effectively clobbered when arriving here
* via the call to switch()
*/
- l.lwz r12,PT_GPR12(r1)
l.lwz r14,PT_GPR14(r1)
l.lwz r16,PT_GPR16(r1)
l.lwz r18,PT_GPR18(r1)
@@ -1037,10 +1036,10 @@ ENTRY(ret_from_fork)
/* _switch MUST never lay on page boundry, cause it runs from
* effective addresses and beeing interrupted by iTLB miss would kill it.
- * dTLB miss seams to never accour in the bad place since data accesses
+ * dTLB miss seems to never accour in the bad place since data accesses
* are from task structures which are always page aligned.
*
- * The problem happens in RESTORE_ALL_NO_R11 where we first set the EPCR
+ * The problem happens in RESTORE_ALL where we first set the EPCR
* register, then load the previous register values and only at the end call
* the l.rfe instruction. If get TLB miss in beetwen the EPCR register gets
* garbled and we end up calling l.rfe with the wrong EPCR. (same probably
@@ -1068,9 +1067,8 @@ ENTRY(_switch)
/* No need to store r1/PT_SP as it goes into KSP below */
l.sw PT_GPR2(r1),r2
l.sw PT_GPR9(r1),r9
- /* This is wrong, r12 shouldn't be here... but GCC is broken for the time being
- * and expects r12 to be callee-saved... */
- l.sw PT_GPR12(r1),r12
+
+ /* Save callee-saved registers to the new pt_regs */
l.sw PT_GPR14(r1),r14
l.sw PT_GPR16(r1),r16
l.sw PT_GPR18(r1),r18
@@ -1111,9 +1109,7 @@ ENTRY(_switch)
/* No need to restore r10 */
/* ...and do not restore r11 */
- /* This is wrong, r12 shouldn't be here... but GCC is broken for the time being
- * and expects r12 to be callee-saved... */
- l.lwz r12,PT_GPR12(r1)
+ /* Restore callee-saved registers */
l.lwz r14,PT_GPR14(r1)
l.lwz r16,PT_GPR16(r1)
l.lwz r18,PT_GPR18(r1)
@@ -1166,15 +1162,18 @@ _fork_save_extra_regs_and_call:
ENTRY(__sys_clone)
l.movhi r29,hi(sys_clone)
- l.ori r29,r29,lo(sys_clone)
l.j _fork_save_extra_regs_and_call
- l.nop
+ l.ori r29,r29,lo(sys_clone)
+
+ENTRY(__sys_clone3)
+ l.movhi r29,hi(sys_clone3)
+ l.j _fork_save_extra_regs_and_call
+ l.ori r29,r29,lo(sys_clone3)
ENTRY(__sys_fork)
l.movhi r29,hi(sys_fork)
- l.ori r29,r29,lo(sys_fork)
l.j _fork_save_extra_regs_and_call
- l.nop
+ l.ori r29,r29,lo(sys_fork)
ENTRY(sys_rt_sigreturn)
l.jal _sys_rt_sigreturn
diff --git a/arch/openrisc/kernel/time.c b/arch/openrisc/kernel/time.c
index a6e69386f82a..6d18989d63d0 100644
--- a/arch/openrisc/kernel/time.c
+++ b/arch/openrisc/kernel/time.c
@@ -20,6 +20,7 @@
#include <linux/clockchips.h>
#include <linux/irq.h>
#include <linux/io.h>
+#include <linux/of_clk.h>
#include <asm/cpuinfo.h>
@@ -169,4 +170,7 @@ void __init time_init(void)
openrisc_timer_init();
openrisc_clockevent_init();
+
+ of_clk_init(NULL);
+ timer_probe();
}
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 0898cb159fac..0446a3c34372 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -212,7 +212,7 @@ void __noreturn die(const char *str, struct pt_regs *regs, long err)
__asm__ __volatile__("l.nop 1");
do {} while (1);
#endif
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
/* This is normally the 'Oops' routine */
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index f0fa6394a58e..80bb66ad42f6 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -177,18 +177,16 @@ good_area:
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- /*RGD modeled on Cris */
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ /*RGD modeled on Cris */
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- /* No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
+ /* No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
- goto retry;
- }
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile
index 116bd5c1873c..a294a1b58ee7 100644
--- a/arch/parisc/boot/compressed/Makefile
+++ b/arch/parisc/boot/compressed/Makefile
@@ -50,8 +50,6 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -R .note -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-vmlinux.bin.all-y := $(obj)/vmlinux.bin
-
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
suffix-$(CONFIG_KERNEL_LZ4) := lz4
@@ -59,18 +57,18 @@ suffix-$(CONFIG_KERNEL_LZMA) := lzma
suffix-$(CONFIG_KERNEL_LZO) := lzo
suffix-$(CONFIG_KERNEL_XZ) := xz
-$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lz4)
-$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzma)
-$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzo)
-$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,xzkern)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,bzip2_with_size)
+$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lz4_with_size)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzma_with_size)
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzo_with_size)
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,xzkern_with_size)
LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h
index daa2afd974fb..0ec9cfc5131f 100644
--- a/arch/parisc/include/asm/bitops.h
+++ b/arch/parisc/include/asm/bitops.h
@@ -203,7 +203,6 @@ static __inline__ int fls(unsigned int x)
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index b669f4b9040b..3a3d05438408 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -289,6 +289,7 @@ extern int _parisc_requires_coherency;
extern int running_on_qemu;
+extern void __noreturn toc_intr(struct pt_regs *regs);
extern void toc_handler(void);
extern unsigned int toc_handler_size;
extern unsigned int toc_handler_csum;
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index cceb09855e03..b91cb45ffd4e 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -48,6 +48,7 @@ struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL;
void __init setup_cmdline(char **cmdline_p)
{
extern unsigned int boot_args[];
+ char *p;
/* Collect stuff passed in from the boot loader */
@@ -56,9 +57,19 @@ void __init setup_cmdline(char **cmdline_p)
/* called from hpux boot loader */
boot_command_line[0] = '\0';
} else {
- strlcpy(boot_command_line, (char *)__va(boot_args[1]),
+ strscpy(boot_command_line, (char *)__va(boot_args[1]),
COMMAND_LINE_SIZE);
+ /* autodetect console type (if not done by palo yet) */
+ p = boot_command_line;
+ if (!str_has_prefix(p, "console=") && !strstr(p, " console=")) {
+ strlcat(p, " console=", COMMAND_LINE_SIZE);
+ if (PAGE0->mem_cons.cl_class == CL_DUPLEX)
+ strlcat(p, "ttyS0", COMMAND_LINE_SIZE);
+ else
+ strlcat(p, "tty0", COMMAND_LINE_SIZE);
+ }
+
#ifdef CONFIG_BLK_DEV_INITRD
if (boot_args[2] != 0) /* did palo pass us a ramdisk? */
{
@@ -68,7 +79,7 @@ void __init setup_cmdline(char **cmdline_p)
#endif
}
- strcpy(command_line, boot_command_line);
+ strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
}
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 358c00000755..68b46fe2f17c 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -447,3 +447,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/parisc/kernel/toc.c b/arch/parisc/kernel/toc.c
index be9a0bebe61e..e4b48d07afbd 100644
--- a/arch/parisc/kernel/toc.c
+++ b/arch/parisc/kernel/toc.c
@@ -10,9 +10,10 @@
#include <asm/pdc.h>
#include <asm/pdc_chassis.h>
#include <asm/ldcw.h>
+#include <asm/processor.h>
static unsigned int __aligned(16) toc_lock = 1;
-DEFINE_PER_CPU_PAGE_ALIGNED(char [16384], toc_stack);
+DEFINE_PER_CPU_PAGE_ALIGNED(char [16384], toc_stack) __visible;
static void toc20_to_pt_regs(struct pt_regs *regs, struct pdc_toc_pim_20 *toc)
{
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index eb41fece1910..b6fdebddc8e9 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -269,7 +269,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
panic("Fatal exception");
oops_exit();
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
/* gdb uses break 4,8 */
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 147868427b7c..e9eabf8f14d7 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -324,16 +324,14 @@ good_area:
goto bad_area;
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- /*
- * No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
- flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ if (fault & VM_FAULT_RETRY) {
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
mmap_read_unlock(mm);
return;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0631c9241af3..b779603978e1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
default 9 if PPC_16K_PAGES # 9 = 23 (8MB) - 14 (16K)
default 11 # 11 = 23 (8MB) - 12 (4K)
-config HAVE_SETUP_PER_CPU_AREA
- def_bool PPC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y if PPC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y if PPC64
-
config NR_IRQS
int "Number of virtual interrupt numbers"
range 32 1048576
@@ -241,6 +232,7 @@ config PPC
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
select HAVE_RSEQ
+ select HAVE_SETUP_PER_CPU_AREA if PPC64
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
@@ -255,6 +247,8 @@ config PPC
select MMU_GATHER_RCU_TABLE_FREE
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK if PPC64
select NEED_SG_DMA_LENGTH
select OF
select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
@@ -660,6 +654,7 @@ config NUMA
bool "NUMA Memory Allocation and Scheduler Support"
depends on PPC64 && SMP
default y if PPC_PSERIES || PPC_POWERNV
+ select USE_PERCPU_NUMA_NODE_ID
help
Enable NUMA (Non-Uniform Memory Access) support.
@@ -673,10 +668,6 @@ config NODES_SHIFT
default "4"
depends on NUMA
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
config HAVE_MEMORYLESS_NODES
def_bool y
depends on NUMA
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 9993c6256ad2..4b4827c475c6 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -365,7 +365,7 @@ image-$(CONFIG_PPC_PMAC) += zImage.coff zImage.miboot
endif
# Allow extra targets to be added to the defconfig
-image-y += $(subst ",,$(CONFIG_EXTRA_TARGETS))
+image-y += $(CONFIG_EXTRA_TARGETS)
initrd- := $(patsubst zImage%, zImage.initrd%, $(image-))
initrd-y := $(patsubst zImage%, zImage.initrd%, \
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
index c90702b04a53..48e5cd61599c 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
@@ -79,6 +79,7 @@ fman0: fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xfc000 0x1000>;
+ fsl,erratum-a009885;
};
xmdio0: mdio@fd000 {
@@ -86,6 +87,7 @@ fman0: fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xfd000 0x1000>;
+ fsl,erratum-a009885;
};
};
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index e9c945b123c6..e46143c32308 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -168,6 +168,11 @@
interrupts = <14>;
};
+ srnprot@d800060 {
+ compatible = "nintendo,hollywood-srnprot";
+ reg = <0x0d800060 0x4>;
+ };
+
GPIO: gpio@d8000c0 {
#gpio-cells = <2>;
compatible = "nintendo,hollywood-gpio";
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index 24c0e0ea5aeb..91a1b99f4e8f 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -68,7 +68,7 @@ CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
+CONFIG_RTC_DRV_GAMECUBE=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index a0c45bf2bfb1..0ab78c51455d 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -98,7 +98,7 @@ CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_HEARTBEAT=y
CONFIG_LEDS_TRIGGER_PANIC=y
CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
+CONFIG_RTC_DRV_GAMECUBE=y
CONFIG_NVMEM_NINTENDO_OTP=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index a05d8c62cbea..ea5d27dda8cf 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -328,8 +328,6 @@ unsigned long __arch_hweight64(__u64 w);
#include <asm-generic/bitops/hweight.h>
#endif
-#include <asm-generic/bitops/find.h>
-
/* wrappers that deal with KASAN instrumentation */
#include <asm-generic/bitops/instrumented-atomic.h>
#include <asm-generic/bitops/instrumented-lock.h>
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 17263276189e..a770443cd6e0 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -752,6 +752,7 @@ struct kvm_vcpu_arch {
u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
u32 last_inst;
+ struct rcuwait wait;
struct rcuwait *waitp;
struct kvmppc_vcore *vcore;
int ret;
@@ -867,6 +868,5 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_exit(void) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 33db83b82fbd..a14dbcd1b8ce 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -200,12 +200,11 @@ extern void kvmppc_core_destroy_vm(struct kvm *kvm);
extern void kvmppc_core_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change);
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
@@ -274,12 +273,11 @@ struct kvmppc_ops {
int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
int (*prepare_memory_region)(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change);
void (*commit_memory_region)(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change);
bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index 877817471e3c..6a029f2378e1 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -25,7 +25,7 @@ static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes
loff_t *ppos)
{
return simple_read_from_buffer(buf, nbytes, ppos,
- PDE_DATA(file_inode(file)), PAGE_SIZE);
+ pde_data(file_inode(file)), PAGE_SIZE);
}
static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
@@ -34,7 +34,7 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
return -EINVAL;
remap_pfn_range(vma, vma->vm_start,
- __pa(PDE_DATA(file_inode(file))) >> PAGE_SHIFT,
+ __pa(pde_data(file_inode(file))) >> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot);
return 0;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index d87f7c1103ce..be8577ac9397 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -771,50 +771,6 @@ void __init emergency_stack_init(void)
}
#ifdef CONFIG_SMP
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
- size_t align)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = early_cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE, node);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (early_cpu_to_node(from) == early_cpu_to_node(to))
@@ -823,53 +779,13 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static void __init pcpu_populate_pte(unsigned long addr)
+static __init int pcpu_cpu_to_node(int cpu)
{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ return early_cpu_to_node(cpu);
}
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
void __init setup_per_cpu_areas(void)
{
@@ -900,7 +816,7 @@ void __init setup_per_cpu_areas(void)
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
- pcpu_alloc_bootmem, pcpu_free_bootmem);
+ pcpu_cpu_to_node);
if (rc)
pr_warn("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
@@ -908,8 +824,7 @@ void __init setup_per_cpu_areas(void)
}
if (rc < 0)
- rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
- pcpu_populate_pte);
+ rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 15109af9d075..2600b4237292 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -529,3 +529,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 11741703d26e..a08bb7cefdc5 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -245,7 +245,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
if (panic_on_oops)
panic("Fatal exception");
- do_exit(signr);
+ make_task_dead(signr);
}
NOKPROBE_SYMBOL(oops_end);
@@ -792,9 +792,9 @@ int machine_check_generic(struct pt_regs *regs)
void die_mce(const char *str, struct pt_regs *regs, long err)
{
/*
- * The machine check wants to kill the interrupted context, but
- * do_exit() checks for in_interrupt() and panics in that case, so
- * exit the irq/nmi before calling die.
+ * The machine check wants to kill the interrupted context,
+ * but make_task_dead() checks for in_interrupt() and panics
+ * in that case, so exit the irq/nmi before calling die.
*/
if (in_nmi())
nmi_exit();
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index f947b77386a9..18e58085447c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
select KVM_VFIO
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
+ select INTERVAL_TREE
config KVM_BOOK3S_HANDLER
bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 583c14ef596e..9bdfc8b50899 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -4,11 +4,8 @@
#
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
-KVM := ../../../virt/kvm
-common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
-common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
-common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
+include $(srctree)/virt/kvm/Makefile.kvm
common-objs-y += powerpc.o emulate_loadstore.o
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
@@ -125,9 +122,8 @@ kvm-book3s_32-objs := \
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
-kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
+kvm-y += $(kvm-objs-m) $(kvm-objs-y)
obj-$(CONFIG_KVM_E500V2) += kvm.o
obj-$(CONFIG_KVM_E500MC) += kvm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b785f6772391..6d525285dbe8 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -847,21 +847,19 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
}
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
- enum kvm_mr_change change)
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
- return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem,
- change);
+ return kvm->arch.kvm_ops->prepare_memory_region(kvm, old, new, change);
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
+ kvm->arch.kvm_ops->commit_memory_region(kvm, old, new, change);
}
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 3fbd570f9c1e..0215f32932a9 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -337,7 +337,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *v;
/* flush this VA on all cpus */
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index feee40cb2ba1..61290282fd9e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -530,7 +530,7 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
bool large)
{
u64 mask = 0xFFFFFFFFFULL;
- long i;
+ unsigned long i;
struct kvm_vcpu *v;
dprintk("KVM MMU: tlbie(0x%lx)\n", va);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c63e263312a4..213232914367 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -734,11 +734,11 @@ void kvmppc_rmap_reset(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int srcu_idx;
+ int srcu_idx, bkt;
srcu_idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots) {
+ kvm_for_each_memslot(memslot, bkt, slots) {
/* Mutual exclusion with kvm_unmap_hva_range etc. */
spin_lock(&kvm->mmu_lock);
/*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f64e45d6c0f4..d1817cd9a691 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2084,7 +2084,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
*/
if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.vcore != vc)
@@ -4798,8 +4798,8 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int i, r;
- unsigned long n;
+ int r;
+ unsigned long n, i;
unsigned long *buf, *p;
struct kvm_vcpu *vcpu;
@@ -4866,41 +4866,38 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *slot)
}
static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- const struct kvm_userspace_memory_region *mem,
- enum kvm_mr_change change)
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
- unsigned long npages = mem->memory_size >> PAGE_SHIFT;
-
if (change == KVM_MR_CREATE) {
- unsigned long size = array_size(npages, sizeof(*slot->arch.rmap));
+ unsigned long size = array_size(new->npages, sizeof(*new->arch.rmap));
if ((size >> PAGE_SHIFT) > totalram_pages())
return -ENOMEM;
- slot->arch.rmap = vzalloc(size);
- if (!slot->arch.rmap)
+ new->arch.rmap = vzalloc(size);
+ if (!new->arch.rmap)
return -ENOMEM;
+ } else if (change != KVM_MR_DELETE) {
+ new->arch.rmap = old->arch.rmap;
}
return 0;
}
static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- unsigned long npages = mem->memory_size >> PAGE_SHIFT;
-
/*
- * If we are making a new memslot, it might make
+ * If we are creating or modifying a memslot, it might make
* some address that was previously cached as emulated
* MMIO be no longer emulated MMIO, so invalidate
* all the caches of emulated MMIO translations.
*/
- if (npages)
+ if (change != KVM_MR_DELETE)
atomic64_inc(&kvm->arch.mmio_update);
/*
@@ -5901,7 +5898,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
int mmu_was_ready;
int srcu_idx;
int ret = 0;
- int i;
+ unsigned long i;
if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
return ret;
@@ -5923,11 +5920,12 @@ static int kvmhv_svm_off(struct kvm *kvm)
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
struct kvm_memory_slot *memslot;
struct kvm_memslots *slots = __kvm_memslots(kvm, i);
+ int bkt;
if (!slots)
continue;
- kvm_for_each_memslot(memslot, slots) {
+ kvm_for_each_memslot(memslot, bkt, slots) {
kvmppc_uvmem_drop_pages(memslot, kvm, true);
uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
}
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index a2e34efb8d31..8f8daaeeb3b7 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -747,7 +747,7 @@ void kvmhv_release_all_nested(struct kvm *kvm)
struct kvm_nested_guest *gp;
struct kvm_nested_guest *freelist = NULL;
struct kvm_memory_slot *memslot;
- int srcu_idx;
+ int srcu_idx, bkt;
spin_lock(&kvm->mmu_lock);
for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
@@ -768,7 +768,7 @@ void kvmhv_release_all_nested(struct kvm *kvm)
}
srcu_idx = srcu_read_lock(&kvm->srcu);
- kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+ kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
kvmhv_free_memslot_nest_rmap(memslot);
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 28c436df9935..e414ca44839f 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -459,7 +459,7 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot, *m;
int ret = H_SUCCESS;
- int srcu_idx;
+ int srcu_idx, bkt;
kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START;
@@ -478,7 +478,7 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
/* register the memslot */
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots) {
+ kvm_for_each_memslot(memslot, bkt, slots) {
ret = __kvmppc_uvmem_memslot_create(kvm, memslot);
if (ret)
break;
@@ -486,7 +486,7 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
if (ret) {
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(m, slots) {
+ kvm_for_each_memslot(m, bkt, slots) {
if (m == memslot)
break;
__kvmppc_uvmem_memslot_delete(kvm, memslot);
@@ -647,7 +647,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
{
- int srcu_idx;
+ int srcu_idx, bkt;
struct kvm_memory_slot *memslot;
/*
@@ -662,7 +662,7 @@ unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
srcu_idx = srcu_read_lock(&kvm->srcu);
- kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+ kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
kvmppc_uvmem_drop_pages(memslot, kvm, false);
srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -821,7 +821,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int srcu_idx;
+ int srcu_idx, bkt;
long ret = H_SUCCESS;
if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
@@ -830,7 +830,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
/* migrate any unmoved normal pfn to device pfns*/
srcu_idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots) {
+ kvm_for_each_memslot(memslot, bkt, slots) {
ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
if (ret) {
/*
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 6bc9425acb32..34a801c3604a 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -428,7 +428,7 @@ static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
/************* MMU Notifiers *************/
static bool do_kvm_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- long i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -492,7 +492,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
if (msr & MSR_POW) {
if (!vcpu->arch.pending_exceptions) {
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
vcpu->stat.generic.halt_wakeup++;
@@ -1899,16 +1899,15 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
}
static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
- enum kvm_mr_change change)
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
return 0;
}
static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ac14239f3424..1f10e7dfcdd0 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -376,7 +376,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_stuff_tce(vcpu);
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
vcpu->stat.generic.halt_wakeup++;
return EMULATE_DONE;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index ebd5d920de8c..9cc466006e8b 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -942,8 +942,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
struct kvmppc_xics *xics = m->private;
struct kvm *kvm = xics->kvm;
struct kvm_vcpu *vcpu;
- int icsid, i;
- unsigned long flags;
+ int icsid;
+ unsigned long flags, i;
unsigned long t_rm_kick_vcpu, t_rm_check_resend;
unsigned long t_rm_notify_eoi;
unsigned long t_reject, t_check_resend;
@@ -1340,7 +1340,7 @@ static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
static void kvmppc_xics_release(struct kvm_device *dev)
{
struct kvmppc_xics *xics = dev->private;
- int i;
+ unsigned long i;
struct kvm *kvm = xics->kvm;
struct kvm_vcpu *vcpu;
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index 6231f76bdd66..8e4c79e2fcd8 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -116,7 +116,7 @@ static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
u32 nr)
{
struct kvm_vcpu *vcpu = NULL;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 225008882958..e216c068075d 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -368,7 +368,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvm_vcpu *vcpu;
- int i, rc;
+ unsigned long i;
+ int rc;
lockdep_assert_held(&xive->lock);
@@ -439,7 +440,8 @@ static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
{
struct kvm_vcpu *vcpu;
- int i, rc;
+ unsigned long i;
+ int rc;
/* Locate target server */
vcpu = kvmppc_xive_find_server(kvm, *server);
@@ -1519,7 +1521,8 @@ static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
static void xive_pre_save_scan(struct kvmppc_xive *xive)
{
struct kvm_vcpu *vcpu = NULL;
- int i, j;
+ unsigned long i;
+ int j;
/*
* See comment in xive_get_source() about how this
@@ -1700,7 +1703,7 @@ static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
{
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu = NULL;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -2037,7 +2040,7 @@ static void kvmppc_xive_release(struct kvm_device *dev)
struct kvmppc_xive *xive = dev->private;
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
pr_devel("Releasing xive device\n");
@@ -2291,7 +2294,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
u64 t_vm_h_cppr = 0;
u64 t_vm_h_eoi = 0;
u64 t_vm_h_ipi = 0;
- unsigned int i;
+ unsigned long i;
if (!kvm)
return 0;
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index e6a9651c6f1e..09d0657596c3 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -199,7 +199,7 @@ struct kvmppc_xive_vcpu {
static inline struct kvm_vcpu *kvmppc_xive_find_server(struct kvm *kvm, u32 nr)
{
struct kvm_vcpu *vcpu = NULL;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.xive_vcpu && nr == vcpu->arch.xive_vcpu->server_num)
@@ -240,7 +240,7 @@ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id)
{
struct kvm_vcpu *vcpu = NULL;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.xive_vcpu && vp_id == vcpu->arch.xive_vcpu->vp_id)
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 99db9ac49901..561a5bfe0468 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -807,7 +807,7 @@ static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
- unsigned int i;
+ unsigned long i;
pr_devel("%s\n", __func__);
@@ -916,7 +916,7 @@ static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
- unsigned int i;
+ unsigned long i;
pr_devel("%s\n", __func__);
@@ -1017,7 +1017,7 @@ static void kvmppc_xive_native_release(struct kvm_device *dev)
struct kvmppc_xive *xive = dev->private;
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
pr_devel("Releasing xive native device\n");
@@ -1214,7 +1214,7 @@ static int xive_native_debug_show(struct seq_file *m, void *private)
struct kvmppc_xive *xive = m->private;
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
- unsigned int i;
+ unsigned long i;
if (!kvm)
return 0;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 8c15c90dd3a9..06c5830a93f9 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -718,7 +718,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_WE) {
local_irq_enable();
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
hard_irq_disable();
@@ -1821,16 +1821,15 @@ void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
}
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
return 0;
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 64eb833e9f02..051102d50c31 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -65,7 +65,7 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
ulong param = vcpu->arch.regs.gpr[rb];
int prio = dbell2prio(rb);
int pir = param & PPC_DBELL_PIR_MASK;
- int i;
+ unsigned long i;
struct kvm_vcpu *cvcpu;
if (prio < 0)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a72920f4f221..2ad0ccd202d5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -236,7 +236,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
break;
case EV_HCALL_TOKEN(EV_IDLE):
r = EV_SUCCESS;
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
break;
default:
@@ -463,9 +463,6 @@ err_out:
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- unsigned int i;
- struct kvm_vcpu *vcpu;
-
#ifdef CONFIG_KVM_XICS
/*
* We call kick_all_cpus_sync() to ensure that all
@@ -476,14 +473,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kick_all_cpus_sync();
#endif
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vcpu_destroy(vcpu);
+ kvm_destroy_vcpus(kvm);
mutex_lock(&kvm->lock);
- for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
- kvm->vcpus[i] = NULL;
-
- atomic_set(&kvm->online_vcpus, 0);
kvmppc_core_destroy_vm(kvm);
@@ -706,20 +698,19 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- return kvmppc_core_prepare_memory_region(kvm, memslot, mem, change);
+ return kvmppc_core_prepare_memory_region(kvm, old, new, change);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- kvmppc_core_commit_memory_region(kvm, mem, old, new, change);
+ kvmppc_core_commit_memory_region(kvm, old, new, change);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -762,7 +753,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
goto out_vcpu_uninit;
- vcpu->arch.waitp = &vcpu->wait;
+ rcuwait_init(&vcpu->arch.wait);
+ vcpu->arch.waitp = &vcpu->arch.wait;
kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
return 0;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 2d4a411c7c85..eb8ecd7343a9 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -517,10 +517,8 @@ retry:
* case.
*/
if (unlikely(fault & VM_FAULT_RETRY)) {
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
mmap_read_unlock(current->mm);
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index 270fa3c0d372..26427311fc72 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -375,7 +375,7 @@ int pasemi_dma_alloc_flag(void)
int bit;
retry:
- bit = find_next_bit(flags_free, MAX_FLAGS, 0);
+ bit = find_first_bit(flags_free, MAX_FLAGS);
if (bit >= MAX_FLAGS)
return -ENOSPC;
if (!test_and_clear_bit(bit, flags_free))
@@ -440,7 +440,7 @@ int pasemi_dma_alloc_fun(void)
int bit;
retry:
- bit = find_next_bit(fun_free, MAX_FLAGS, 0);
+ bit = find_first_bit(fun_free, MAX_FLAGS);
if (bit >= MAX_FLAGS)
return -ENOSPC;
if (!test_and_clear_bit(bit, fun_free))
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 09abf62ae0ad..5adcbd9b5e88 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -14,6 +14,7 @@ config RISCV
def_bool y
select ARCH_CLOCKSOURCE_INIT
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
+ select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_VIRTUAL if MMU
@@ -75,6 +76,7 @@ config RISCV
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
select HAVE_ASM_MODVERSIONS
@@ -145,27 +147,16 @@ config MMU
Select if you want MMU-based virtualised addressing space
support by paged memory management. If unsure, say 'Y'.
-config VA_BITS
- int
- default 32 if 32BIT
- default 39 if 64BIT
-
-config PA_BITS
- int
- default 34 if 32BIT
- default 56 if 64BIT
-
config PAGE_OFFSET
hex
- default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB
+ default 0xC0000000 if 32BIT
default 0x80000000 if 64BIT && !MMU
- default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
- default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
+ default 0xffffaf8000000000 if 64BIT
config KASAN_SHADOW_OFFSET
hex
depends on KASAN_GENERIC
- default 0xdfffffc800000000 if 64BIT
+ default 0xdfffffff00000000 if 64BIT
default 0xffffffff if 32BIT
config ARCH_FLATMEM_ENABLE
@@ -211,7 +202,7 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
- default 3 if 64BIT
+ default 4 if 64BIT
default 2
config LOCKDEP_SUPPORT
@@ -269,24 +260,6 @@ config MODULE_SECTIONS
bool
select HAVE_MOD_ARCH_SPECIFIC
-choice
- prompt "Maximum Physical Memory"
- default MAXPHYSMEM_1GB if 32BIT
- default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
- default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY
-
- config MAXPHYSMEM_1GB
- depends on 32BIT
- bool "1GiB"
- config MAXPHYSMEM_2GB
- depends on 64BIT && CMODEL_MEDLOW
- bool "2GiB"
- config MAXPHYSMEM_128GB
- depends on 64BIT && CMODEL_MEDANY
- bool "128GiB"
-endchoice
-
-
config SMP
bool "Symmetric Multi-Processing"
help
@@ -333,6 +306,8 @@ config NUMA
select GENERIC_ARCH_NUMA
select OF_NUMA
select ARCH_SUPPORTS_NUMA_BALANCING
+ select USE_PERCPU_NUMA_NODE_ID
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
help
Enable NUMA (Non-Uniform Memory Access) support.
@@ -348,14 +323,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
config RISCV_ISA_C
bool "Emit compressed instructions when building Linux"
default y
@@ -396,12 +363,25 @@ source "kernel/Kconfig.hz"
config RISCV_SBI_V01
bool "SBI v0.1 support"
- default y
depends on RISCV_SBI
help
This config allows kernel to use SBI v0.1 APIs. This will be
deprecated in future once legacy M-mode software are no longer in use.
+config RISCV_BOOT_SPINWAIT
+ bool "Spinwait booting method"
+ depends on SMP
+ default y
+ help
+ This enables support for booting Linux via spinwait method. In the
+ spinwait method, all cores randomly jump to Linux. One of the cores
+ gets chosen via lottery and all other keep spinning on a percpu
+ variable. This method cannot support CPU hotplug and sparse hartid
+ scheme. It should be only enabled for M-mode Linux or platforms relying
+ on older firmware without SBI HSM extension. All other platforms should
+ rely on ordered booting via SBI HSM extension which gets chosen
+ dynamically at runtime if the firmware supports it.
+
config KEXEC
bool "Kexec system call"
select KEXEC_CORE
diff --git a/arch/riscv/boot/dts/canaan/Makefile b/arch/riscv/boot/dts/canaan/Makefile
index 9ee7156c0c31..c61b08ac8554 100644
--- a/arch/riscv/boot/dts/canaan/Makefile
+++ b/arch/riscv/boot/dts/canaan/Makefile
@@ -1,5 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-ifneq ($(CONFIG_SOC_CANAAN_K210_DTB_SOURCE),"")
-dtb-y += $(strip $(shell echo $(CONFIG_SOC_CANAAN_K210_DTB_SOURCE))).dtb
+dtb-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .dtb, $(CONFIG_SOC_CANAAN_K210_DTB_SOURCE))
obj-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .o, $(dtb-y))
-endif
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
index 5e8ca8142482..56f57118c633 100644
--- a/arch/riscv/boot/dts/canaan/k210.dtsi
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -103,8 +103,8 @@
clint0: timer@2000000 {
compatible = "canaan,k210-clint", "sifive,clint0";
reg = <0x2000000 0xC000>;
- interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
- &cpu1_intc 3 &cpu1_intc 7>;
+ interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
+ <&cpu1_intc 3>, <&cpu1_intc 7>;
};
plic0: interrupt-controller@c000000 {
@@ -113,7 +113,7 @@
compatible = "canaan,k210-plic", "sifive,plic-1.0.0";
reg = <0xC000000 0x4000000>;
interrupt-controller;
- interrupts-extended = <&cpu0_intc 11 &cpu1_intc 11>;
+ interrupts-extended = <&cpu0_intc 11>, <&cpu1_intc 11>;
riscv,ndev = <65>;
};
@@ -130,10 +130,11 @@
compatible = "canaan,k210-gpiohs", "sifive,gpio0";
reg = <0x38001000 0x1000>;
interrupt-controller;
- interrupts = <34 35 36 37 38 39 40 41
- 42 43 44 45 46 47 48 49
- 50 51 52 53 54 55 56 57
- 58 59 60 61 62 63 64 65>;
+ interrupts = <34>, <35>, <36>, <37>, <38>, <39>, <40>,
+ <41>, <42>, <43>, <44>, <45>, <46>, <47>,
+ <48>, <49>, <50>, <51>, <52>, <53>, <54>,
+ <55>, <56>, <57>, <58>, <59>, <60>, <61>,
+ <62>, <63>, <64>, <65>;
gpio-controller;
ngpios = <32>;
};
@@ -141,7 +142,7 @@
dmac0: dma-controller@50000000 {
compatible = "snps,axi-dma-1.01a";
reg = <0x50000000 0x1000>;
- interrupts = <27 28 29 30 31 32>;
+ interrupts = <27>, <28>, <29>, <30>, <31>, <32>;
#dma-cells = <1>;
clocks = <&sysclk K210_CLK_DMA>, <&sysclk K210_CLK_DMA>;
clock-names = "core-clk", "cfgr-clk";
@@ -316,7 +317,7 @@
timer0: timer@502d0000 {
compatible = "snps,dw-apb-timer";
reg = <0x502D0000 0x100>;
- interrupts = <14 15>;
+ interrupts = <14>, <15>;
clocks = <&sysclk K210_CLK_TIMER0>,
<&sysclk K210_CLK_APB0>;
clock-names = "timer", "pclk";
@@ -326,7 +327,7 @@
timer1: timer@502e0000 {
compatible = "snps,dw-apb-timer";
reg = <0x502E0000 0x100>;
- interrupts = <16 17>;
+ interrupts = <16>, <17>;
clocks = <&sysclk K210_CLK_TIMER1>,
<&sysclk K210_CLK_APB0>;
clock-names = "timer", "pclk";
@@ -336,7 +337,7 @@
timer2: timer@502f0000 {
compatible = "snps,dw-apb-timer";
reg = <0x502F0000 0x100>;
- interrupts = <18 19>;
+ interrupts = <18>, <19>;
clocks = <&sysclk K210_CLK_TIMER2>,
<&sysclk K210_CLK_APB0>;
clock-names = "timer", "pclk";
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
index 0bcaf35045e7..984872f3d3a9 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -199,7 +199,7 @@
};
&spi3 {
- spi-flash@0 {
+ flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
index ac8a03f5867a..7ba99b4da304 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -201,7 +201,7 @@
};
&spi3 {
- spi-flash@0 {
+ flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
index 623998194bc1..be9b12c9b374 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -209,7 +209,7 @@
};
&spi3 {
- spi-flash@0 {
+ flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
index cf605ba0d67e..031c0c28f819 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -174,7 +174,7 @@
};
&spi3 {
- spi-flash@0 {
+ flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
index fc1e5869df1b..0c748ae1b006 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
@@ -35,6 +35,10 @@
};
};
+&refclk {
+ clock-frequency = <600000000>;
+};
+
&serial0 {
status = "okay";
};
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
index c9f6d205d2ba..869aaf0d5c06 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
@@ -9,9 +9,6 @@
model = "Microchip PolarFire SoC";
compatible = "microchip,mpfs";
- chosen {
- };
-
cpus {
#address-cells = <1>;
#size-cells = <0>;
@@ -142,6 +139,11 @@
};
};
+ refclk: msspllclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ };
+
soc {
#address-cells = <2>;
#size-cells = <2>;
@@ -156,62 +158,48 @@
cache-size = <2097152>;
cache-unified;
interrupt-parent = <&plic>;
- interrupts = <1 2 3>;
+ interrupts = <1>, <2>, <3>;
reg = <0x0 0x2010000 0x0 0x1000>;
};
clint@2000000 {
compatible = "sifive,fu540-c000-clint", "sifive,clint0";
reg = <0x0 0x2000000 0x0 0xC000>;
- interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
- &cpu1_intc 3 &cpu1_intc 7
- &cpu2_intc 3 &cpu2_intc 7
- &cpu3_intc 3 &cpu3_intc 7
- &cpu4_intc 3 &cpu4_intc 7>;
+ interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
+ <&cpu1_intc 3>, <&cpu1_intc 7>,
+ <&cpu2_intc 3>, <&cpu2_intc 7>,
+ <&cpu3_intc 3>, <&cpu3_intc 7>,
+ <&cpu4_intc 3>, <&cpu4_intc 7>;
};
plic: interrupt-controller@c000000 {
- #interrupt-cells = <1>;
compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
reg = <0x0 0xc000000 0x0 0x4000000>;
- riscv,ndev = <186>;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
interrupt-controller;
- interrupts-extended = <&cpu0_intc 11
- &cpu1_intc 11 &cpu1_intc 9
- &cpu2_intc 11 &cpu2_intc 9
- &cpu3_intc 11 &cpu3_intc 9
- &cpu4_intc 11 &cpu4_intc 9>;
+ interrupts-extended = <&cpu0_intc 11>,
+ <&cpu1_intc 11>, <&cpu1_intc 9>,
+ <&cpu2_intc 11>, <&cpu2_intc 9>,
+ <&cpu3_intc 11>, <&cpu3_intc 9>,
+ <&cpu4_intc 11>, <&cpu4_intc 9>;
+ riscv,ndev = <186>;
};
dma@3000000 {
compatible = "sifive,fu540-c000-pdma";
reg = <0x0 0x3000000 0x0 0x8000>;
interrupt-parent = <&plic>;
- interrupts = <23 24 25 26 27 28 29 30>;
+ interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
+ <30>;
#dma-cells = <1>;
};
- refclk: refclk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <600000000>;
- clock-output-names = "msspllclk";
- };
-
clkcfg: clkcfg@20002000 {
compatible = "microchip,mpfs-clkcfg";
reg = <0x0 0x20002000 0x0 0x1000>;
- reg-names = "mss_sysreg";
clocks = <&refclk>;
#clock-cells = <1>;
- clock-output-names = "cpu", "axi", "ahb", "envm", /* 0-3 */
- "mac0", "mac1", "mmc", "timer", /* 4-7 */
- "mmuart0", "mmuart1", "mmuart2", "mmuart3", /* 8-11 */
- "mmuart4", "spi0", "spi1", "i2c0", /* 12-15 */
- "i2c1", "can0", "can1", "usb", /* 16-19 */
- "rsvd", "rtc", "qspi", "gpio0", /* 20-23 */
- "gpio1", "gpio2", "ddrc", "fic0", /* 24-27 */
- "fic1", "fic2", "fic3", "athena", "cfm"; /* 28-32 */
};
serial0: serial@20000000 {
@@ -267,7 +255,7 @@
compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc";
reg = <0x0 0x20008000 0x0 0x1000>;
interrupt-parent = <&plic>;
- interrupts = <88 89>;
+ interrupts = <88>, <89>;
clocks = <&clkcfg 6>;
max-frequency = <200000000>;
status = "disabled";
@@ -277,7 +265,7 @@
compatible = "cdns,macb";
reg = <0x0 0x20110000 0x0 0x2000>;
interrupt-parent = <&plic>;
- interrupts = <64 65 66 67>;
+ interrupts = <64>, <65>, <66>, <67>;
local-mac-address = [00 00 00 00 00 00];
clocks = <&clkcfg 4>, <&clkcfg 2>;
clock-names = "pclk", "hclk";
@@ -290,7 +278,7 @@
compatible = "cdns,macb";
reg = <0x0 0x20112000 0x0 0x2000>;
interrupt-parent = <&plic>;
- interrupts = <70 71 72 73>;
+ interrupts = <70>, <71>, <72>, <73>;
local-mac-address = [00 00 00 00 00 00];
clocks = <&clkcfg 5>, <&clkcfg 2>;
status = "disabled";
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 0655b5c4201d..3eef52b1a59b 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -137,20 +137,21 @@
soc {
#address-cells = <2>;
#size-cells = <2>;
- compatible = "sifive,fu540-c000", "sifive,fu540", "simple-bus";
+ compatible = "simple-bus";
ranges;
plic0: interrupt-controller@c000000 {
- #interrupt-cells = <1>;
compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
reg = <0x0 0xc000000 0x0 0x4000000>;
- riscv,ndev = <53>;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
interrupt-controller;
- interrupts-extended = <
- &cpu0_intc 0xffffffff
- &cpu1_intc 0xffffffff &cpu1_intc 9
- &cpu2_intc 0xffffffff &cpu2_intc 9
- &cpu3_intc 0xffffffff &cpu3_intc 9
- &cpu4_intc 0xffffffff &cpu4_intc 9>;
+ interrupts-extended =
+ <&cpu0_intc 0xffffffff>,
+ <&cpu1_intc 0xffffffff>, <&cpu1_intc 9>,
+ <&cpu2_intc 0xffffffff>, <&cpu2_intc 9>,
+ <&cpu3_intc 0xffffffff>, <&cpu3_intc 9>,
+ <&cpu4_intc 0xffffffff>, <&cpu4_intc 9>;
+ riscv,ndev = <53>;
};
prci: clock-controller@10000000 {
compatible = "sifive,fu540-c000-prci";
@@ -170,7 +171,8 @@
compatible = "sifive,fu540-c000-pdma";
reg = <0x0 0x3000000 0x0 0x8000>;
interrupt-parent = <&plic0>;
- interrupts = <23 24 25 26 27 28 29 30>;
+ interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
+ <30>;
#dma-cells = <1>;
};
uart1: serial@10011000 {
@@ -195,8 +197,8 @@
};
qspi0: spi@10040000 {
compatible = "sifive,fu540-c000-spi", "sifive,spi0";
- reg = <0x0 0x10040000 0x0 0x1000
- 0x0 0x20000000 0x0 0x10000000>;
+ reg = <0x0 0x10040000 0x0 0x1000>,
+ <0x0 0x20000000 0x0 0x10000000>;
interrupt-parent = <&plic0>;
interrupts = <51>;
clocks = <&prci PRCI_CLK_TLCLK>;
@@ -206,8 +208,8 @@
};
qspi1: spi@10041000 {
compatible = "sifive,fu540-c000-spi", "sifive,spi0";
- reg = <0x0 0x10041000 0x0 0x1000
- 0x0 0x30000000 0x0 0x10000000>;
+ reg = <0x0 0x10041000 0x0 0x1000>,
+ <0x0 0x30000000 0x0 0x10000000>;
interrupt-parent = <&plic0>;
interrupts = <52>;
clocks = <&prci PRCI_CLK_TLCLK>;
@@ -229,8 +231,8 @@
compatible = "sifive,fu540-c000-gem";
interrupt-parent = <&plic0>;
interrupts = <53>;
- reg = <0x0 0x10090000 0x0 0x2000
- 0x0 0x100a0000 0x0 0x1000>;
+ reg = <0x0 0x10090000 0x0 0x2000>,
+ <0x0 0x100a0000 0x0 0x1000>;
local-mac-address = [00 00 00 00 00 00];
clock-names = "pclk", "hclk";
clocks = <&prci PRCI_CLK_GEMGXLPLL>,
@@ -243,7 +245,7 @@
compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
reg = <0x0 0x10020000 0x0 0x1000>;
interrupt-parent = <&plic0>;
- interrupts = <42 43 44 45>;
+ interrupts = <42>, <43>, <44>, <45>;
clocks = <&prci PRCI_CLK_TLCLK>;
#pwm-cells = <3>;
status = "disabled";
@@ -252,7 +254,7 @@
compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
reg = <0x0 0x10021000 0x0 0x1000>;
interrupt-parent = <&plic0>;
- interrupts = <46 47 48 49>;
+ interrupts = <46>, <47>, <48>, <49>;
clocks = <&prci PRCI_CLK_TLCLK>;
#pwm-cells = <3>;
status = "disabled";
@@ -265,7 +267,7 @@
cache-size = <2097152>;
cache-unified;
interrupt-parent = <&plic0>;
- interrupts = <1 2 3>;
+ interrupts = <1>, <2>, <3>;
reg = <0x0 0x2010000 0x0 0x1000>;
};
gpio: gpio@10060000 {
diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
index abbb960f90a0..8464b0e3c887 100644
--- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
@@ -147,12 +147,12 @@
reg = <0x0 0xc000000 0x0 0x4000000>;
riscv,ndev = <69>;
interrupt-controller;
- interrupts-extended = <
- &cpu0_intc 0xffffffff
- &cpu1_intc 0xffffffff &cpu1_intc 9
- &cpu2_intc 0xffffffff &cpu2_intc 9
- &cpu3_intc 0xffffffff &cpu3_intc 9
- &cpu4_intc 0xffffffff &cpu4_intc 9>;
+ interrupts-extended =
+ <&cpu0_intc 0xffffffff>,
+ <&cpu1_intc 0xffffffff>, <&cpu1_intc 9>,
+ <&cpu2_intc 0xffffffff>, <&cpu2_intc 9>,
+ <&cpu3_intc 0xffffffff>, <&cpu3_intc 9>,
+ <&cpu4_intc 0xffffffff>, <&cpu4_intc 9>;
};
prci: clock-controller@10000000 {
compatible = "sifive,fu740-c000-prci";
@@ -273,7 +273,7 @@
cache-size = <2097152>;
cache-unified;
interrupt-parent = <&plic0>;
- interrupts = <19 21 22 20>;
+ interrupts = <19>, <21>, <22>, <20>;
reg = <0x0 0x2010000 0x0 0x1000>;
};
gpio: gpio@10060000 {
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
index 6bfa1f24d3de..c4ed9efdff03 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -39,6 +39,11 @@
clock-frequency = <RTCCLK_FREQ>;
clock-output-names = "rtcclk";
};
+
+ gpio-poweroff {
+ compatible = "gpio-poweroff";
+ gpios = <&gpio 2 GPIO_ACTIVE_LOW>;
+ };
};
&uart0 {
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index ef473e2f503b..f120fcc43d0a 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -2,6 +2,7 @@ CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
@@ -13,10 +14,10 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
-CONFIG_BPF_SYSCALL=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_SOC_MICROCHIP_POLARFIRE=y
CONFIG_SOC_SIFIVE=y
CONFIG_SOC_VIRT=y
-CONFIG_SOC_MICROCHIP_POLARFIRE=y
CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
CONFIG_VIRTUALIZATION=y
@@ -70,14 +71,14 @@ CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_SPI=y
CONFIG_SPI_SIFIVE=y
+# CONFIG_PTP_1588_CLOCK is not set
CONFIG_GPIOLIB=y
CONFIG_GPIO_SIFIVE=y
-# CONFIG_PTP_1588_CLOCK is not set
-CONFIG_POWER_RESET=y
CONFIG_DRM=m
CONFIG_DRM_RADEON=m
CONFIG_DRM_NOUVEAU=m
CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
@@ -88,10 +89,10 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
+CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SDHCI_CADENCE=y
-CONFIG_MMC=y
CONFIG_MMC_SPI=y
CONFIG_RTC_CLASS=y
CONFIG_VIRTIO_PCI=y
@@ -142,5 +143,3 @@ CONFIG_RCU_EQS_DEBUG=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
-# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_EFI=y
diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig
index b16a2a12c82a..3f42ed87dde8 100644
--- a/arch/riscv/configs/nommu_k210_defconfig
+++ b/arch/riscv/configs/nommu_k210_defconfig
@@ -29,8 +29,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
-CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
-CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0"
@@ -75,7 +73,6 @@ CONFIG_LEDS_GPIO=y
CONFIG_LEDS_USER=y
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
-# CONFIG_SURFACE_PLATFORMS is not set
# CONFIG_FILE_LOCKING is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
index 61f887f65419..2a82a3b2992b 100644
--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -21,8 +21,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
-CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
-CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
@@ -30,7 +28,6 @@ CONFIG_CMDLINE_FORCE=y
# CONFIG_SECCOMP is not set
# CONFIG_STACKPROTECTOR is not set
# CONFIG_GCC_PLUGINS is not set
-# CONFIG_BLK_DEV_BSG is not set
# CONFIG_MQ_IOSCHED_DEADLINE is not set
# CONFIG_MQ_IOSCHED_KYBER is not set
CONFIG_BINFMT_FLAT=y
@@ -72,7 +69,6 @@ CONFIG_LEDS_GPIO=y
CONFIG_LEDS_USER=y
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
-# CONFIG_SURFACE_PLATFORMS is not set
CONFIG_EXT2_FS=y
# CONFIG_FILE_LOCKING is not set
# CONFIG_DNOTIFY is not set
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
index e046a0babde4..e1c9864b6237 100644
--- a/arch/riscv/configs/nommu_virt_defconfig
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -24,15 +24,12 @@ CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
-# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_MMU is not set
CONFIG_SOC_VIRT=y
-CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
CONFIG_CMDLINE_FORCE=y
CONFIG_JUMP_LABEL=y
-# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
# CONFIG_EFI_PARTITION is not set
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 6e9f12ff968a..8b56a7f1eb06 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -2,6 +2,7 @@ CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
@@ -13,7 +14,7 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
-CONFIG_BPF_SYSCALL=y
+# CONFIG_SYSFS_SYSCALL is not set
CONFIG_SOC_SIFIVE=y
CONFIG_SOC_VIRT=y
CONFIG_ARCH_RV32I=y
@@ -69,10 +70,10 @@ CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_SPI=y
CONFIG_SPI_SIFIVE=y
# CONFIG_PTP_1588_CLOCK is not set
-CONFIG_POWER_RESET=y
CONFIG_DRM=y
CONFIG_DRM_RADEON=y
CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
@@ -132,4 +133,3 @@ CONFIG_RCU_EQS_DEBUG=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
-# CONFIG_SYSFS_SYSCALL is not set
diff --git a/arch/riscv/errata/alternative.c b/arch/riscv/errata/alternative.c
index 3b15885db70b..e8b4a0fe488c 100644
--- a/arch/riscv/errata/alternative.c
+++ b/arch/riscv/errata/alternative.c
@@ -22,7 +22,8 @@ static struct cpu_manufacturer_info_t {
} cpu_mfr_info;
static void (*vendor_patch_func)(struct alt_entry *begin, struct alt_entry *end,
- unsigned long archid, unsigned long impid);
+ unsigned long archid,
+ unsigned long impid) __initdata;
static inline void __init riscv_fill_cpu_mfr_info(void)
{
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 445ccc97305a..57b86fd9916c 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
generic-y += early_ioremap.h
-generic-y += extable.h
generic-y += flat.h
generic-y += kvm_para.h
generic-y += user.h
diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h
new file mode 100644
index 000000000000..14be0673f5b5
--- /dev/null
+++ b/arch/riscv/include/asm/asm-extable.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_ASM_EXTABLE_H
+#define __ASM_ASM_EXTABLE_H
+
+#define EX_TYPE_NONE 0
+#define EX_TYPE_FIXUP 1
+#define EX_TYPE_BPF 2
+#define EX_TYPE_UACCESS_ERR_ZERO 3
+
+#ifdef __ASSEMBLY__
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ .pushsection __ex_table, "a"; \
+ .balign 4; \
+ .long ((insn) - .); \
+ .long ((fixup) - .); \
+ .short (type); \
+ .short (data); \
+ .popsection;
+
+ .macro _asm_extable, insn, fixup
+ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
+ .endm
+
+#else /* __ASSEMBLY__ */
+
+#include <linux/bits.h>
+#include <linux/stringify.h>
+#include <asm/gpr-num.h>
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ ".pushsection __ex_table, \"a\"\n" \
+ ".balign 4\n" \
+ ".long ((" insn ") - .)\n" \
+ ".long ((" fixup ") - .)\n" \
+ ".short (" type ")\n" \
+ ".short (" data ")\n" \
+ ".popsection\n"
+
+#define _ASM_EXTABLE(insn, fixup) \
+ __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0")
+
+#define EX_DATA_REG_ERR_SHIFT 0
+#define EX_DATA_REG_ERR GENMASK(4, 0)
+#define EX_DATA_REG_ZERO_SHIFT 5
+#define EX_DATA_REG_ZERO GENMASK(9, 5)
+
+#define EX_DATA_REG(reg, gpr) \
+ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_UACCESS_ERR_ZERO), \
+ "(" \
+ EX_DATA_REG(ERR, err) " | " \
+ EX_DATA_REG(ZERO, zero) \
+ ")")
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 396a3303c537..3540b690944b 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -20,7 +20,6 @@
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/ffs.h>
diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h
index a8ec3c5c1bd2..134590f1b843 100644
--- a/arch/riscv/include/asm/cpu_ops.h
+++ b/arch/riscv/include/asm/cpu_ops.h
@@ -40,7 +40,5 @@ struct cpu_operations {
extern const struct cpu_operations *cpu_ops[NR_CPUS];
void __init cpu_set_ops(int cpu);
-void cpu_update_secondary_bootdata(unsigned int cpuid,
- struct task_struct *tidle);
#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/riscv/include/asm/cpu_ops_sbi.h b/arch/riscv/include/asm/cpu_ops_sbi.h
new file mode 100644
index 000000000000..56e4b76d09ff
--- /dev/null
+++ b/arch/riscv/include/asm/cpu_ops_sbi.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 by Rivos Inc.
+ */
+#ifndef __ASM_CPU_OPS_SBI_H
+#define __ASM_CPU_OPS_SBI_H
+
+#ifndef __ASSEMBLY__
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/threads.h>
+
+/**
+ * struct sbi_hart_boot_data - Hart specific boot used during booting and
+ * cpu hotplug.
+ * @task_ptr: A pointer to the hart specific tp
+ * @stack_ptr: A pointer to the hart specific sp
+ */
+struct sbi_hart_boot_data {
+ void *task_ptr;
+ void *stack_ptr;
+};
+#endif
+
+#endif /* ifndef __ASM_CPU_OPS_SBI_H */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 5046f431645c..ae711692eec9 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -40,14 +40,13 @@
#ifndef CONFIG_64BIT
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
-#define SATP_MODE SATP_MODE_32
#define SATP_ASID_BITS 9
#define SATP_ASID_SHIFT 22
#define SATP_ASID_MASK _AC(0x1FF, UL)
#else
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
-#define SATP_MODE SATP_MODE_39
+#define SATP_MODE_48 _AC(0x9000000000000000, UL)
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
diff --git a/arch/riscv/include/asm/extable.h b/arch/riscv/include/asm/extable.h
new file mode 100644
index 000000000000..512012d193dc
--- /dev/null
+++ b/arch/riscv/include/asm/extable.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_EXTABLE_H
+#define _ASM_RISCV_EXTABLE_H
+
+/*
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry {
+ int insn, fixup;
+ short type, data;
+};
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+#define swap_ex_entry_fixup(a, b, tmp, delta) \
+do { \
+ (a)->fixup = (b)->fixup + (delta); \
+ (b)->fixup = (tmp).fixup - (delta); \
+ (a)->type = (b)->type; \
+ (b)->type = (tmp).type; \
+ (a)->data = (b)->data; \
+ (b)->data = (tmp).data; \
+} while (0)
+
+bool fixup_exception(struct pt_regs *regs);
+
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I)
+bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs);
+#else
+static inline bool
+ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ return false;
+}
+#endif
+
+#endif
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 54cbf07fb4e9..58a718573ad6 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -24,6 +24,7 @@ enum fixed_addresses {
FIX_HOLE,
FIX_PTE,
FIX_PMD,
+ FIX_PUD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index 1b00badb9f87..fc8130f995c1 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -11,6 +11,7 @@
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <asm/asm.h>
+#include <asm/asm-extable.h>
/* We don't even really need the extable code, but for now keep it simple */
#ifndef CONFIG_MMU
@@ -20,23 +21,14 @@
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
- uintptr_t tmp; \
__enable_user_access(); \
__asm__ __volatile__ ( \
"1: " insn " \n" \
"2: \n" \
- " .section .fixup,\"ax\" \n" \
- " .balign 4 \n" \
- "3: li %[r],%[e] \n" \
- " jump 2b,%[t] \n" \
- " .previous \n" \
- " .section __ex_table,\"a\" \n" \
- " .balign " RISCV_SZPTR " \n" \
- " " RISCV_PTR " 1b, 3b \n" \
- " .previous \n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r]) \
: [r] "+r" (ret), [ov] "=&r" (oldval), \
- [u] "+m" (*uaddr), [t] "=&r" (tmp) \
- : [op] "Jr" (oparg), [e] "i" (-EFAULT) \
+ [u] "+m" (*uaddr) \
+ : [op] "Jr" (oparg) \
: "memory"); \
__disable_user_access(); \
}
@@ -98,18 +90,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
"2: sc.w.aqrl %[t],%z[nv],%[u] \n"
" bnez %[t],1b \n"
"3: \n"
- " .section .fixup,\"ax\" \n"
- " .balign 4 \n"
- "4: li %[r],%[e] \n"
- " jump 3b,%[t] \n"
- " .previous \n"
- " .section __ex_table,\"a\" \n"
- " .balign " RISCV_SZPTR " \n"
- " " RISCV_PTR " 1b, 4b \n"
- " " RISCV_PTR " 2b, 4b \n"
- " .previous \n"
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %[r]) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %[r]) \
: [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp)
- : [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "i" (-EFAULT)
+ : [ov] "Jr" (oldval), [nv] "Jr" (newval)
: "memory");
__disable_user_access();
diff --git a/arch/riscv/include/asm/gpr-num.h b/arch/riscv/include/asm/gpr-num.h
new file mode 100644
index 000000000000..dfee2829fc7c
--- /dev/null
+++ b/arch/riscv/include/asm/gpr-num.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_GPR_NUM_H
+#define __ASM_GPR_NUM_H
+
+#ifdef __ASSEMBLY__
+ .equ .L__gpr_num_zero, 0
+ .equ .L__gpr_num_ra, 1
+ .equ .L__gpr_num_sp, 2
+ .equ .L__gpr_num_gp, 3
+ .equ .L__gpr_num_tp, 4
+ .equ .L__gpr_num_t0, 5
+ .equ .L__gpr_num_t1, 6
+ .equ .L__gpr_num_t2, 7
+ .equ .L__gpr_num_s0, 8
+ .equ .L__gpr_num_s1, 9
+ .equ .L__gpr_num_a0, 10
+ .equ .L__gpr_num_a1, 11
+ .equ .L__gpr_num_a2, 12
+ .equ .L__gpr_num_a3, 13
+ .equ .L__gpr_num_a4, 14
+ .equ .L__gpr_num_a5, 15
+ .equ .L__gpr_num_a6, 16
+ .equ .L__gpr_num_a7, 17
+ .equ .L__gpr_num_s2, 18
+ .equ .L__gpr_num_s3, 19
+ .equ .L__gpr_num_s4, 20
+ .equ .L__gpr_num_s5, 21
+ .equ .L__gpr_num_s6, 22
+ .equ .L__gpr_num_s7, 23
+ .equ .L__gpr_num_s8, 24
+ .equ .L__gpr_num_s9, 25
+ .equ .L__gpr_num_s10, 26
+ .equ .L__gpr_num_s11, 27
+ .equ .L__gpr_num_t3, 28
+ .equ .L__gpr_num_t4, 29
+ .equ .L__gpr_num_t5, 30
+ .equ .L__gpr_num_t6, 31
+
+#else /* __ASSEMBLY__ */
+
+#define __DEFINE_ASM_GPR_NUMS \
+" .equ .L__gpr_num_zero, 0\n" \
+" .equ .L__gpr_num_ra, 1\n" \
+" .equ .L__gpr_num_sp, 2\n" \
+" .equ .L__gpr_num_gp, 3\n" \
+" .equ .L__gpr_num_tp, 4\n" \
+" .equ .L__gpr_num_t0, 5\n" \
+" .equ .L__gpr_num_t1, 6\n" \
+" .equ .L__gpr_num_t2, 7\n" \
+" .equ .L__gpr_num_s0, 8\n" \
+" .equ .L__gpr_num_s1, 9\n" \
+" .equ .L__gpr_num_a0, 10\n" \
+" .equ .L__gpr_num_a1, 11\n" \
+" .equ .L__gpr_num_a2, 12\n" \
+" .equ .L__gpr_num_a3, 13\n" \
+" .equ .L__gpr_num_a4, 14\n" \
+" .equ .L__gpr_num_a5, 15\n" \
+" .equ .L__gpr_num_a6, 16\n" \
+" .equ .L__gpr_num_a7, 17\n" \
+" .equ .L__gpr_num_s2, 18\n" \
+" .equ .L__gpr_num_s3, 19\n" \
+" .equ .L__gpr_num_s4, 20\n" \
+" .equ .L__gpr_num_s5, 21\n" \
+" .equ .L__gpr_num_s6, 22\n" \
+" .equ .L__gpr_num_s7, 23\n" \
+" .equ .L__gpr_num_s8, 24\n" \
+" .equ .L__gpr_num_s9, 25\n" \
+" .equ .L__gpr_num_s10, 26\n" \
+" .equ .L__gpr_num_s11, 27\n" \
+" .equ .L__gpr_num_t3, 28\n" \
+" .equ .L__gpr_num_t4, 29\n" \
+" .equ .L__gpr_num_t5, 30\n" \
+" .equ .L__gpr_num_t6, 31\n"
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_GPR_NUM_H */
diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h
index b00f503ec124..0b85e363e778 100644
--- a/arch/riscv/include/asm/kasan.h
+++ b/arch/riscv/include/asm/kasan.h
@@ -27,13 +27,18 @@
*/
#define KASAN_SHADOW_SCALE_SHIFT 3
-#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
-#define KASAN_SHADOW_START KERN_VIRT_START
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
+#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
+/*
+ * Depending on the size of the virtual address space, the region may not be
+ * aligned on PGDIR_SIZE, so force its alignment to ease its population.
+ */
+#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK)
+#define KASAN_SHADOW_END MODULES_LOWEST_VADDR
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
void kasan_init(void);
asmlinkage void kasan_early_init(void);
+void kasan_swapper_init(void);
#endif
#endif
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 2639b9ee48f9..99ef6a120617 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -77,13 +77,6 @@ struct kvm_sbi_context {
int return_handled;
};
-#define KVM_MMU_PAGE_CACHE_NR_OBJS 32
-
-struct kvm_mmu_page_cache {
- int nobjs;
- void *objects[KVM_MMU_PAGE_CACHE_NR_OBJS];
-};
-
struct kvm_cpu_trap {
unsigned long sepc;
unsigned long scause;
@@ -193,7 +186,7 @@ struct kvm_vcpu_arch {
struct kvm_sbi_context sbi_context;
/* Cache pages needed to program page tables with spinlock held */
- struct kvm_mmu_page_cache mmu_page_cache;
+ struct kvm_mmu_memory_cache mmu_page_cache;
/* VCPU power-off state */
bool power_off;
@@ -208,7 +201,6 @@ struct kvm_vcpu_arch {
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
#define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -221,12 +213,12 @@ void __kvm_riscv_hfence_gvma_all(void);
int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write);
-void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
void kvm_riscv_stage2_mode_detect(void);
unsigned long kvm_riscv_stage2_mode(void);
+int kvm_riscv_stage2_gpa_bits(void);
void kvm_riscv_stage2_vmid_detect(void);
unsigned long kvm_riscv_stage2_vmid_bits(void);
diff --git a/arch/riscv/include/asm/kvm_types.h b/arch/riscv/include/asm/kvm_types.h
index e476b404eb67..e15765f98d7a 100644
--- a/arch/riscv/include/asm/kvm_types.h
+++ b/arch/riscv/include/asm/kvm_types.h
@@ -2,6 +2,6 @@
#ifndef _ASM_RISCV_KVM_TYPES_H
#define _ASM_RISCV_KVM_TYPES_H
-#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 32
#endif /* _ASM_RISCV_KVM_TYPES_H */
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
new file mode 100644
index 000000000000..76e4e17a3e00
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/**
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#ifndef __RISCV_KVM_VCPU_SBI_H__
+#define __RISCV_KVM_VCPU_SBI_H__
+
+#define KVM_SBI_IMPID 3
+
+#define KVM_SBI_VERSION_MAJOR 0
+#define KVM_SBI_VERSION_MINOR 2
+
+struct kvm_vcpu_sbi_extension {
+ unsigned long extid_start;
+ unsigned long extid_end;
+ /**
+ * SBI extension handler. It can be defined for a given extension or group of
+ * extension. But it should always return linux error codes rather than SBI
+ * specific error codes.
+ */
+ int (*handler)(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val, struct kvm_cpu_trap *utrap,
+ bool *exit);
+};
+
+void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run);
+const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid);
+
+#endif /* __RISCV_KVM_VCPU_SBI_H__ */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index b3e5ff0125fe..160e3a1e8f8b 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -31,9 +31,20 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_MMU
+#define PAGE_OFFSET kernel_map.page_offset
+#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
-
-#define KERN_VIRT_SIZE (-PAGE_OFFSET)
+#endif
+/*
+ * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
+ * define the PAGE_OFFSET value for SV39.
+ */
+#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
+#else
+#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
+#endif /* CONFIG_64BIT */
#ifndef __ASSEMBLY__
@@ -86,6 +97,7 @@ extern unsigned long riscv_pfn_base;
#endif /* CONFIG_MMU */
struct kernel_mapping {
+ unsigned long page_offset;
unsigned long virt_addr;
uintptr_t phys_addr;
uintptr_t size;
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 0af6933a7100..11823004b87a 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -11,6 +11,8 @@
#include <asm/tlb.h>
#ifdef CONFIG_MMU
+#define __HAVE_ARCH_PUD_ALLOC_ONE
+#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -36,6 +38,44 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
+ pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d_safe(p4d,
+ __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+#define pud_alloc_one pud_alloc_one
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ if (pgtable_l4_enabled)
+ return __pud_alloc_one(mm, addr);
+
+ return NULL;
+}
+
+#define pud_free pud_free
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (pgtable_l4_enabled)
+ __pud_free(mm, pud);
+}
+
+#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 228261aa9628..bbbdd66e5e2f 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -8,16 +8,36 @@
#include <linux/const.h>
-#define PGDIR_SHIFT 30
+extern bool pgtable_l4_enabled;
+
+#define PGDIR_SHIFT_L3 30
+#define PGDIR_SHIFT_L4 39
+#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
+
+#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+/* pud is folded into pgd in case of 3-level page table */
+#define PUD_SHIFT 30
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+
#define PMD_SHIFT 21
/* Size of region mapped by a page middle directory */
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
+/* Page Upper Directory entry */
+typedef struct {
+ unsigned long pud;
+} pud_t;
+
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) })
+#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))
+
/* Page Middle Directory entry */
typedef struct {
unsigned long pmd;
@@ -59,6 +79,16 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
+static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
+{
+ return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _pud_pfn(pud_t pud)
+{
+ return pud_val(pud) >> _PAGE_PFN_SHIFT;
+}
+
static inline pmd_t *pud_pgtable(pud_t pud)
{
return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
@@ -69,6 +99,17 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
+#define mm_pud_folded mm_pud_folded
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ if (pgtable_l4_enabled)
+ return false;
+
+ return true;
+}
+
+#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
{
return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
@@ -84,4 +125,69 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ *p4dp = p4d;
+ else
+ set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) == 0);
+
+ return 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) & _PAGE_PRESENT);
+
+ return 1;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return !p4d_present(p4d);
+
+ return 0;
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+ if (pgtable_l4_enabled)
+ set_p4d(p4d, __p4d(0));
+}
+
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+
+ return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
+}
+
+static inline struct page *p4d_page(p4d_t p4d)
+{
+ return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+}
+
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+#define pud_offset pud_offset
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if (pgtable_l4_enabled)
+ return p4d_pgtable(*p4d) + pud_index(address);
+
+ return (pud_t *)p4d;
+}
+
#endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index 2ee413912926..a6b0c89824c2 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -31,7 +31,7 @@
* _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to
* distinguish them from swapped out pages
*/
-#define _PAGE_PROT_NONE _PAGE_READ
+#define _PAGE_PROT_NONE _PAGE_GLOBAL
#define _PAGE_PFN_SHIFT 10
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index bf204e7c1f74..7e949f25c933 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -24,8 +24,19 @@
#define KERNEL_LINK_ADDR PAGE_OFFSET
#endif
+/* Number of entries in the page global directory */
+#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
+/* Number of entries in the page table */
+#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+
+/*
+ * Half of the kernel address space (half of the entries of the page global
+ * directory) is for the direct mapping.
+ */
+#define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
+
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END (PAGE_OFFSET - 1)
+#define VMALLOC_END PAGE_OFFSET
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
#define BPF_JIT_REGION_SIZE (SZ_128M)
@@ -39,8 +50,10 @@
/* Modules always live before the kernel */
#ifdef CONFIG_64BIT
-#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
-#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
+/* This is used to define the end of the KASAN shadow region */
+#define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G)
+#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
+#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
#endif
/*
@@ -48,10 +61,16 @@
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
+#ifdef CONFIG_64BIT
+#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
+#else
+#define VA_BITS 32
+#endif
+
#define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
-#define VMEMMAP_END (VMALLOC_START - 1)
+#define VMEMMAP_END VMALLOC_START
#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
/*
@@ -83,8 +102,7 @@
#ifndef __ASSEMBLY__
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
+#include <asm-generic/pgtable-nop4d.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
@@ -107,19 +125,27 @@
#define XIP_FIXUP(addr) (addr)
#endif /* CONFIG_XIP_KERNEL */
-#ifdef CONFIG_MMU
-/* Number of entries in the page global directory */
-#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
-/* Number of entries in the page table */
-#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+struct pt_alloc_ops {
+ pte_t *(*get_pte_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pmd)(uintptr_t va);
+ pud_t *(*get_pud_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pud)(uintptr_t va);
+#endif
+};
+
+extern struct pt_alloc_ops pt_ops __initdata;
+#ifdef CONFIG_MMU
/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
/* Page protection bits */
#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)
-#define PAGE_NONE __pgprot(_PAGE_PROT_NONE)
+#define PAGE_NONE __pgprot(_PAGE_PROT_NONE | _PAGE_READ)
#define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ)
#define PAGE_WRITE __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE)
#define PAGE_EXEC __pgprot(_PAGE_BASE | _PAGE_EXEC)
@@ -628,11 +654,12 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
*
* Format of swap PTE:
* bit 0: _PAGE_PRESENT (zero)
- * bit 1: _PAGE_PROT_NONE (zero)
- * bits 2 to 6: swap type
- * bits 7 to XLEN-1: swap offset
+ * bit 1 to 3: _PAGE_LEAF (zero)
+ * bit 5: _PAGE_PROT_NONE (zero)
+ * bits 6 to 10: swap type
+ * bits 10 to XLEN-1: swap offset
*/
-#define __SWP_TYPE_SHIFT 2
+#define __SWP_TYPE_SHIFT 6
#define __SWP_TYPE_BITS 5
#define __SWP_TYPE_MASK ((1UL << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -648,12 +675,17 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
+#define __swp_entry_to_pmd(swp) __pmd((swp).val)
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
/*
* In the RV64 Linux scheme, we give the user half of the virtual-address space
* and give the kernel the other (upper) half.
*/
#ifdef CONFIG_64BIT
-#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE)
+#define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE)
#else
#define KERN_VIRT_START FIXADDR_START
#endif
@@ -661,11 +693,22 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
/*
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
+ * Task size is:
+ * - 0x9fc00000 (~2.5GB) for RV32.
+ * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
+ * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
+ *
+ * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
+ * Instruction Set Manual Volume II: Privileged Architecture" states that
+ * "load and store effective addresses, which are 64bits, must have bits
+ * 63–48 all equal to bit 47, or else a page-fault exception will occur."
*/
#ifdef CONFIG_64BIT
-#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
#else
-#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE_MIN TASK_SIZE
#endif
#else /* CONFIG_MMU */
@@ -691,6 +734,8 @@ extern uintptr_t _dtb_early_pa;
#define dtb_early_va _dtb_early_va
#define dtb_early_pa _dtb_early_pa
#endif /* CONFIG_XIP_KERNEL */
+extern u64 satp_mode;
+extern bool pgtable_l4_enabled;
void paging_init(void);
void misc_mem_init(void);
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 0d42693cb65e..d1c37479d828 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -8,6 +8,7 @@
#define _ASM_RISCV_SBI_H
#include <linux/types.h>
+#include <linux/cpumask.h>
#ifdef CONFIG_RISCV_SBI
enum sbi_ext_id {
@@ -27,6 +28,15 @@ enum sbi_ext_id {
SBI_EXT_IPI = 0x735049,
SBI_EXT_RFENCE = 0x52464E43,
SBI_EXT_HSM = 0x48534D,
+ SBI_EXT_SRST = 0x53525354,
+
+ /* Experimentals extensions must lie within this range */
+ SBI_EXT_EXPERIMENTAL_START = 0x08000000,
+ SBI_EXT_EXPERIMENTAL_END = 0x08FFFFFF,
+
+ /* Vendor extensions must lie within this range */
+ SBI_EXT_VENDOR_START = 0x09000000,
+ SBI_EXT_VENDOR_END = 0x09FFFFFF,
};
enum sbi_ext_base_fid {
@@ -70,6 +80,21 @@ enum sbi_hsm_hart_status {
SBI_HSM_HART_STATUS_STOP_PENDING,
};
+enum sbi_ext_srst_fid {
+ SBI_EXT_SRST_RESET = 0,
+};
+
+enum sbi_srst_reset_type {
+ SBI_SRST_RESET_TYPE_SHUTDOWN = 0,
+ SBI_SRST_RESET_TYPE_COLD_REBOOT,
+ SBI_SRST_RESET_TYPE_WARM_REBOOT,
+};
+
+enum sbi_srst_reset_reason {
+ SBI_SRST_RESET_REASON_NONE = 0,
+ SBI_SRST_RESET_REASON_SYS_FAILURE,
+};
+
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
@@ -82,6 +107,7 @@ enum sbi_hsm_hart_status {
#define SBI_ERR_INVALID_PARAM -3
#define SBI_ERR_DENIED -4
#define SBI_ERR_INVALID_ADDRESS -5
+#define SBI_ERR_ALREADY_AVAILABLE -6
extern unsigned long sbi_spec_version;
struct sbiret {
@@ -103,27 +129,27 @@ long sbi_get_mimpid(void);
void sbi_set_timer(uint64_t stime_value);
void sbi_shutdown(void);
void sbi_clear_ipi(void);
-int sbi_send_ipi(const unsigned long *hart_mask);
-int sbi_remote_fence_i(const unsigned long *hart_mask);
-int sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_send_ipi(const struct cpumask *cpu_mask);
+int sbi_remote_fence_i(const struct cpumask *cpu_mask);
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid);
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
@@ -148,9 +174,17 @@ static inline unsigned long sbi_minor_version(void)
return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
}
+/* Make SBI version */
+static inline unsigned long sbi_mk_version(unsigned long major,
+ unsigned long minor)
+{
+ return ((major & SBI_SPEC_VERSION_MAJOR_MASK) <<
+ SBI_SPEC_VERSION_MAJOR_SHIFT) | minor;
+}
+
int sbi_err_map_linux_errno(int err);
#else /* CONFIG_RISCV_SBI */
-static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; }
+static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
static inline void sbi_init(void) {}
#endif /* CONFIG_RISCV_SBI */
#endif /* _ASM_RISCV_SBI_H */
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index a7d2811f3536..23170c933d73 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -43,7 +43,6 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask);
void arch_send_call_function_single_ipi(int cpu);
int riscv_hartid_to_cpuid(int hartid);
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
/* Set custom IPI operations */
void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops);
@@ -63,8 +62,6 @@ asmlinkage void smp_callin(void);
#if defined CONFIG_HOTPLUG_CPU
int __cpu_disable(void);
void __cpu_die(unsigned int cpu);
-void cpu_stop(void);
-#else
#endif /* CONFIG_HOTPLUG_CPU */
#else
@@ -85,13 +82,6 @@ static inline unsigned long cpuid_to_hartid_map(int cpu)
return boot_cpu_hartid;
}
-static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in,
- struct cpumask *out)
-{
- cpumask_clear(out);
- cpumask_set_cpu(boot_cpu_hartid, out);
-}
-
static inline void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
{
}
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h
index 45a7018a8118..63acaecc3374 100644
--- a/arch/riscv/include/asm/sparsemem.h
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -4,7 +4,11 @@
#define _ASM_RISCV_SPARSEMEM_H
#ifdef CONFIG_SPARSEMEM
-#define MAX_PHYSMEM_BITS CONFIG_PA_BITS
+#ifdef CONFIG_64BIT
+#define MAX_PHYSMEM_BITS 56
+#else
+#define MAX_PHYSMEM_BITS 34
+#endif /* CONFIG_64BIT */
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index f314ff44c48d..c701a5e57a2b 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -8,6 +8,7 @@
#ifndef _ASM_RISCV_UACCESS_H
#define _ASM_RISCV_UACCESS_H
+#include <asm/asm-extable.h>
#include <asm/pgtable.h> /* for TASK_SIZE */
/*
@@ -80,25 +81,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size)
#define __get_user_asm(insn, x, ptr, err) \
do { \
- uintptr_t __tmp; \
__typeof__(x) __x; \
__asm__ __volatile__ ( \
"1:\n" \
- " " insn " %1, %3\n" \
+ " " insn " %1, %2\n" \
"2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "3:\n" \
- " li %0, %4\n" \
- " li %1, 0\n" \
- " jump 2b, %2\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 3b\n" \
- " .previous" \
- : "+r" (err), "=&r" (__x), "=r" (__tmp) \
- : "m" (*(ptr)), "i" (-EFAULT)); \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \
+ : "+r" (err), "=&r" (__x) \
+ : "m" (*(ptr))); \
(x) = __x; \
} while (0)
@@ -110,30 +100,18 @@ do { \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u32 __lo, __hi; \
- uintptr_t __tmp; \
__asm__ __volatile__ ( \
"1:\n" \
- " lw %1, %4\n" \
+ " lw %1, %3\n" \
"2:\n" \
- " lw %2, %5\n" \
+ " lw %2, %4\n" \
"3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "4:\n" \
- " li %0, %6\n" \
- " li %1, 0\n" \
- " li %2, 0\n" \
- " jump 3b, %3\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 4b\n" \
- " " RISCV_PTR " 2b, 4b\n" \
- " .previous" \
- : "+r" (err), "=&r" (__lo), "=r" (__hi), \
- "=r" (__tmp) \
- : "m" (__ptr[__LSW]), "m" (__ptr[__MSW]), \
- "i" (-EFAULT)); \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 3b, %0, %1) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(2b, 3b, %0, %1) \
+ : "+r" (err), "=&r" (__lo), "=r" (__hi) \
+ : "m" (__ptr[__LSW]), "m" (__ptr[__MSW])); \
+ if (err) \
+ __hi = 0; \
(x) = (__typeof__(x))((__typeof__((x)-(x)))( \
(((u64)__hi << 32) | __lo))); \
} while (0)
@@ -221,24 +199,14 @@ do { \
#define __put_user_asm(insn, x, ptr, err) \
do { \
- uintptr_t __tmp; \
__typeof__(*(ptr)) __x = x; \
__asm__ __volatile__ ( \
"1:\n" \
- " " insn " %z3, %2\n" \
+ " " insn " %z2, %1\n" \
"2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "3:\n" \
- " li %0, %4\n" \
- " jump 2b, %1\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 3b\n" \
- " .previous" \
- : "+r" (err), "=r" (__tmp), "=m" (*(ptr)) \
- : "rJ" (__x), "i" (-EFAULT)); \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \
+ : "+r" (err), "=m" (*(ptr)) \
+ : "rJ" (__x)); \
} while (0)
#ifdef CONFIG_64BIT
@@ -249,28 +217,18 @@ do { \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u64 __x = (__typeof__((x)-(x)))(x); \
- uintptr_t __tmp; \
__asm__ __volatile__ ( \
"1:\n" \
- " sw %z4, %2\n" \
+ " sw %z3, %1\n" \
"2:\n" \
- " sw %z5, %3\n" \
+ " sw %z4, %2\n" \
"3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "4:\n" \
- " li %0, %6\n" \
- " jump 3b, %1\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 4b\n" \
- " " RISCV_PTR " 2b, 4b\n" \
- " .previous" \
- : "+r" (err), "=r" (__tmp), \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \
+ : "+r" (err), \
"=m" (__ptr[__LSW]), \
"=m" (__ptr[__MSW]) \
- : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \
+ : "rJ" (__x), "rJ" (__x >> 32)); \
} while (0)
#endif /* CONFIG_64BIT */
@@ -388,81 +346,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
__clear_user(to, n) : n;
}
-/*
- * Atomic compare-and-exchange, but with a fixup for userspace faults. Faults
- * will set "err" to -EFAULT, while successful accesses return the previous
- * value.
- */
-#define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \
-({ \
- __typeof__(ptr) __ptr = (ptr); \
- __typeof__(*(ptr)) __old = (old); \
- __typeof__(*(ptr)) __new = (new); \
- __typeof__(*(ptr)) __ret; \
- __typeof__(err) __err = 0; \
- register unsigned int __rc; \
- __enable_user_access(); \
- switch (size) { \
- case 4: \
- __asm__ __volatile__ ( \
- "0:\n" \
- " lr.w" #scb " %[ret], %[ptr]\n" \
- " bne %[ret], %z[old], 1f\n" \
- " sc.w" #lrb " %[rc], %z[new], %[ptr]\n" \
- " bnez %[rc], 0b\n" \
- "1:\n" \
- ".section .fixup,\"ax\"\n" \
- ".balign 4\n" \
- "2:\n" \
- " li %[err], %[efault]\n" \
- " jump 1b, %[rc]\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- ".balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 2b\n" \
- ".previous\n" \
- : [ret] "=&r" (__ret), \
- [rc] "=&r" (__rc), \
- [ptr] "+A" (*__ptr), \
- [err] "=&r" (__err) \
- : [old] "rJ" (__old), \
- [new] "rJ" (__new), \
- [efault] "i" (-EFAULT)); \
- break; \
- case 8: \
- __asm__ __volatile__ ( \
- "0:\n" \
- " lr.d" #scb " %[ret], %[ptr]\n" \
- " bne %[ret], %z[old], 1f\n" \
- " sc.d" #lrb " %[rc], %z[new], %[ptr]\n" \
- " bnez %[rc], 0b\n" \
- "1:\n" \
- ".section .fixup,\"ax\"\n" \
- ".balign 4\n" \
- "2:\n" \
- " li %[err], %[efault]\n" \
- " jump 1b, %[rc]\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- ".balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 2b\n" \
- ".previous\n" \
- : [ret] "=&r" (__ret), \
- [rc] "=&r" (__rc), \
- [ptr] "+A" (*__ptr), \
- [err] "=&r" (__err) \
- : [old] "rJ" (__old), \
- [new] "rJ" (__new), \
- [efault] "i" (-EFAULT)); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
- __disable_user_access(); \
- (err) = __err; \
- __ret; \
-})
-
#define HAVE_GET_KERNEL_NOFAULT
#define __get_kernel_nofault(dst, src, type, err_label) \
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 3397ddac1a30..612556faa527 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -43,7 +43,8 @@ obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
-obj-$(CONFIG_SMP) += cpu_ops_spinwait.o
+
+obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 253126e4beef..df0519a64eaf 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
#include <asm/kvm_host.h>
#include <asm/thread_info.h>
#include <asm/ptrace.h>
+#include <asm/cpu_ops_sbi.h>
void asm_offsets(void);
@@ -468,4 +469,6 @@ void asm_offsets(void)
DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN));
OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr);
+ OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr);
+ OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
}
diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
index df84e0c13db1..be7f05b542bb 100644
--- a/arch/riscv/kernel/cpu-hotplug.c
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -14,12 +14,6 @@
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
-void cpu_stop(void);
-void arch_cpu_idle_dead(void)
-{
- cpu_stop();
-}
-
bool cpu_has_hotplug(unsigned int cpu)
{
if (cpu_ops[cpu]->cpu_stop)
@@ -75,7 +69,7 @@ void __cpu_die(unsigned int cpu)
/*
* Called from the idle thread for the CPU which has been shutdown.
*/
-void cpu_stop(void)
+void arch_cpu_idle_dead(void)
{
idle_task_exit();
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index f13b2c9ea912..ad0a7e9f828b 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -7,6 +7,7 @@
#include <linux/seq_file.h>
#include <linux/of.h>
#include <asm/smp.h>
+#include <asm/pgtable.h>
/*
* Returns the hart ID of the given device tree node, or -ENODEV if the node
@@ -71,18 +72,19 @@ static void print_isa(struct seq_file *f, const char *isa)
seq_puts(f, "\n");
}
-static void print_mmu(struct seq_file *f, const char *mmu_type)
+static void print_mmu(struct seq_file *f)
{
+ char sv_type[16];
+
#if defined(CONFIG_32BIT)
- if (strcmp(mmu_type, "riscv,sv32") != 0)
- return;
+ strncpy(sv_type, "sv32", 5);
#elif defined(CONFIG_64BIT)
- if (strcmp(mmu_type, "riscv,sv39") != 0 &&
- strcmp(mmu_type, "riscv,sv48") != 0)
- return;
+ if (pgtable_l4_enabled)
+ strncpy(sv_type, "sv48", 5);
+ else
+ strncpy(sv_type, "sv39", 5);
#endif
-
- seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
+ seq_printf(f, "mmu\t\t: %s\n", sv_type);
}
static void *c_start(struct seq_file *m, loff_t *pos)
@@ -107,14 +109,13 @@ static int c_show(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
struct device_node *node = of_get_cpu_node(cpu_id, NULL);
- const char *compat, *isa, *mmu;
+ const char *compat, *isa;
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
if (!of_property_read_string(node, "riscv,isa", &isa))
print_isa(m, isa);
- if (!of_property_read_string(node, "mmu-type", &mmu))
- print_mmu(m, mmu);
+ print_mmu(m);
if (!of_property_read_string(node, "compatible", &compat)
&& strcmp(compat, "riscv"))
seq_printf(m, "uarch\t\t: %s\n", compat);
diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c
index 1985884fe829..170d07e57721 100644
--- a/arch/riscv/kernel/cpu_ops.c
+++ b/arch/riscv/kernel/cpu_ops.c
@@ -8,37 +8,29 @@
#include <linux/of.h>
#include <linux/string.h>
#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-void *__cpu_up_stack_pointer[NR_CPUS] __section(".data");
-void *__cpu_up_task_pointer[NR_CPUS] __section(".data");
-
extern const struct cpu_operations cpu_ops_sbi;
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
extern const struct cpu_operations cpu_ops_spinwait;
-
-void cpu_update_secondary_bootdata(unsigned int cpuid,
- struct task_struct *tidle)
-{
- int hartid = cpuid_to_hartid_map(cpuid);
-
- /* Make sure tidle is updated */
- smp_mb();
- WRITE_ONCE(__cpu_up_stack_pointer[hartid],
- task_stack_page(tidle) + THREAD_SIZE);
- WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
-}
+#else
+const struct cpu_operations cpu_ops_spinwait = {
+ .name = "",
+ .cpu_prepare = NULL,
+ .cpu_start = NULL,
+};
+#endif
void __init cpu_set_ops(int cpuid)
{
#if IS_ENABLED(CONFIG_RISCV_SBI)
if (sbi_probe_extension(SBI_EXT_HSM) > 0) {
if (!cpuid)
- pr_info("SBI v0.2 HSM extension detected\n");
+ pr_info("SBI HSM extension detected\n");
cpu_ops[cpuid] = &cpu_ops_sbi;
} else
#endif
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index 685fae72b7f5..dae29cbfe550 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -7,13 +7,22 @@
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
+#include <asm/cpu_ops_sbi.h>
#include <asm/sbi.h>
#include <asm/smp.h>
extern char secondary_start_sbi[];
const struct cpu_operations cpu_ops_sbi;
+/*
+ * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can
+ * be invoked from multiple threads in parallel. Define a per cpu data
+ * to handle that.
+ */
+DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
+
static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
unsigned long priv)
{
@@ -55,14 +64,19 @@ static int sbi_hsm_hart_get_status(unsigned long hartid)
static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
{
- int rc;
unsigned long boot_addr = __pa_symbol(secondary_start_sbi);
int hartid = cpuid_to_hartid_map(cpuid);
-
- cpu_update_secondary_bootdata(cpuid, tidle);
- rc = sbi_hsm_hart_start(hartid, boot_addr, 0);
-
- return rc;
+ unsigned long hsm_data;
+ struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid);
+
+ /* Make sure tidle is updated */
+ smp_mb();
+ bdata->task_ptr = tidle;
+ bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+ /* Make sure boot data is updated */
+ smp_mb();
+ hsm_data = __pa(bdata);
+ return sbi_hsm_hart_start(hartid, boot_addr, hsm_data);
}
static int sbi_cpu_prepare(unsigned int cpuid)
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index b2c957bb68c1..346847f6c41c 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -6,11 +6,36 @@
#include <linux/errno.h>
#include <linux/of.h>
#include <linux/string.h>
+#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations cpu_ops_spinwait;
+void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data");
+void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data");
+
+static void cpu_update_secondary_bootdata(unsigned int cpuid,
+ struct task_struct *tidle)
+{
+ int hartid = cpuid_to_hartid_map(cpuid);
+
+ /*
+ * The hartid must be less than NR_CPUS to avoid out-of-bound access
+ * errors for __cpu_spinwait_stack/task_pointer. That is not always possible
+ * for platforms with discontiguous hartid numbering scheme. That's why
+ * spinwait booting is not the recommended approach for any platforms
+ * booting Linux in S-mode and can be disabled in the future.
+ */
+ if (hartid == INVALID_HARTID || hartid >= NR_CPUS)
+ return;
+
+ /* Make sure tidle is updated */
+ smp_mb();
+ WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
+ task_stack_page(tidle) + THREAD_SIZE);
+ WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
+}
static int spinwait_cpu_prepare(unsigned int cpuid)
{
@@ -28,7 +53,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle)
* selects the first cpu to boot the kernel and causes the remainder
* of the cpus to spin in a loop waiting for their stack pointer to be
* setup by that main cpu. Writing to bootdata
- * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they
+ * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they
* can continue the boot process.
*/
cpu_update_secondary_bootdata(cpuid, tidle);
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index f52f01ecbeea..2363b43312fc 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -11,6 +11,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/csr.h>
+#include <asm/cpu_ops_sbi.h>
#include <asm/hwcap.h>
#include <asm/image.h>
#include "efi-header.S"
@@ -105,7 +106,8 @@ relocate:
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
- li a1, SATP_MODE
+ la a1, satp_mode
+ REG_L a1, 0(a1)
or a2, a2, a1
/*
@@ -135,7 +137,7 @@ relocate:
/*
* Switch to kernel page tables. A full fence is necessary in order to
* avoid using the trampoline translations, which are only correct for
- * the first superpage. Fetching the fence is guarnteed to work
+ * the first superpage. Fetching the fence is guaranteed to work
* because that first superpage is translated the same way.
*/
csrw CSR_SATP, a2
@@ -167,18 +169,17 @@ secondary_start_sbi:
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
- slli a3, a0, LGREG
- la a4, __cpu_up_stack_pointer
- XIP_FIXUP_OFFSET a4
- la a5, __cpu_up_task_pointer
- XIP_FIXUP_OFFSET a5
- add a4, a3, a4
- add a5, a3, a5
- REG_L sp, (a4)
- REG_L tp, (a5)
-
- .global secondary_start_common
-secondary_start_common:
+ /* a0 contains the hartid & a1 contains boot data */
+ li a2, SBI_HART_BOOT_TASK_PTR_OFFSET
+ XIP_FIXUP_OFFSET a2
+ add a2, a2, a1
+ REG_L tp, (a2)
+ li a3, SBI_HART_BOOT_STACK_PTR_OFFSET
+ XIP_FIXUP_OFFSET a3
+ add a3, a3, a1
+ REG_L sp, (a3)
+
+.Lsecondary_start_common:
#ifdef CONFIG_MMU
/* Enable virtual memory and relocate to virtual address */
@@ -258,13 +259,13 @@ pmp_done:
li t0, SR_FS
csrc CSR_STATUS, t0
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
li t0, CONFIG_NR_CPUS
blt a0, t0, .Lgood_cores
tail .Lsecondary_park
.Lgood_cores:
-#endif
+ /* The lottery system is only required for spinwait booting method */
#ifndef CONFIG_XIP_KERNEL
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
@@ -283,6 +284,10 @@ pmp_done:
/* first time here if hart_lottery in RAM is not set */
beq t0, t1, .Lsecondary_start
+#endif /* CONFIG_XIP */
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
+
+#ifdef CONFIG_XIP_KERNEL
la sp, _end + THREAD_SIZE
XIP_FIXUP_OFFSET sp
mv s0, a0
@@ -339,16 +344,16 @@ clear_bss_done:
call soc_early_init
tail start_kernel
+#if CONFIG_RISCV_BOOT_SPINWAIT
.Lsecondary_start:
-#ifdef CONFIG_SMP
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
slli a3, a0, LGREG
- la a1, __cpu_up_stack_pointer
+ la a1, __cpu_spinwait_stack_pointer
XIP_FIXUP_OFFSET a1
- la a2, __cpu_up_task_pointer
+ la a2, __cpu_spinwait_task_pointer
XIP_FIXUP_OFFSET a2
add a1, a3, a1
add a2, a3, a2
@@ -365,8 +370,8 @@ clear_bss_done:
beqz tp, .Lwait_for_cpu_up
fence
- tail secondary_start_common
-#endif
+ tail .Lsecondary_start_common
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
END(_start_kernel)
@@ -448,7 +453,3 @@ ENTRY(reset_regs)
ret
END(reset_regs)
#endif /* CONFIG_RISCV_M_MODE */
-
-__PAGE_ALIGNED_BSS
- /* Empty zero page */
- .balign PAGE_SIZE
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
index aabbc3ac3e48..726731ada534 100644
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -16,7 +16,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa);
asmlinkage void __init __copy_data(void);
#endif
-extern void *__cpu_up_stack_pointer[];
-extern void *__cpu_up_task_pointer[];
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
+extern void *__cpu_spinwait_stack_pointer[];
+extern void *__cpu_spinwait_task_pointer[];
+#endif
#endif /* __ASM_HEAD_H */
diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S
index a80b52a74f58..059c5e216ae7 100644
--- a/arch/riscv/kernel/kexec_relocate.S
+++ b/arch/riscv/kernel/kexec_relocate.S
@@ -159,25 +159,15 @@ SYM_CODE_START(riscv_kexec_norelocate)
* s0: (const) Phys address to jump to
* s1: (const) Phys address of the FDT image
* s2: (const) The hartid of the current hart
- * s3: (const) kernel_map.va_pa_offset, used when switching MMU off
*/
mv s0, a1
mv s1, a2
mv s2, a3
- mv s3, a4
/* Disable / cleanup interrupts */
csrw CSR_SIE, zero
csrw CSR_SIP, zero
- /* Switch to physical addressing */
- la s4, 1f
- sub s4, s4, s3
- csrw CSR_STVEC, s4
- csrw CSR_SATP, zero
-
-.align 2
-1:
/* Pass the arguments to the next kernel / Cleanup*/
mv a0, s2
mv a1, s1
@@ -214,7 +204,15 @@ SYM_CODE_START(riscv_kexec_norelocate)
csrw CSR_SCAUSE, zero
csrw CSR_SSCRATCH, zero
- jalr zero, a2, 0
+ /*
+ * Switch to physical addressing
+ * This will also trigger a jump to CSR_STVEC
+ * which in this case is the address of the new
+ * kernel.
+ */
+ csrw CSR_STVEC, a2
+ csrw CSR_SATP, zero
+
SYM_CODE_END(riscv_kexec_norelocate)
.section ".rodata"
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index e6eca271a4d6..cbef0fc73afa 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -169,7 +169,8 @@ machine_kexec(struct kimage *image)
struct kimage_arch *internal = &image->arch;
unsigned long jump_addr = (unsigned long) image->start;
unsigned long first_ind_entry = (unsigned long) &image->head;
- unsigned long this_hart_id = raw_smp_processor_id();
+ unsigned long this_cpu_id = smp_processor_id();
+ unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
unsigned long fdt_addr = internal->fdt_addr;
void *control_code_buffer = page_address(image->control_code_page);
riscv_kexec_method kexec_method = NULL;
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 9c0511119bad..a89243730153 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -42,12 +42,10 @@ static int riscv_gpr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
struct pt_regs *regs;
regs = task_pt_regs(target);
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
}
#ifdef CONFIG_FPU
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 7402a417f38e..f72527fcb347 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/pm.h>
+#include <linux/reboot.h>
#include <asm/sbi.h>
#include <asm/smp.h>
@@ -15,8 +16,8 @@ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
EXPORT_SYMBOL(sbi_spec_version);
static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init;
-static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init;
-static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask,
+static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init;
+static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5) __ro_after_init;
@@ -66,6 +67,30 @@ int sbi_err_map_linux_errno(int err)
EXPORT_SYMBOL(sbi_err_map_linux_errno);
#ifdef CONFIG_RISCV_SBI_V01
+static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask)
+{
+ unsigned long cpuid, hartid;
+ unsigned long hmask = 0;
+
+ /*
+ * There is no maximum hartid concept in RISC-V and NR_CPUS must not be
+ * associated with hartid. As SBI v0.1 is only kept for backward compatibility
+ * and will be removed in the future, there is no point in supporting hartid
+ * greater than BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2
+ * should be used for platforms with hartid greater than BITS_PER_LONG.
+ */
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hartid >= BITS_PER_LONG) {
+ pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n");
+ break;
+ }
+ hmask |= 1 << hartid;
+ }
+
+ return hmask;
+}
+
/**
* sbi_console_putchar() - Writes given character to the console device.
* @ch: The data to be written to the console.
@@ -131,33 +156,44 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
#endif
}
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
- sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask,
+ unsigned long hart_mask;
+
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
+ hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
+
+ sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask),
0, 0, 0, 0, 0);
return 0;
}
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
int result = 0;
+ unsigned long hart_mask;
+
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
+ hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
/* v0.2 function IDs are equivalent to v0.1 extension IDs */
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0,
- (unsigned long)hart_mask, 0, 0, 0, 0, 0);
+ (unsigned long)&hart_mask, 0, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0,
- (unsigned long)hart_mask, start, size,
+ (unsigned long)&hart_mask, start, size,
0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0,
- (unsigned long)hart_mask, start, size,
+ (unsigned long)&hart_mask, start, size,
arg4, 0, 0);
break;
default:
@@ -179,7 +215,7 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
sbi_major_version(), sbi_minor_version());
}
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
pr_warn("IPI extension is not available in SBI v%lu.%lu\n",
sbi_major_version(), sbi_minor_version());
@@ -187,7 +223,7 @@ static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
return 0;
}
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
@@ -211,37 +247,33 @@ static void __sbi_set_timer_v02(uint64_t stime_value)
#endif
}
-static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask)
{
- unsigned long hartid, hmask_val, hbase;
- struct cpumask tmask;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0;
struct sbiret ret = {0};
int result;
- if (!hart_mask || !(*hart_mask)) {
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
- hart_mask = cpumask_bits(&tmask);
- }
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
- hmask_val = 0;
- hbase = 0;
- for_each_set_bit(hartid, hart_mask, NR_CPUS) {
- if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask_val, hbase, 0, 0, 0, 0);
+ hmask, hbase, 0, 0, 0, 0);
if (ret.error)
goto ecall_failed;
- hmask_val = 0;
+ hmask = 0;
hbase = 0;
}
- if (!hmask_val)
+ if (!hmask)
hbase = hartid;
- hmask_val |= 1UL << (hartid - hbase);
+ hmask |= 1UL << (hartid - hbase);
}
- if (hmask_val) {
+ if (hmask) {
ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask_val, hbase, 0, 0, 0, 0);
+ hmask, hbase, 0, 0, 0, 0);
if (ret.error)
goto ecall_failed;
}
@@ -251,11 +283,11 @@ static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
ecall_failed:
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
- __func__, hbase, hmask_val, result);
+ __func__, hbase, hmask, result);
return result;
}
-static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
+static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask,
unsigned long hbase, unsigned long start,
unsigned long size, unsigned long arg4,
unsigned long arg5)
@@ -266,31 +298,31 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0);
+ ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
default:
@@ -302,43 +334,39 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
if (ret.error) {
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
- __func__, hbase, hmask_val, result);
+ __func__, hbase, hmask, result);
}
return result;
}
-static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
- unsigned long hmask_val, hartid, hbase;
- struct cpumask tmask;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0;
int result;
- if (!hart_mask || !(*hart_mask)) {
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
- hart_mask = cpumask_bits(&tmask);
- }
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
- hmask_val = 0;
- hbase = 0;
- for_each_set_bit(hartid, hart_mask, NR_CPUS) {
- if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
- result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
+ result = __sbi_rfence_v02_call(fid, hmask, hbase,
start, size, arg4, arg5);
if (result)
return result;
- hmask_val = 0;
+ hmask = 0;
hbase = 0;
}
- if (!hmask_val)
+ if (!hmask)
hbase = hartid;
- hmask_val |= 1UL << (hartid - hbase);
+ hmask |= 1UL << (hartid - hbase);
}
- if (hmask_val) {
- result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+ if (hmask) {
+ result = __sbi_rfence_v02_call(fid, hmask, hbase,
start, size, arg4, arg5);
if (result)
return result;
@@ -360,44 +388,44 @@ void sbi_set_timer(uint64_t stime_value)
/**
* sbi_send_ipi() - Send an IPI to any hart.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_send_ipi(const unsigned long *hart_mask)
+int sbi_send_ipi(const struct cpumask *cpu_mask)
{
- return __sbi_send_ipi(hart_mask);
+ return __sbi_send_ipi(cpu_mask);
}
EXPORT_SYMBOL(sbi_send_ipi);
/**
* sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_fence_i(const unsigned long *hart_mask)
+int sbi_remote_fence_i(const struct cpumask *cpu_mask)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
- hart_mask, 0, 0, 0, 0);
+ cpu_mask, 0, 0, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_fence_i);
/**
* sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
* harts for the specified virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma);
@@ -405,38 +433,38 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma);
* sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
* remote harts for a virtual address range belonging to a specific ASID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
* @asid: The value of address space identifier (ASID).
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
- hart_mask, start, size, asid, 0);
+ cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
/**
* sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote
* harts for the specified guest physical address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
*
* Return: None
*/
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
@@ -444,38 +472,38 @@ EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
* sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given
* remote harts for a guest physical address range belonging to a specific VMID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
* @vmid: The value of guest ID (VMID).
*
* Return: 0 if success, Error otherwise.
*/
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
- hart_mask, start, size, vmid, 0);
+ cpu_mask, start, size, vmid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid);
/**
* sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote
* harts for the current guest virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
*
* Return: None
*/
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma);
@@ -484,23 +512,49 @@ EXPORT_SYMBOL(sbi_remote_hfence_vvma);
* remote harts for current guest virtual address range belonging to a specific
* ASID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
* @asid: The value of address space identifier (ASID).
*
* Return: None
*/
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
- hart_mask, start, size, asid, 0);
+ cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid);
+static void sbi_srst_reset(unsigned long type, unsigned long reason)
+{
+ sbi_ecall(SBI_EXT_SRST, SBI_EXT_SRST_RESET, type, reason,
+ 0, 0, 0, 0);
+ pr_warn("%s: type=0x%lx reason=0x%lx failed\n",
+ __func__, type, reason);
+}
+
+static int sbi_srst_reboot(struct notifier_block *this,
+ unsigned long mode, void *cmd)
+{
+ sbi_srst_reset((mode == REBOOT_WARM || mode == REBOOT_SOFT) ?
+ SBI_SRST_RESET_TYPE_WARM_REBOOT :
+ SBI_SRST_RESET_TYPE_COLD_REBOOT,
+ SBI_SRST_RESET_REASON_NONE);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block sbi_srst_reboot_nb;
+
+static void sbi_srst_power_off(void)
+{
+ sbi_srst_reset(SBI_SRST_RESET_TYPE_SHUTDOWN,
+ SBI_SRST_RESET_REASON_NONE);
+}
+
/**
* sbi_probe_extension() - Check if an SBI extension ID is supported or not.
* @extid: The extension ID to be probed.
@@ -564,11 +618,7 @@ long sbi_get_mimpid(void)
static void sbi_send_cpumask_ipi(const struct cpumask *target)
{
- struct cpumask hartid_mask;
-
- riscv_cpuid_to_hartid_mask(target, &hartid_mask);
-
- sbi_send_ipi(cpumask_bits(&hartid_mask));
+ sbi_send_ipi(target);
}
static const struct riscv_ipi_ops sbi_ipi_ops = {
@@ -608,6 +658,14 @@ void __init sbi_init(void)
} else {
__sbi_rfence = __sbi_rfence_v01;
}
+ if ((sbi_spec_version >= sbi_mk_version(0, 3)) &&
+ (sbi_probe_extension(SBI_EXT_SRST) > 0)) {
+ pr_info("SBI SRST extension detected\n");
+ pm_power_off = sbi_srst_power_off;
+ sbi_srst_reboot_nb.notifier_call = sbi_srst_reboot;
+ sbi_srst_reboot_nb.priority = 192;
+ register_restart_handler(&sbi_srst_reboot_nb);
+ }
} else {
__sbi_set_timer = __sbi_set_timer_v01;
__sbi_send_ipi = __sbi_send_ipi_v01;
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 2f6da845c9ae..b5d30ea92292 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -59,16 +59,6 @@ int riscv_hartid_to_cpuid(int hartid)
return -ENOENT;
}
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
-{
- int cpu;
-
- cpumask_clear(out);
- for_each_cpu(cpu, in)
- cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
-}
-EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
-
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
return phys_id == cpuid_to_hartid_map(cpu);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index bd82375db51a..622f226454d5 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -96,7 +96,7 @@ void __init setup_smp(void)
if (cpuid >= NR_CPUS) {
pr_warn("Invalid cpuid [%d] for hartid [%d]\n",
cpuid, hart);
- break;
+ continue;
}
cpuid_to_hartid_map(cpuid) = hart;
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 0daaa3e4630d..fe92e119e6a3 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -54,7 +54,7 @@ void die(struct pt_regs *regs, const char *str)
if (panic_on_oops)
panic("Fatal exception");
if (ret != NOTIFY_STOP)
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
index f5ed08262139..75e0fa8a700a 100644
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -45,7 +45,6 @@ SECTIONS
ENTRY_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
- *(.fixup)
_etext = .;
}
RO_DATA(L1_CACHE_BYTES)
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 5104f3a871e3..4e6c88aa4d87 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -4,7 +4,7 @@
* Copyright (C) 2017 SiFive
*/
-#define RO_EXCEPTION_TABLE_ALIGN 16
+#define RO_EXCEPTION_TABLE_ALIGN 4
#ifdef CONFIG_XIP_KERNEL
#include "vmlinux-xip.lds.S"
@@ -48,7 +48,6 @@ SECTIONS
ENTRY_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
- *(.fixup)
_etext = .;
}
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 30cdd1df0098..e5c56182f48f 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -5,14 +5,10 @@
ccflags-y += -I $(srctree)/$(src)
-KVM := ../../../virt/kvm
+include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
-kvm-y += $(KVM)/kvm_main.o
-kvm-y += $(KVM)/coalesced_mmio.o
-kvm-y += $(KVM)/binary_stats.o
-kvm-y += $(KVM)/eventfd.o
kvm-y += main.o
kvm-y += vm.o
kvm-y += vmid.o
@@ -23,4 +19,8 @@ kvm-y += vcpu_exit.o
kvm-y += vcpu_fp.o
kvm-y += vcpu_switch.o
kvm-y += vcpu_sbi.o
+kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
+kvm-y += vcpu_sbi_base.o
+kvm-y += vcpu_sbi_replace.o
+kvm-y += vcpu_sbi_hsm.o
kvm-y += vcpu_timer.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 421ecf4e6360..2e5ca43c8c49 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -58,6 +58,14 @@ int kvm_arch_hardware_enable(void)
void kvm_arch_hardware_disable(void)
{
+ /*
+ * After clearing the hideleg CSR, the host kernel will receive
+ * spurious interrupts if hvip CSR has pending interrupts and the
+ * corresponding enable bits in vsie CSR are asserted. To avoid it,
+ * hvip CSR and vsie CSR must be cleared before clearing hideleg CSR.
+ */
+ csr_write(CSR_VSIE, 0);
+ csr_write(CSR_HVIP, 0);
csr_write(CSR_HEDELEG, 0);
csr_write(CSR_HIDELEG, 0);
}
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index fc058ff5f4b6..f80a34fbf102 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -83,43 +83,6 @@ static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
return 0;
}
-static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
- int min, int max)
-{
- void *page;
-
- BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS);
- if (pcache->nobjs >= min)
- return 0;
- while (pcache->nobjs < max) {
- page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- if (!page)
- return -ENOMEM;
- pcache->objects[pcache->nobjs++] = page;
- }
-
- return 0;
-}
-
-static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache)
-{
- while (pcache && pcache->nobjs)
- free_page((unsigned long)pcache->objects[--pcache->nobjs]);
-}
-
-static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
-{
- void *p;
-
- if (!pcache)
- return NULL;
-
- BUG_ON(!pcache->nobjs);
- p = pcache->objects[--pcache->nobjs];
-
- return p;
-}
-
static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
pte_t **ptepp, u32 *ptep_level)
{
@@ -151,7 +114,6 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
{
- struct cpumask hmask;
unsigned long size = PAGE_SIZE;
struct kvm_vmid *vmid = &kvm->arch.vmid;
@@ -164,14 +126,13 @@ static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
* where the Guest/VM is running.
*/
preempt_disable();
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
- sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size,
+ sbi_remote_hfence_gvma_vmid(cpu_online_mask, addr, size,
READ_ONCE(vmid->vmid));
preempt_enable();
}
static int stage2_set_pte(struct kvm *kvm, u32 level,
- struct kvm_mmu_page_cache *pcache,
+ struct kvm_mmu_memory_cache *pcache,
gpa_t addr, const pte_t *new_pte)
{
u32 current_level = stage2_pgd_levels - 1;
@@ -186,7 +147,9 @@ static int stage2_set_pte(struct kvm *kvm, u32 level,
return -EEXIST;
if (!pte_val(*ptep)) {
- next_ptep = stage2_cache_alloc(pcache);
+ if (!pcache)
+ return -ENOMEM;
+ next_ptep = kvm_mmu_memory_cache_alloc(pcache);
if (!next_ptep)
return -ENOMEM;
*ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
@@ -209,7 +172,7 @@ static int stage2_set_pte(struct kvm *kvm, u32 level,
}
static int stage2_map_page(struct kvm *kvm,
- struct kvm_mmu_page_cache *pcache,
+ struct kvm_mmu_memory_cache *pcache,
gpa_t gpa, phys_addr_t hpa,
unsigned long page_size,
bool page_rdonly, bool page_exec)
@@ -384,7 +347,10 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
int ret = 0;
unsigned long pfn;
phys_addr_t addr, end;
- struct kvm_mmu_page_cache pcache = { 0, };
+ struct kvm_mmu_memory_cache pcache;
+
+ memset(&pcache, 0, sizeof(pcache));
+ pcache.gfp_zero = __GFP_ZERO;
end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(hpa);
@@ -395,9 +361,7 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
if (!writable)
pte = pte_wrprotect(pte);
- ret = stage2_cache_topup(&pcache,
- stage2_pgd_levels,
- KVM_MMU_PAGE_CACHE_NR_OBJS);
+ ret = kvm_mmu_topup_memory_cache(&pcache, stage2_pgd_levels);
if (ret)
goto out;
@@ -411,7 +375,7 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
}
out:
- stage2_cache_flush(&pcache);
+ kvm_mmu_free_memory_cache(&pcache);
return ret;
}
@@ -462,7 +426,6 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -472,18 +435,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* allocated dirty_bitmap[], dirty pages will be tracked while
* the memory slot is write protected.
*/
- if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
- stage2_wp_memory_region(kvm, mem->slot);
+ if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES)
+ stage2_wp_memory_region(kvm, new->id);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- hva_t hva = mem->userspace_addr;
- hva_t reg_end = hva + mem->memory_size;
- bool writable = !(mem->flags & KVM_MEM_READONLY);
+ hva_t hva, reg_end, size;
+ gpa_t base_gpa;
+ bool writable;
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -494,10 +457,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the GPA
* space addressable by the KVM guest GPA space.
*/
- if ((memslot->base_gfn + memslot->npages) >=
+ if ((new->base_gfn + new->npages) >=
(stage2_gpa_size >> PAGE_SHIFT))
return -EFAULT;
+ hva = new->userspace_addr;
+ size = new->npages << PAGE_SHIFT;
+ reg_end = hva + size;
+ base_gpa = new->base_gfn << PAGE_SHIFT;
+ writable = !(new->flags & KVM_MEM_READONLY);
+
mmap_read_lock(current->mm);
/*
@@ -533,15 +502,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
vm_end = min(reg_end, vma->vm_end);
if (vma->vm_flags & VM_PFNMAP) {
- gpa_t gpa = mem->guest_phys_addr +
- (vm_start - mem->userspace_addr);
+ gpa_t gpa = base_gpa + (vm_start - hva);
phys_addr_t pa;
pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
pa += vm_start - vma->vm_start;
/* IO region dirty page logging not allowed */
- if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
goto out;
}
@@ -559,8 +527,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
if (ret)
- stage2_unmap_range(kvm, mem->guest_phys_addr,
- mem->memory_size, false);
+ stage2_unmap_range(kvm, base_gpa, size, false);
spin_unlock(&kvm->mmu_lock);
out:
@@ -646,7 +613,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
gfn_t gfn = gpa >> PAGE_SHIFT;
struct vm_area_struct *vma;
struct kvm *kvm = vcpu->kvm;
- struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache;
+ struct kvm_mmu_memory_cache *pcache = &vcpu->arch.mmu_page_cache;
bool logging = (memslot->dirty_bitmap &&
!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
unsigned long vma_pagesize, mmu_seq;
@@ -681,8 +648,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
}
/* We need minimum second+third level pages */
- ret = stage2_cache_topup(pcache, stage2_pgd_levels,
- KVM_MMU_PAGE_CACHE_NR_OBJS);
+ ret = kvm_mmu_topup_memory_cache(pcache, stage2_pgd_levels);
if (ret) {
kvm_err("Failed to topup stage2 cache\n");
return ret;
@@ -731,11 +697,6 @@ out_unlock:
return ret;
}
-void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu)
-{
- stage2_cache_flush(&vcpu->arch.mmu_page_cache);
-}
-
int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
{
struct page *pgd_page;
@@ -806,3 +767,8 @@ unsigned long kvm_riscv_stage2_mode(void)
{
return stage2_mode >> HGATP_MODE_SHIFT;
}
+
+int kvm_riscv_stage2_gpa_bits(void)
+{
+ return stage2_gpa_bits;
+}
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index fb84619df012..0c5239e05721 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -53,6 +53,17 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
+ bool loaded;
+
+ /**
+ * The preemption should be disabled here because it races with
+ * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which
+ * also calls vcpu_load/put.
+ */
+ get_cpu();
+ loaded = (vcpu->cpu != -1);
+ if (loaded)
+ kvm_arch_vcpu_put(vcpu);
memcpy(csr, reset_csr, sizeof(*csr));
@@ -64,6 +75,11 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
WRITE_ONCE(vcpu->arch.irqs_pending, 0);
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
+
+ /* Reset the guest CSRs for hotplug usecase */
+ if (loaded)
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ put_cpu();
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
@@ -77,6 +93,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
+ vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
/* Setup ISA features available to VCPU */
vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
@@ -100,6 +117,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
+ /**
+ * vcpu with id 0 is the designated boot cpu.
+ * Keep all vcpus with non-zero id in power-off state so that
+ * they can be brought up using SBI HSM extension.
+ */
+ if (vcpu->vcpu_idx != 0)
+ kvm_riscv_vcpu_power_off(vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -107,8 +131,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
/* Cleanup VCPU timer */
kvm_riscv_vcpu_timer_deinit(vcpu);
- /* Flush the pages pre-allocated for Stage2 page table mappings */
- kvm_riscv_stage2_flush_cache(vcpu);
+ /* Free unused pages pre-allocated for Stage2 page table mappings */
+ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 7f2d742ae4c6..571f319e995a 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -146,7 +146,7 @@ static int system_opcode_insn(struct kvm_vcpu *vcpu,
vcpu->stat.wfi_exit_stat++;
if (!kvm_arch_vcpu_runnable(vcpu)) {
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c
index 1b070152578f..4449a976e5a6 100644
--- a/arch/riscv/kvm/vcpu_fp.c
+++ b/arch/riscv/kvm/vcpu_fp.c
@@ -26,7 +26,7 @@ void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
cntx->sstatus |= SR_FS_OFF;
}
-void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
+static void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
{
cntx->sstatus &= ~SR_FS;
cntx->sstatus |= SR_FS_CLEAN;
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 3b0e703d22cf..78aa3db76225 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -9,15 +9,58 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
-#include <asm/csr.h>
#include <asm/sbi.h>
-#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
-#define SBI_VERSION_MAJOR 0
-#define SBI_VERSION_MINOR 1
+static int kvm_linux_err_map_sbi(int err)
+{
+ switch (err) {
+ case 0:
+ return SBI_SUCCESS;
+ case -EPERM:
+ return SBI_ERR_DENIED;
+ case -EINVAL:
+ return SBI_ERR_INVALID_PARAM;
+ case -EFAULT:
+ return SBI_ERR_INVALID_ADDRESS;
+ case -EOPNOTSUPP:
+ return SBI_ERR_NOT_SUPPORTED;
+ case -EALREADY:
+ return SBI_ERR_ALREADY_AVAILABLE;
+ default:
+ return SBI_ERR_FAILURE;
+ };
+}
-static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu,
- struct kvm_run *run)
+#ifdef CONFIG_RISCV_SBI_V01
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
+#else
+static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+ .extid_start = -1UL,
+ .extid_end = -1UL,
+ .handler = NULL,
+};
+#endif
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
+
+static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
+ &vcpu_sbi_ext_v01,
+ &vcpu_sbi_ext_base,
+ &vcpu_sbi_ext_time,
+ &vcpu_sbi_ext_ipi,
+ &vcpu_sbi_ext_rfence,
+ &vcpu_sbi_ext_hsm,
+ &vcpu_sbi_ext_experimental,
+ &vcpu_sbi_ext_vendor,
+};
+
+void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -55,131 +98,73 @@ int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
-#ifdef CONFIG_RISCV_SBI_V01
-
-static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
- struct kvm_run *run, u32 type)
+const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid)
{
- int i;
- struct kvm_vcpu *tmp;
+ int i = 0;
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.power_off = true;
- kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+ for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
+ if (sbi_ext[i]->extid_start <= extid &&
+ sbi_ext[i]->extid_end >= extid)
+ return sbi_ext[i];
+ }
- memset(&run->system_event, 0, sizeof(run->system_event));
- run->system_event.type = type;
- run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+ return NULL;
}
int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- ulong hmask;
- int i, ret = 1;
- u64 next_cycle;
- struct kvm_vcpu *rvcpu;
+ int ret = 1;
bool next_sepc = true;
- struct cpumask cm, hm;
- struct kvm *kvm = vcpu->kvm;
- struct kvm_cpu_trap utrap = { 0 };
+ bool userspace_exit = false;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ const struct kvm_vcpu_sbi_extension *sbi_ext;
+ struct kvm_cpu_trap utrap = { 0 };
+ unsigned long out_val = 0;
+ bool ext_is_v01 = false;
- if (!cp)
- return -EINVAL;
-
- switch (cp->a7) {
- case SBI_EXT_0_1_CONSOLE_GETCHAR:
- case SBI_EXT_0_1_CONSOLE_PUTCHAR:
- /*
- * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
- * handled in kernel so we forward these to user-space
- */
- kvm_riscv_vcpu_sbi_forward(vcpu, run);
- next_sepc = false;
- ret = 0;
- break;
- case SBI_EXT_0_1_SET_TIMER:
-#if __riscv_xlen == 32
- next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
-#else
- next_cycle = (u64)cp->a0;
+ sbi_ext = kvm_vcpu_sbi_find_ext(cp->a7);
+ if (sbi_ext && sbi_ext->handler) {
+#ifdef CONFIG_RISCV_SBI_V01
+ if (cp->a7 >= SBI_EXT_0_1_SET_TIMER &&
+ cp->a7 <= SBI_EXT_0_1_SHUTDOWN)
+ ext_is_v01 = true;
#endif
- kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
- break;
- case SBI_EXT_0_1_CLEAR_IPI:
- kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
- break;
- case SBI_EXT_0_1_SEND_IPI:
- if (cp->a0)
- hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
- &utrap);
- else
- hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
- if (utrap.scause) {
- utrap.sepc = cp->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- next_sepc = false;
- break;
- }
- for_each_set_bit(i, &hmask, BITS_PER_LONG) {
- rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
- kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
- }
- break;
- case SBI_EXT_0_1_SHUTDOWN:
- kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
- next_sepc = false;
- ret = 0;
- break;
- case SBI_EXT_0_1_REMOTE_FENCE_I:
- case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
- case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
- if (cp->a0)
- hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
- &utrap);
- else
- hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
- if (utrap.scause) {
- utrap.sepc = cp->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- next_sepc = false;
- break;
- }
- cpumask_clear(&cm);
- for_each_set_bit(i, &hmask, BITS_PER_LONG) {
- rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
- if (rvcpu->cpu < 0)
- continue;
- cpumask_set_cpu(rvcpu->cpu, &cm);
- }
- riscv_cpuid_to_hartid_mask(&cm, &hm);
- if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
- sbi_remote_fence_i(cpumask_bits(&hm));
- else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
- sbi_remote_hfence_vvma(cpumask_bits(&hm),
- cp->a1, cp->a2);
- else
- sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
- cp->a1, cp->a2, cp->a3);
- break;
- default:
+ ret = sbi_ext->handler(vcpu, run, &out_val, &utrap, &userspace_exit);
+ } else {
/* Return error for unsupported SBI calls */
cp->a0 = SBI_ERR_NOT_SUPPORTED;
- break;
+ goto ecall_done;
+ }
+
+ /* Handle special error cases i.e trap, exit or userspace forward */
+ if (utrap.scause) {
+ /* No need to increment sepc or exit ioctl loop */
+ ret = 1;
+ utrap.sepc = cp->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ next_sepc = false;
+ goto ecall_done;
}
+ /* Exit ioctl loop or Propagate the error code the guest */
+ if (userspace_exit) {
+ next_sepc = false;
+ ret = 0;
+ } else {
+ /**
+ * SBI extension handler always returns an Linux error code. Convert
+ * it to the SBI specific error code that can be propagated the SBI
+ * caller.
+ */
+ ret = kvm_linux_err_map_sbi(ret);
+ cp->a0 = ret;
+ ret = 1;
+ }
+ecall_done:
if (next_sepc)
cp->sepc += 4;
+ if (!ext_is_v01)
+ cp->a1 = out_val;
return ret;
}
-
-#else
-
-int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- kvm_riscv_vcpu_sbi_forward(vcpu, run);
- return 0;
-}
-
-#endif
diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c
new file mode 100644
index 000000000000..4ecf377f483b
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_base.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *trap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct sbiret ecall_ret;
+
+ switch (cp->a6) {
+ case SBI_EXT_BASE_GET_SPEC_VERSION:
+ *out_val = (KVM_SBI_VERSION_MAJOR <<
+ SBI_SPEC_VERSION_MAJOR_SHIFT) |
+ KVM_SBI_VERSION_MINOR;
+ break;
+ case SBI_EXT_BASE_GET_IMP_ID:
+ *out_val = KVM_SBI_IMPID;
+ break;
+ case SBI_EXT_BASE_GET_IMP_VERSION:
+ *out_val = 0;
+ break;
+ case SBI_EXT_BASE_PROBE_EXT:
+ if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
+ cp->a0 <= SBI_EXT_EXPERIMENTAL_END) ||
+ (cp->a0 >= SBI_EXT_VENDOR_START &&
+ cp->a0 <= SBI_EXT_VENDOR_END)) {
+ /*
+ * For experimental/vendor extensions
+ * forward it to the userspace
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ } else
+ *out_val = kvm_vcpu_sbi_find_ext(cp->a0) ? 1 : 0;
+ break;
+ case SBI_EXT_BASE_GET_MVENDORID:
+ case SBI_EXT_BASE_GET_MARCHID:
+ case SBI_EXT_BASE_GET_MIMPID:
+ ecall_ret = sbi_ecall(SBI_EXT_BASE, cp->a6, 0, 0, 0, 0, 0, 0);
+ if (!ecall_ret.error)
+ *out_val = ecall_ret.value;
+ /*TODO: We are unnecessarily converting the error twice */
+ ret = sbi_err_map_linux_errno(ecall_ret.error);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base = {
+ .extid_start = SBI_EXT_BASE,
+ .extid_end = SBI_EXT_BASE,
+ .handler = kvm_sbi_ext_base_handler,
+};
+
+static int kvm_sbi_ext_forward_handler(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ /*
+ * Both SBI experimental and vendor extensions are
+ * unconditionally forwarded to userspace.
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ return 0;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = {
+ .extid_start = SBI_EXT_EXPERIMENTAL_START,
+ .extid_end = SBI_EXT_EXPERIMENTAL_END,
+ .handler = kvm_sbi_ext_forward_handler,
+};
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = {
+ .extid_start = SBI_EXT_VENDOR_START,
+ .extid_end = SBI_EXT_VENDOR_END,
+ .handler = kvm_sbi_ext_forward_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
new file mode 100644
index 000000000000..2e383687fa48
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *reset_cntx;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct kvm_vcpu *target_vcpu;
+ unsigned long target_vcpuid = cp->a0;
+
+ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+ if (!target_vcpu)
+ return -EINVAL;
+ if (!target_vcpu->arch.power_off)
+ return -EALREADY;
+
+ reset_cntx = &target_vcpu->arch.guest_reset_context;
+ /* start address */
+ reset_cntx->sepc = cp->a1;
+ /* target vcpu id to start */
+ reset_cntx->a0 = target_vcpuid;
+ /* private data passed from kernel */
+ reset_cntx->a1 = cp->a2;
+ kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
+
+ kvm_riscv_vcpu_power_on(target_vcpu);
+
+ return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.power_off)
+ return -EINVAL;
+
+ kvm_riscv_vcpu_power_off(vcpu);
+
+ return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long target_vcpuid = cp->a0;
+ struct kvm_vcpu *target_vcpu;
+
+ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+ if (!target_vcpu)
+ return -EINVAL;
+ if (!target_vcpu->arch.power_off)
+ return SBI_HSM_HART_STATUS_STARTED;
+ else
+ return SBI_HSM_HART_STATUS_STOPPED;
+}
+
+static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long funcid = cp->a6;
+
+ switch (funcid) {
+ case SBI_EXT_HSM_HART_START:
+ mutex_lock(&kvm->lock);
+ ret = kvm_sbi_hsm_vcpu_start(vcpu);
+ mutex_unlock(&kvm->lock);
+ break;
+ case SBI_EXT_HSM_HART_STOP:
+ ret = kvm_sbi_hsm_vcpu_stop(vcpu);
+ break;
+ case SBI_EXT_HSM_HART_STATUS:
+ ret = kvm_sbi_hsm_vcpu_get_status(vcpu);
+ if (ret >= 0) {
+ *out_val = ret;
+ ret = 0;
+ }
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm = {
+ .extid_start = SBI_EXT_HSM,
+ .extid_end = SBI_EXT_HSM,
+ .handler = kvm_sbi_ext_hsm_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
new file mode 100644
index 000000000000..1bc0608a5bfd
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ u64 next_cycle;
+
+ if (cp->a6 != SBI_EXT_TIME_SET_TIMER)
+ return -EINVAL;
+
+#if __riscv_xlen == 32
+ next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+ next_cycle = (u64)cp->a0;
+#endif
+ kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time = {
+ .extid_start = SBI_EXT_TIME,
+ .extid_end = SBI_EXT_TIME,
+ .handler = kvm_sbi_ext_time_handler,
+};
+
+static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ unsigned long i;
+ struct kvm_vcpu *tmp;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long hmask = cp->a0;
+ unsigned long hbase = cp->a1;
+
+ if (cp->a6 != SBI_EXT_IPI_SEND_IPI)
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ if (hbase != -1UL) {
+ if (tmp->vcpu_id < hbase)
+ continue;
+ if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+ continue;
+ }
+ ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi = {
+ .extid_start = SBI_EXT_IPI,
+ .extid_end = SBI_EXT_IPI,
+ .handler = kvm_sbi_ext_ipi_handler,
+};
+
+static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ unsigned long i;
+ struct cpumask cm;
+ struct kvm_vcpu *tmp;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long hmask = cp->a0;
+ unsigned long hbase = cp->a1;
+ unsigned long funcid = cp->a6;
+
+ cpumask_clear(&cm);
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ if (hbase != -1UL) {
+ if (tmp->vcpu_id < hbase)
+ continue;
+ if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+ continue;
+ }
+ if (tmp->cpu < 0)
+ continue;
+ cpumask_set_cpu(tmp->cpu, &cm);
+ }
+
+ switch (funcid) {
+ case SBI_EXT_RFENCE_REMOTE_FENCE_I:
+ ret = sbi_remote_fence_i(&cm);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
+ ret = sbi_remote_hfence_vvma(&cm, cp->a2, cp->a3);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
+ ret = sbi_remote_hfence_vvma_asid(&cm, cp->a2,
+ cp->a3, cp->a4);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
+ /* TODO: implement for nested hypervisor case */
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence = {
+ .extid_start = SBI_EXT_RFENCE,
+ .extid_end = SBI_EXT_RFENCE,
+ .handler = kvm_sbi_ext_rfence_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
new file mode 100644
index 000000000000..07e2de14433a
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
+ struct kvm_run *run, u32 type)
+{
+ unsigned long i;
+ struct kvm_vcpu *tmp;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+ tmp->arch.power_off = true;
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+ memset(&run->system_event, 0, sizeof(run->system_event));
+ run->system_event.type = type;
+ run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ ulong hmask;
+ int i, ret = 0;
+ u64 next_cycle;
+ struct kvm_vcpu *rvcpu;
+ struct cpumask cm;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+ switch (cp->a7) {
+ case SBI_EXT_0_1_CONSOLE_GETCHAR:
+ case SBI_EXT_0_1_CONSOLE_PUTCHAR:
+ /*
+ * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
+ * handled in kernel so we forward these to user-space
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ break;
+ case SBI_EXT_0_1_SET_TIMER:
+#if __riscv_xlen == 32
+ next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+ next_cycle = (u64)cp->a0;
+#endif
+ ret = kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+ break;
+ case SBI_EXT_0_1_CLEAR_IPI:
+ ret = kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
+ break;
+ case SBI_EXT_0_1_SEND_IPI:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap->scause)
+ break;
+
+ for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+ rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
+ if (ret < 0)
+ break;
+ }
+ break;
+ case SBI_EXT_0_1_SHUTDOWN:
+ kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
+ *exit = true;
+ break;
+ case SBI_EXT_0_1_REMOTE_FENCE_I:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap->scause)
+ break;
+
+ cpumask_clear(&cm);
+ for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+ rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ if (rvcpu->cpu < 0)
+ continue;
+ cpumask_set_cpu(rvcpu->cpu, &cm);
+ }
+ if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
+ ret = sbi_remote_fence_i(&cm);
+ else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
+ ret = sbi_remote_hfence_vvma(&cm, cp->a1, cp->a2);
+ else
+ ret = sbi_remote_hfence_vvma_asid(&cm, cp->a1, cp->a2, cp->a3);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ };
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+ .extid_start = SBI_EXT_0_1_SET_TIMER,
+ .extid_end = SBI_EXT_0_1_SHUTDOWN,
+ .handler = kvm_sbi_ext_v01_handler,
+};
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index fb18af34a4b5..c768f75279ef 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -46,15 +46,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- int i;
-
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- if (kvm->vcpus[i]) {
- kvm_vcpu_destroy(kvm->vcpus[i]);
- kvm->vcpus[i] = NULL;
- }
- }
- atomic_set(&kvm->online_vcpus, 0);
+ kvm_destroy_vcpus(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -82,6 +74,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
break;
+ case KVM_CAP_VM_GPA_BITS:
+ r = kvm_riscv_stage2_gpa_bits();
+ break;
default:
r = 0;
break;
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index 2c6253b293bc..2fa4f7b1813d 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -65,9 +65,8 @@ bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *v;
- struct cpumask hmask;
struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
@@ -102,8 +101,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
* running, we force VM exits on all host CPUs using IPI and
* flush all Guest TLBs.
*/
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
- sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0);
+ sbi_remote_hfence_gvma(cpu_online_mask, 0, 0);
}
vmid->vmid = vmid_next;
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 63bc691cff91..8c475f4da308 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -1,15 +1,13 @@
#include <linux/linkage.h>
#include <asm-generic/export.h>
#include <asm/asm.h>
+#include <asm/asm-extable.h>
#include <asm/csr.h>
.macro fixup op reg addr lbl
100:
\op \reg, \addr
- .section __ex_table,"a"
- .balign RISCV_SZPTR
- RISCV_PTR 100b, \lbl
- .previous
+ _asm_extable 100b, \lbl
.endm
ENTRY(__asm_copy_to_user)
@@ -173,6 +171,13 @@ ENTRY(__asm_copy_from_user)
csrc CSR_STATUS, t6
li a0, 0
ret
+
+ /* Exception fixup code */
+10:
+ /* Disable access to user memory */
+ csrs CSR_STATUS, t6
+ mv a0, t5
+ ret
ENDPROC(__asm_copy_to_user)
ENDPROC(__asm_copy_from_user)
EXPORT_SYMBOL(__asm_copy_to_user)
@@ -218,19 +223,12 @@ ENTRY(__clear_user)
addi a0, a0, 1
bltu a0, a3, 5b
j 3b
-ENDPROC(__clear_user)
-EXPORT_SYMBOL(__clear_user)
- .section .fixup,"ax"
- .balign 4
- /* Fixup code for __copy_user(10) and __clear_user(11) */
-10:
- /* Disable access to user memory */
- csrs CSR_STATUS, t6
- mv a0, t5
- ret
+ /* Exception fixup code */
11:
+ /* Disable access to user memory */
csrs CSR_STATUS, t6
mv a0, a1
ret
- .previous
+ENDPROC(__clear_user)
+EXPORT_SYMBOL(__clear_user)
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 89f81067e09e..6cb7d96ad9c7 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -67,10 +67,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
*/
smp_mb();
} else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
- cpumask_t hartid_mask;
-
- riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
- sbi_remote_fence_i(cpumask_bits(&hartid_mask));
+ sbi_remote_fence_i(&others);
} else {
on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
}
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ea54cc0c9106..7acbfbd14557 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -192,7 +192,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
switch_mm_fast:
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
((cntx & asid_mask) << SATP_ASID_SHIFT) |
- SATP_MODE);
+ satp_mode);
if (need_flush_tlb)
local_flush_tlb_all();
@@ -201,7 +201,7 @@ switch_mm_fast:
static void set_mm_noasid(struct mm_struct *mm)
{
/* Switch the page table and blindly nuke entire local TLB */
- csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE);
+ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
local_flush_tlb_all();
}
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index ddb7d3b99e89..05978f78579f 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -7,27 +7,65 @@
*/
+#include <linux/bitfield.h>
#include <linux/extable.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <asm/asm-extable.h>
+#include <asm/ptrace.h>
-#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I)
-int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs);
-#endif
+static inline unsigned long
+get_ex_fixup(const struct exception_table_entry *ex)
+{
+ return ((unsigned long)&ex->fixup + ex->fixup);
+}
+
+static bool ex_handler_fixup(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ regs->epc = get_ex_fixup(ex);
+ return true;
+}
+
+static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
+ unsigned long val)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return;
+
+ if (!offset)
+ *(unsigned long *)((unsigned long)regs + offset) = val;
+}
+
+static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+ int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data);
+
+ regs_set_gpr(regs, reg_err, -EFAULT);
+ regs_set_gpr(regs, reg_zero, 0);
+
+ regs->epc = get_ex_fixup(ex);
+ return true;
+}
-int fixup_exception(struct pt_regs *regs)
+bool fixup_exception(struct pt_regs *regs)
{
- const struct exception_table_entry *fixup;
+ const struct exception_table_entry *ex;
- fixup = search_exception_tables(regs->epc);
- if (!fixup)
- return 0;
+ ex = search_exception_tables(regs->epc);
+ if (!ex)
+ return false;
-#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I)
- if (regs->epc >= BPF_JIT_REGION_START && regs->epc < BPF_JIT_REGION_END)
- return rv_bpf_fixup_exception(fixup, regs);
-#endif
+ switch (ex->type) {
+ case EX_TYPE_FIXUP:
+ return ex_handler_fixup(ex, regs);
+ case EX_TYPE_BPF:
+ return ex_handler_bpf(ex, regs);
+ case EX_TYPE_UACCESS_ERR_ZERO:
+ return ex_handler_uaccess_err_zero(ex, regs);
+ }
- regs->epc = fixup->fixup;
- return 1;
+ BUG();
}
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index aa08dd2f8fae..4e9efbe46d5f 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -31,7 +31,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
bust_spinlocks(0);
die(regs, "Oops");
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
}
static inline void no_context(struct pt_regs *regs, unsigned long addr)
@@ -235,7 +235,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
* only copy the information from the master page table,
* nothing more.
*/
- if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+ if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) {
vmalloc_fault(regs, code, addr);
return;
}
@@ -330,7 +330,7 @@ good_area:
if (fault_signal_pending(fault, regs))
return;
- if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+ if (unlikely(fault & VM_FAULT_RETRY)) {
flags |= FAULT_FLAG_TRIED;
/*
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 24b2b8044602..cf4d018b7d66 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -37,13 +37,19 @@ EXPORT_SYMBOL(kernel_map);
#define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
#endif
+#ifdef CONFIG_64BIT
+u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
+#else
+u64 satp_mode = SATP_MODE_32;
+#endif
+EXPORT_SYMBOL(satp_mode);
+
+bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
+EXPORT_SYMBOL(pgtable_l4_enabled);
+
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
-#ifdef CONFIG_XIP_KERNEL
-extern char _xiprom[], _exiprom[], __data_loc;
-#endif
-
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -53,15 +59,6 @@ extern char _start[];
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;
-struct pt_alloc_ops {
- pte_t *(*get_pte_virt)(phys_addr_t pa);
- phys_addr_t (*alloc_pte)(uintptr_t va);
-#ifndef __PAGETABLE_PMD_FOLDED
- pmd_t *(*get_pmd_virt)(phys_addr_t pa);
- phys_addr_t (*alloc_pmd)(uintptr_t va);
-#endif
-};
-
static phys_addr_t dma32_phys_limit __initdata;
static void __init zone_sizes_init(void)
@@ -102,10 +99,14 @@ static void __init print_vm_layout(void)
(unsigned long)VMALLOC_END);
print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
(unsigned long)high_memory);
-#ifdef CONFIG_64BIT
- print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
- (unsigned long)ADDRESS_SPACE_END);
+ if (IS_ENABLED(CONFIG_64BIT)) {
+#ifdef CONFIG_KASAN
+ print_mlm("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif
+
+ print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
+ (unsigned long)ADDRESS_SPACE_END);
+ }
}
#else
static void print_vm_layout(void) { }
@@ -130,18 +131,8 @@ void __init mem_init(void)
print_vm_layout();
}
-/*
- * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel,
- * whereas for 64-bit kernel, the end of the virtual address space is occupied
- * by the modules/BPF/kernel mappings which reduces the available size of the
- * linear mapping.
- * Limit the memory size via mem.
- */
-#ifdef CONFIG_64BIT
-static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G;
-#else
-static phys_addr_t memory_limit = -PAGE_OFFSET;
-#endif
+/* Limit the memory size via mem. */
+static phys_addr_t memory_limit;
static int __init early_mem(char *p)
{
@@ -162,35 +153,31 @@ early_param("mem", early_mem);
static void __init setup_bootmem(void)
{
phys_addr_t vmlinux_end = __pa_symbol(&_end);
- phys_addr_t vmlinux_start = __pa_symbol(&_start);
- phys_addr_t __maybe_unused max_mapped_addr;
- phys_addr_t phys_ram_end;
+ phys_addr_t max_mapped_addr;
+ phys_addr_t phys_ram_end, vmlinux_start;
-#ifdef CONFIG_XIP_KERNEL
- vmlinux_start = __pa_symbol(&_sdata);
-#endif
+ if (IS_ENABLED(CONFIG_XIP_KERNEL))
+ vmlinux_start = __pa_symbol(&_sdata);
+ else
+ vmlinux_start = __pa_symbol(&_start);
memblock_enforce_memory_limit(memory_limit);
/*
- * Reserve from the start of the kernel to the end of the kernel
- */
-#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
- /*
* Make sure we align the reservation on PMD_SIZE since we will
* map the kernel in the linear mapping as read-only: we do not want
* any allocation to happen between _end and the next pmd aligned page.
*/
- vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
-#endif
+ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
+ /*
+ * Reserve from the start of the kernel to the end of the kernel
+ */
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
-
phys_ram_end = memblock_end_of_DRAM();
-#ifndef CONFIG_64BIT
-#ifndef CONFIG_XIP_KERNEL
- phys_ram_base = memblock_start_of_DRAM();
-#endif
+ if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+ phys_ram_base = memblock_start_of_DRAM();
/*
* memblock allocator is not aware of the fact that last 4K bytes of
* the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -200,10 +187,11 @@ static void __init setup_bootmem(void)
* address space is occupied by the kernel mapping then this check must
* be done as soon as the kernel mapping base address is determined.
*/
- max_mapped_addr = __pa(~(ulong)0);
- if (max_mapped_addr == (phys_ram_end - 1))
- memblock_set_current_limit(max_mapped_addr - 4096);
-#endif
+ if (!IS_ENABLED(CONFIG_64BIT)) {
+ max_mapped_addr = __pa(~(ulong)0);
+ if (max_mapped_addr == (phys_ram_end - 1))
+ memblock_set_current_limit(max_mapped_addr - 4096);
+ }
min_low_pfn = PFN_UP(phys_ram_base);
max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
@@ -229,13 +217,7 @@ static void __init setup_bootmem(void)
}
#ifdef CONFIG_MMU
-static struct pt_alloc_ops _pt_ops __initdata;
-
-#ifdef CONFIG_XIP_KERNEL
-#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops))
-#else
-#define pt_ops _pt_ops
-#endif
+struct pt_alloc_ops pt_ops __initdata;
unsigned long riscv_pfn_base __ro_after_init;
EXPORT_SYMBOL(riscv_pfn_base);
@@ -245,9 +227,11 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
+#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
@@ -333,6 +317,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */
+static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
+#define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud))
+#define early_pud ((pud_t *)XIP_FIXUP(early_pud))
+#endif /* CONFIG_XIP_KERNEL */
+
static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
/* Before MMU is enabled */
@@ -352,7 +346,7 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
- BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+ BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
return (uintptr_t)early_pmd;
}
@@ -367,7 +361,8 @@ static phys_addr_t __init alloc_pmd_late(uintptr_t va)
unsigned long vaddr;
vaddr = __get_free_page(GFP_KERNEL);
- BUG_ON(!vaddr);
+ BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page(vaddr)));
+
return __pa(vaddr);
}
@@ -398,21 +393,97 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
create_pte_mapping(ptep, va, pa, sz, prot);
}
-#define pgd_next_t pmd_t
-#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va)
-#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa)
+static pud_t *__init get_pud_virt_early(phys_addr_t pa)
+{
+ return (pud_t *)((uintptr_t)pa);
+}
+
+static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PUD);
+ return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
+}
+
+static pud_t *__init get_pud_virt_late(phys_addr_t pa)
+{
+ return (pud_t *)__va(pa);
+}
+
+static phys_addr_t __init alloc_pud_early(uintptr_t va)
+{
+ /* Only one PUD is available for early mapping */
+ BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_pud;
+}
+
+static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_pud_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+}
+
+static void __init create_pud_mapping(pud_t *pudp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pmd_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t pud_index = pud_index(va);
+
+ if (sz == PUD_SIZE) {
+ if (pud_val(pudp[pud_index]) == 0)
+ pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (pud_val(pudp[pud_index]) == 0) {
+ next_phys = pt_ops.alloc_pmd(va);
+ pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = pt_ops.get_pmd_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
+ nextp = pt_ops.get_pmd_virt(next_phys);
+ }
+
+ create_pmd_mapping(nextp, va, pa, sz, prot);
+}
+
+#define pgd_next_t pud_t
+#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \
+ pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
+#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \
+ pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
- create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pmd
+ (pgtable_l4_enabled ? \
+ create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \
+ create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
+#define fixmap_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
+#define trampoline_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
+#define early_dtb_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pte
+#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
+#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
+#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
void __init create_pgd_mapping(pgd_t *pgdp,
uintptr_t va, phys_addr_t pa,
@@ -451,6 +522,8 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
}
#ifdef CONFIG_XIP_KERNEL
+extern char _xiprom[], _exiprom[], __data_loc;
+
/* called from head.S with MMU off */
asmlinkage void __init __copy_data(void)
{
@@ -499,6 +572,57 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
+#ifdef CONFIG_64BIT
+static void __init disable_pgtable_l4(void)
+{
+ pgtable_l4_enabled = false;
+ kernel_map.page_offset = PAGE_OFFSET_L3;
+ satp_mode = SATP_MODE_39;
+}
+
+/*
+ * There is a simple way to determine if 4-level is supported by the
+ * underlying hardware: establish 1:1 mapping in 4-level page table mode
+ * then read SATP to see if the configuration was taken into account
+ * meaning sv48 is supported.
+ */
+static __init void set_satp_mode(void)
+{
+ u64 identity_satp, hw_satp;
+ uintptr_t set_satp_mode_pmd;
+
+ set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
+ create_pgd_mapping(early_pg_dir,
+ set_satp_mode_pmd, (uintptr_t)early_pud,
+ PGDIR_SIZE, PAGE_TABLE);
+ create_pud_mapping(early_pud,
+ set_satp_mode_pmd, (uintptr_t)early_pmd,
+ PUD_SIZE, PAGE_TABLE);
+ /* Handle the case where set_satp_mode straddles 2 PMDs */
+ create_pmd_mapping(early_pmd,
+ set_satp_mode_pmd, set_satp_mode_pmd,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+ create_pmd_mapping(early_pmd,
+ set_satp_mode_pmd + PMD_SIZE,
+ set_satp_mode_pmd + PMD_SIZE,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+
+ identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
+
+ local_flush_tlb_all();
+ csr_write(CSR_SATP, identity_satp);
+ hw_satp = csr_swap(CSR_SATP, 0ULL);
+ local_flush_tlb_all();
+
+ if (hw_satp != identity_satp)
+ disable_pgtable_l4();
+
+ memset(early_pg_dir, 0, PAGE_SIZE);
+ memset(early_pud, 0, PAGE_SIZE);
+ memset(early_pmd, 0, PAGE_SIZE);
+}
+#endif
+
/*
* setup_vm() is called from head.S with MMU-off.
*
@@ -563,10 +687,15 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
- IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa,
+ IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
PGDIR_SIZE,
IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
+ if (pgtable_l4_enabled) {
+ create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
+ }
+
if (IS_ENABLED(CONFIG_64BIT)) {
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
pa, PMD_SIZE, PAGE_KERNEL);
@@ -588,11 +717,64 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
dtb_early_pa = dtb_pa;
}
+/*
+ * MMU is not enabled, the page tables are allocated directly using
+ * early_pmd/pud/p4d and the address returned is the physical one.
+ */
+void __init pt_ops_set_early(void)
+{
+ pt_ops.alloc_pte = alloc_pte_early;
+ pt_ops.get_pte_virt = get_pte_virt_early;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_early;
+ pt_ops.get_pmd_virt = get_pmd_virt_early;
+ pt_ops.alloc_pud = alloc_pud_early;
+ pt_ops.get_pud_virt = get_pud_virt_early;
+#endif
+}
+
+/*
+ * MMU is enabled but page table setup is not complete yet.
+ * fixmap page table alloc functions must be used as a means to temporarily
+ * map the allocated physical pages since the linear mapping does not exist yet.
+ *
+ * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
+ * but it will be used as described above.
+ */
+void __init pt_ops_set_fixmap(void)
+{
+ pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
+ pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
+ pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
+ pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
+ pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
+#endif
+}
+
+/*
+ * MMU is enabled and page table setup is complete, so from now, we can use
+ * generic page allocation functions to setup page table.
+ */
+void __init pt_ops_set_late(void)
+{
+ pt_ops.alloc_pte = alloc_pte_late;
+ pt_ops.get_pte_virt = get_pte_virt_late;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_late;
+ pt_ops.get_pmd_virt = get_pmd_virt_late;
+ pt_ops.alloc_pud = alloc_pud_late;
+ pt_ops.get_pud_virt = get_pud_virt_late;
+#endif
+}
+
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
kernel_map.virt_addr = KERNEL_LINK_ADDR;
+ kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
#ifdef CONFIG_XIP_KERNEL
kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
@@ -607,11 +789,24 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.phys_addr = (uintptr_t)(&_start);
kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
#endif
+
+#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+ set_satp_mode();
+#endif
+
kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
+ /*
+ * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
+ * kernel, whereas for 64-bit kernel, the end of the virtual address
+ * space is occupied by the modules/BPF/kernel mappings which reduces
+ * the available size of the linear mapping.
+ */
+ memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);
+
/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
@@ -624,23 +819,25 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif
- pt_ops.alloc_pte = alloc_pte_early;
- pt_ops.get_pte_virt = get_pte_virt_early;
-#ifndef __PAGETABLE_PMD_FOLDED
- pt_ops.alloc_pmd = alloc_pmd_early;
- pt_ops.get_pmd_virt = get_pmd_virt_early;
-#endif
+ pt_ops_set_early();
+
/* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START,
- (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
- /* Setup fixmap PMD */
+ /* Setup fixmap PUD and PMD */
+ if (pgtable_l4_enabled)
+ create_pud_mapping(fixmap_pud, FIXADDR_START,
+ (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
/* Setup trampoline PGD and PMD */
create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
- (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
+ trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ if (pgtable_l4_enabled)
+ create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
+ (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
#ifdef CONFIG_XIP_KERNEL
create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
@@ -668,7 +865,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
* Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
* range can not span multiple pmds.
*/
- BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#ifndef __PAGETABLE_PMD_FOLDED
@@ -693,6 +890,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
}
#endif
+
+ pt_ops_set_fixmap();
}
static void __init setup_vm_final(void)
@@ -701,16 +900,6 @@ static void __init setup_vm_final(void)
phys_addr_t pa, start, end;
u64 i;
- /**
- * MMU is enabled at this point. But page table setup is not complete yet.
- * fixmap page table alloc functions should be used at this point
- */
- pt_ops.alloc_pte = alloc_pte_fixmap;
- pt_ops.get_pte_virt = get_pte_virt_fixmap;
-#ifndef __PAGETABLE_PMD_FOLDED
- pt_ops.alloc_pmd = alloc_pmd_fixmap;
- pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
-#endif
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
@@ -735,26 +924,24 @@ static void __init setup_vm_final(void)
}
}
-#ifdef CONFIG_64BIT
/* Map the kernel */
- create_kernel_page_table(swapper_pg_dir, false);
+ if (IS_ENABLED(CONFIG_64BIT))
+ create_kernel_page_table(swapper_pg_dir, false);
+
+#ifdef CONFIG_KASAN
+ kasan_swapper_init();
#endif
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
+ clear_fixmap(FIX_PUD);
/* Move to swapper page table */
- csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
+ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
local_flush_tlb_all();
- /* generic page allocation functions must be used to setup page table */
- pt_ops.alloc_pte = alloc_pte_late;
- pt_ops.get_pte_virt = get_pte_virt_late;
-#ifndef __PAGETABLE_PMD_FOLDED
- pt_ops.alloc_pmd = alloc_pmd_late;
- pt_ops.get_pmd_virt = get_pmd_virt_late;
-#endif
+ pt_ops_set_late();
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
@@ -790,12 +977,10 @@ static void __init reserve_crashkernel(void)
* since it doesn't make much sense and we have limited memory
* resources.
*/
-#ifdef CONFIG_CRASH_DUMP
if (is_kdump_kernel()) {
pr_info("crashkernel: ignoring reservation request\n");
return;
}
-#endif
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
@@ -812,13 +997,22 @@ static void __init reserve_crashkernel(void)
/*
* Current riscv boot protocol requires 2MB alignment for
* RV64 and 4MB alignment for RV32 (hugepage size)
+ *
+ * Try to alloc from 32bit addressible physical memory so that
+ * swiotlb can work on the crash kernel.
*/
crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
- search_start, search_end);
+ search_start,
+ min(search_end, (unsigned long) SZ_4G));
if (crash_base == 0) {
- pr_warn("crashkernel: couldn't allocate %lldKB\n",
- crash_size >> 10);
- return;
+ /* Try again without restricting region to 32bit addressible memory */
+ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
+ search_start, search_end);
+ if (crash_base == 0) {
+ pr_warn("crashkernel: couldn't allocate %lldKB\n",
+ crash_size >> 10);
+ return;
+ }
}
pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 54294f83513d..f61f7ca6fe0f 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -11,45 +11,27 @@
#include <asm/fixmap.h>
#include <asm/pgalloc.h>
-extern pgd_t early_pg_dir[PTRS_PER_PGD];
-asmlinkage void __init kasan_early_init(void)
-{
- uintptr_t i;
- pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
-
- BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
- KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
-
- for (i = 0; i < PTRS_PER_PTE; ++i)
- set_pte(kasan_early_shadow_pte + i,
- mk_pte(virt_to_page(kasan_early_shadow_page),
- PAGE_KERNEL));
-
- for (i = 0; i < PTRS_PER_PMD; ++i)
- set_pmd(kasan_early_shadow_pmd + i,
- pfn_pmd(PFN_DOWN
- (__pa((uintptr_t) kasan_early_shadow_pte)),
- __pgprot(_PAGE_TABLE)));
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
-
- /* init for swapper_pg_dir */
- pgd = pgd_offset_k(KASAN_SHADOW_START);
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
+/*
+ * Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57
+ * which is right before the kernel.
+ *
+ * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate
+ * the page global directory with kasan_early_shadow_pmd.
+ *
+ * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping
+ * must be divided as follows:
+ * - the first PGD entry, although incomplete, is populated with
+ * kasan_early_shadow_pud/p4d
+ * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d
+ * - the last PGD entry is shared with the kernel mapping so populated at the
+ * lower levels pud/p4d
+ *
+ * In addition, when shallow populating a kasan region (for example vmalloc),
+ * this region may also not be aligned on PGDIR size, so we must go down to the
+ * pud level too.
+ */
- local_flush_tlb_all();
-}
+extern pgd_t early_pg_dir[PTRS_PER_PGD];
static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
{
@@ -73,15 +55,19 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
}
-static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
+static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
pmd_t *pmdp, *base_pmd;
unsigned long next;
- base_pmd = (pmd_t *)pgd_page_vaddr(*pgd);
- if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+ if (pud_none(*pud)) {
base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ } else {
+ base_pmd = (pmd_t *)pud_pgtable(*pud);
+ if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+ base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ }
pmdp = base_pmd + pmd_index(vaddr);
@@ -105,59 +91,207 @@ static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned
* it entirely, memblock could allocate a page at a physical address
* where KASAN is not populated yet and then we'd get a page fault.
*/
- set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+ set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+}
+
+static void __init kasan_populate_pud(pgd_t *pgd,
+ unsigned long vaddr, unsigned long end,
+ bool early)
+{
+ phys_addr_t phys_addr;
+ pud_t *pudp, *base_pud;
+ unsigned long next;
+
+ if (early) {
+ /*
+ * We can't use pgd_page_vaddr here as it would return a linear
+ * mapping address but it is not mapped yet, but when populating
+ * early_pg_dir, we need the physical address and when populating
+ * swapper_pg_dir, we need the kernel virtual address so use
+ * pt_ops facility.
+ */
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+ } else {
+ base_pud = (pud_t *)pgd_page_vaddr(*pgd);
+ if (base_pud == lm_alias(kasan_early_shadow_pud))
+ base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
+ }
+
+ pudp = base_pud + pud_index(vaddr);
+
+ do {
+ next = pud_addr_end(vaddr, end);
+
+ if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
+ if (early) {
+ phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd));
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ } else {
+ phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
+ if (phys_addr) {
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+ }
+
+ kasan_populate_pmd(pudp, vaddr, next);
+ } while (pudp++, vaddr = next, vaddr != end);
+
+ /*
+ * Wait for the whole PGD to be populated before setting the PGD in
+ * the page table, otherwise, if we did set the PGD before populating
+ * it entirely, memblock could allocate a page at a physical address
+ * where KASAN is not populated yet and then we'd get a page fault.
+ */
+ if (!early)
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
}
-static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end)
+#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)kasan_early_shadow_pud : \
+ (uintptr_t)kasan_early_shadow_pmd)
+#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \
+ (pgtable_l4_enabled ? \
+ kasan_populate_pud(pgdp, vaddr, next, early) : \
+ kasan_populate_pmd((pud_t *)pgdp, vaddr, next))
+
+static void __init kasan_populate_pgd(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end,
+ bool early)
{
phys_addr_t phys_addr;
- pgd_t *pgdp = pgd_offset_k(vaddr);
unsigned long next;
do {
next = pgd_addr_end(vaddr, end);
- /*
- * pgdp can't be none since kasan_early_init initialized all KASAN
- * shadow region with kasan_early_shadow_pmd: if this is stillthe case,
- * that means we can try to allocate a hugepage as a replacement.
- */
- if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) &&
- IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
- phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
- if (phys_addr) {
- set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
+ if (early) {
+ phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next);
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
continue;
+ } else if (pgd_page_vaddr(*pgdp) ==
+ (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) {
+ /*
+ * pgdp can't be none since kasan_early_init
+ * initialized all KASAN shadow region with
+ * kasan_early_shadow_pud: if this is still the
+ * case, that means we can try to allocate a
+ * hugepage as a replacement.
+ */
+ phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
+ if (phys_addr) {
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
}
}
- kasan_populate_pmd(pgdp, vaddr, next);
+ kasan_populate_pgd_next(pgdp, vaddr, next, early);
} while (pgdp++, vaddr = next, vaddr != end);
}
+asmlinkage void __init kasan_early_init(void)
+{
+ uintptr_t i;
+
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+ KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
+
+ for (i = 0; i < PTRS_PER_PTE; ++i)
+ set_pte(kasan_early_shadow_pte + i,
+ mk_pte(virt_to_page(kasan_early_shadow_page),
+ PAGE_KERNEL));
+
+ for (i = 0; i < PTRS_PER_PMD; ++i)
+ set_pmd(kasan_early_shadow_pmd + i,
+ pfn_pmd(PFN_DOWN
+ (__pa((uintptr_t)kasan_early_shadow_pte)),
+ PAGE_TABLE));
+
+ if (pgtable_l4_enabled) {
+ for (i = 0; i < PTRS_PER_PUD; ++i)
+ set_pud(kasan_early_shadow_pud + i,
+ pfn_pud(PFN_DOWN
+ (__pa(((uintptr_t)kasan_early_shadow_pmd))),
+ PAGE_TABLE));
+ }
+
+ kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+ local_flush_tlb_all();
+}
+
+void __init kasan_swapper_init(void)
+{
+ kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+ local_flush_tlb_all();
+}
+
static void __init kasan_populate(void *start, void *end)
{
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
- kasan_populate_pgd(vaddr, vend);
+ kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false);
local_flush_tlb_all();
memset(start, KASAN_SHADOW_INIT, end - start);
}
+static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end,
+ bool kasan_populate)
+{
+ unsigned long next;
+ pud_t *pudp, *base_pud;
+ pmd_t *base_pmd;
+ bool is_kasan_pmd;
+
+ base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
+ pudp = base_pud + pud_index(vaddr);
+
+ if (kasan_populate)
+ memcpy(base_pud, (void *)kasan_early_shadow_pgd_next,
+ sizeof(pud_t) * PTRS_PER_PUD);
+
+ do {
+ next = pud_addr_end(vaddr, end);
+ is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
+
+ if (is_kasan_pmd) {
+ base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+ }
+ } while (pudp++, vaddr = next, vaddr != end);
+}
+
static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
{
unsigned long next;
void *p;
pgd_t *pgd_k = pgd_offset_k(vaddr);
+ bool is_kasan_pgd_next;
do {
next = pgd_addr_end(vaddr, end);
- if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) {
+ is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) ==
+ (unsigned long)lm_alias(kasan_early_shadow_pgd_next));
+
+ if (is_kasan_pgd_next) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
+
+ if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
+ continue;
+
+ kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next);
} while (pgd_k++, vaddr = next, vaddr != end);
}
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 64f8201237c2..37ed760d007c 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -32,7 +32,6 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
unsigned long size, unsigned long stride)
{
struct cpumask *cmask = mm_cpumask(mm);
- struct cpumask hmask;
unsigned int cpuid;
bool broadcast;
@@ -46,9 +45,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
unsigned long asid = atomic_long_read(&mm->context.id);
if (broadcast) {
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma_asid(cpumask_bits(&hmask),
- start, size, asid);
+ sbi_remote_sfence_vma_asid(cmask, start, size, asid);
} else if (size <= stride) {
local_flush_tlb_page_asid(start, asid);
} else {
@@ -56,9 +53,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
}
} else {
if (broadcast) {
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma(cpumask_bits(&hmask),
- start, size);
+ sbi_remote_sfence_vma(cmask, start, size);
} else if (size <= stride) {
local_flush_tlb_page(start);
} else {
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 603630b6f3c5..0bcda99d1d68 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -458,10 +458,8 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
-int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
- struct pt_regs *regs);
-int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
- struct pt_regs *regs)
+bool ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
{
off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
@@ -469,7 +467,7 @@ int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
regs->epc = (unsigned long)&ex->fixup - offset;
- return 1;
+ return true;
}
/* For accesses to BTF pointers, add an entry to the exception table */
@@ -499,7 +497,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
offset = pc - (long)&ex->insn;
if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
return -ERANGE;
- ex->insn = pc;
+ ex->insn = offset;
/*
* Since the extable follows the program, the fixup offset is always
@@ -515,6 +513,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+ ex->type = EX_TYPE_BPF;
ctx->nexentries++;
return 0;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f6a9475cbc8c..9750f92380f5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -127,7 +127,6 @@ config S390
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_ENTRY
- select GENERIC_FIND_FIRST_BIT
select GENERIC_GETTIMEOFDAY
select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 3b860061e84d..d04e0e7de0b3 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -58,8 +58,6 @@ OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-vmlinux.bin.all-y := $(obj)/vmlinux.bin
-
suffix-$(CONFIG_KERNEL_GZIP) := .gz
suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
suffix-$(CONFIG_KERNEL_LZ4) := .lz4
@@ -68,20 +66,20 @@ suffix-$(CONFIG_KERNEL_LZO) := .lzo
suffix-$(CONFIG_KERNEL_XZ) := .xz
suffix-$(CONFIG_KERNEL_ZSTD) := .zst
-$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lz4)
-$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzma)
-$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzo)
-$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,xzkern)
-$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,zstd22)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,bzip2_with_size)
+$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lz4_with_size)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzma_with_size)
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzo_with_size)
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,xzkern_with_size)
+$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,zstd22_with_size)
OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 354e51dcb3e2..7fe8975b49ec 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -96,7 +96,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 8dee6c3782f3..466780c465f5 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -91,7 +91,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 5a530c552c23..1d40630128a5 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -387,7 +387,6 @@ static inline int fls(unsigned int word)
#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/le.h>
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 0d90cbeb89b4..e3f12db46cfc 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -109,7 +109,9 @@ struct hws_basic_entry {
unsigned int AS:2; /* 29-30 PSW address-space control */
unsigned int I:1; /* 31 entry valid or invalid */
unsigned int CL:2; /* 32-33 Configuration Level */
- unsigned int:14;
+ unsigned int H:1; /* 34 Host Indicator */
+ unsigned int LS:1; /* 35 Limited Sampling */
+ unsigned int:12;
unsigned int prim_asn:16; /* primary ASN */
unsigned long long ia; /* Instruction Address */
unsigned long long gpp; /* Guest Program Parameter */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a604d51acfc8..a22c9266ea05 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -1010,6 +1010,4 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
-
#endif
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index ce550d06abc3..147cb3534ce4 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -49,51 +49,85 @@ int __get_user_bad(void) __attribute__((noreturn));
#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
-#define __put_get_user_asm(to, from, size, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- insn " 0,%[spec]\n" \
- "0: mvcos %[_to],%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- ".pushsection .fixup, \"ax\"\n" \
- "3: lhi %[rc],%[retval]\n" \
- " jg 2b\n" \
- ".popsection\n" \
- EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
- : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [retval] "K" (-EFAULT), [spec] "K" (0x81UL) \
- : "cc", "0"); \
- __rc; \
+union oac {
+ unsigned int val;
+ struct {
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac1;
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac2;
+ };
+};
+
+#define __put_get_user_asm(to, from, size, oac_spec) \
+({ \
+ int __rc; \
+ \
+ asm volatile( \
+ " lr 0,%[spec]\n" \
+ "0: mvcos %[_to],%[_from],%[_size]\n" \
+ "1: xr %[rc],%[rc]\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: lhi %[rc],%[retval]\n" \
+ " jg 2b\n" \
+ ".popsection\n" \
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
+ : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \
+ : [_size] "d" (size), [_from] "Q" (*(from)), \
+ [retval] "K" (-EFAULT), [spec] "d" (oac_spec.val) \
+ : "cc", "0"); \
+ __rc; \
})
+#define __put_user_asm(to, from, size) \
+ __put_get_user_asm(to, from, size, ((union oac) { \
+ .oac1.as = PSW_BITS_AS_SECONDARY, \
+ .oac1.a = 1 \
+ }))
+
+#define __get_user_asm(to, from, size) \
+ __put_get_user_asm(to, from, size, ((union oac) { \
+ .oac2.as = PSW_BITS_AS_SECONDARY, \
+ .oac2.a = 1 \
+ })) \
+
static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
{
int rc;
switch (size) {
case 1:
- rc = __put_get_user_asm((unsigned char __user *)ptr,
- (unsigned char *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned char __user *)ptr,
+ (unsigned char *)x,
+ size);
break;
case 2:
- rc = __put_get_user_asm((unsigned short __user *)ptr,
- (unsigned short *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned short __user *)ptr,
+ (unsigned short *)x,
+ size);
break;
case 4:
- rc = __put_get_user_asm((unsigned int __user *)ptr,
- (unsigned int *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned int __user *)ptr,
+ (unsigned int *)x,
+ size);
break;
case 8:
- rc = __put_get_user_asm((unsigned long __user *)ptr,
- (unsigned long *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned long __user *)ptr,
+ (unsigned long *)x,
+ size);
break;
default:
__put_user_bad();
@@ -108,24 +142,24 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
switch (size) {
case 1:
- rc = __put_get_user_asm((unsigned char *)x,
- (unsigned char __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned char *)x,
+ (unsigned char __user *)ptr,
+ size);
break;
case 2:
- rc = __put_get_user_asm((unsigned short *)x,
- (unsigned short __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned short *)x,
+ (unsigned short __user *)ptr,
+ size);
break;
case 4:
- rc = __put_get_user_asm((unsigned int *)x,
- (unsigned int __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned int *)x,
+ (unsigned int __user *)ptr,
+ size);
break;
case 8:
- rc = __put_get_user_asm((unsigned long *)x,
- (unsigned long __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned long *)x,
+ (unsigned long __user *)ptr,
+ size);
break;
default:
__get_user_bad();
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 72d3e49c2860..86218382d29c 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -91,23 +91,23 @@ struct uv_cb_header {
/* Query Ultravisor Information */
struct uv_cb_qui {
- struct uv_cb_header header;
- u64 reserved08;
- u64 inst_calls_list[4];
- u64 reserved30[2];
- u64 uv_base_stor_len;
- u64 reserved48;
- u64 conf_base_phys_stor_len;
- u64 conf_base_virt_stor_len;
- u64 conf_virt_var_stor_len;
- u64 cpu_stor_len;
- u32 reserved70[3];
- u32 max_num_sec_conf;
- u64 max_guest_stor_addr;
- u8 reserved88[158 - 136];
- u16 max_guest_cpu_id;
- u64 uv_feature_indications;
- u8 reserveda0[200 - 168];
+ struct uv_cb_header header; /* 0x0000 */
+ u64 reserved08; /* 0x0008 */
+ u64 inst_calls_list[4]; /* 0x0010 */
+ u64 reserved30[2]; /* 0x0030 */
+ u64 uv_base_stor_len; /* 0x0040 */
+ u64 reserved48; /* 0x0048 */
+ u64 conf_base_phys_stor_len; /* 0x0050 */
+ u64 conf_base_virt_stor_len; /* 0x0058 */
+ u64 conf_virt_var_stor_len; /* 0x0060 */
+ u64 cpu_stor_len; /* 0x0068 */
+ u32 reserved70[3]; /* 0x0070 */
+ u32 max_num_sec_conf; /* 0x007c */
+ u64 max_guest_stor_addr; /* 0x0080 */
+ u8 reserved88[158 - 136]; /* 0x0088 */
+ u16 max_guest_cpu_id; /* 0x009e */
+ u64 uv_feature_indications; /* 0x00a0 */
+ u8 reserveda8[200 - 168]; /* 0x00a8 */
} __packed __aligned(8);
/* Initialize Ultravisor */
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 0681c55e831d..1e3233eb510a 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -224,5 +224,5 @@ void __noreturn die(struct pt_regs *regs, const char *str)
if (panic_on_oops)
panic("Fatal exception: panic_on_oops");
oops_exit();
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index b01ba460b7ca..d52d85367bf7 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -37,14 +37,15 @@
void *module_alloc(unsigned long size)
{
+ gfp_t gfp_mask = GFP_KERNEL;
void *p;
if (PAGE_ALIGN(size) > MODULES_LEN)
return NULL;
p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
__builtin_return_address(0));
- if (p && (kasan_module_alloc(p, size) < 0)) {
+ if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
vfree(p);
return NULL;
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 1cf1e37553e8..0c9e894913dc 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -166,7 +166,7 @@ void __s390_handle_mcck(void)
"malfunction (code 0x%016lx).\n", mcck.mcck_code);
printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
current->comm, current->pid);
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
}
diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c
index 30f0242de4a5..8ee48672233f 100644
--- a/arch/s390/kernel/perf_cpum_cf_common.c
+++ b/arch/s390/kernel/perf_cpum_cf_common.c
@@ -178,7 +178,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
case CPUMF_CTR_SET_CRYPTO:
if (info->csvn >= 1 && info->csvn <= 5)
ctrset_size = 16;
- else if (info->csvn == 6)
+ else if (info->csvn == 6 || info->csvn == 7)
ctrset_size = 20;
break;
case CPUMF_CTR_SET_EXT:
@@ -188,7 +188,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
ctrset_size = 48;
else if (info->csvn >= 3 && info->csvn <= 5)
ctrset_size = 128;
- else if (info->csvn == 6)
+ else if (info->csvn == 6 || info->csvn == 7)
ctrset_size = 160;
break;
case CPUMF_CTR_SET_MT_DIAG:
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 37265f551a11..52c1fe23b823 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -344,7 +344,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
NULL,
};
-static struct attribute *cpumcf_svn_6_pmu_event_attr[] __initdata = {
+static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
@@ -715,8 +715,8 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
case 1 ... 5:
csvn = cpumcf_svn_12345_pmu_event_attr;
break;
- case 6:
- csvn = cpumcf_svn_6_pmu_event_attr;
+ case 6 ... 7:
+ csvn = cpumcf_svn_67_pmu_event_attr;
break;
default:
csvn = none;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index db62def4ef28..332a49965130 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1179,7 +1179,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
sample = (struct hws_basic_entry *) *sdbt;
while ((unsigned long *) sample < (unsigned long *) te) {
/* Check for an empty sample */
- if (!sample->def)
+ if (!sample->def || sample->LS)
break;
/* Update perf event period */
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index ed9c5c2eafad..799147658dee 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -452,3 +452,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 67a8e770e369..2e84d3922f7c 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -33,6 +33,7 @@ config KVM
select HAVE_KVM_NO_POLL
select SRCU
select KVM_VFIO
+ select INTERVAL_TREE
help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index b3aaadc60ead..26f4a74e5ce4 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -3,13 +3,11 @@
#
# Copyright IBM Corp. 2008
-KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o \
- $(KVM)/irqchip.o $(KVM)/vfio.o $(KVM)/binary_stats.o
+include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 6af59c59cc1b..4460808c3b9a 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -794,46 +794,100 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}
-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
- unsigned long *pages, unsigned long nr_pages,
- const union asce asce, enum gacc_mode mode)
+/**
+ * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
+ * covering a logical range
+ * @vcpu: virtual cpu
+ * @ga: guest address, start of range
+ * @ar: access register
+ * @gpas: output argument, may be NULL
+ * @len: length of range in bytes
+ * @asce: address-space-control element to use for translation
+ * @mode: access mode
+ *
+ * Translate a logical range to a series of guest absolute addresses,
+ * such that the concatenation of page fragments starting at each gpa make up
+ * the whole range.
+ * The translation is performed as if done by the cpu for the given @asce, @ar,
+ * @mode and state of the @vcpu.
+ * If the translation causes an exception, its program interruption code is
+ * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
+ * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
+ * a correct exception into the guest.
+ * The resulting gpas are stored into @gpas, unless it is NULL.
+ *
+ * Note: All fragments except the first one start at the beginning of a page.
+ * When deriving the boundaries of a fragment from a gpa, all but the last
+ * fragment end at the end of the page.
+ *
+ * Return:
+ * * 0 - success
+ * * <0 - translation could not be performed, for example if guest
+ * memory could not be accessed
+ * * >0 - an access exception occurred. In this case the returned value
+ * is the program interruption code and the contents of pgm may
+ * be used to inject an exception into the guest.
+ */
+static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ unsigned long *gpas, unsigned long len,
+ const union asce asce, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned int offset = offset_in_page(ga);
+ unsigned int fragment_len;
int lap_enabled, rc = 0;
enum prot_type prot;
+ unsigned long gpa;
lap_enabled = low_address_protection_enabled(vcpu, asce);
- while (nr_pages) {
+ while (min(PAGE_SIZE - offset, len) > 0) {
+ fragment_len = min(PAGE_SIZE - offset, len);
ga = kvm_s390_logical_to_effective(vcpu, ga);
if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
PROT_TYPE_LA);
- ga &= PAGE_MASK;
if (psw_bits(*psw).dat) {
- rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
+ rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
if (rc < 0)
return rc;
} else {
- *pages = kvm_s390_real_to_abs(vcpu, ga);
- if (kvm_is_error_gpa(vcpu->kvm, *pages))
+ gpa = kvm_s390_real_to_abs(vcpu, ga);
+ if (kvm_is_error_gpa(vcpu->kvm, gpa))
rc = PGM_ADDRESSING;
}
if (rc)
return trans_exc(vcpu, rc, ga, ar, mode, prot);
- ga += PAGE_SIZE;
- pages++;
- nr_pages--;
+ if (gpas)
+ *gpas++ = gpa;
+ offset = 0;
+ ga += fragment_len;
+ len -= fragment_len;
}
return 0;
}
+static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
+ void *data, unsigned int len)
+{
+ const unsigned int offset = offset_in_page(gpa);
+ const gfn_t gfn = gpa_to_gfn(gpa);
+ int rc;
+
+ if (mode == GACC_STORE)
+ rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
+ else
+ rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
+ return rc;
+}
+
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
unsigned long len, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- unsigned long _len, nr_pages, gpa, idx;
- unsigned long pages_array[2];
- unsigned long *pages;
+ unsigned long nr_pages, idx;
+ unsigned long gpa_array[2];
+ unsigned int fragment_len;
+ unsigned long *gpas;
int need_ipte_lock;
union asce asce;
int rc;
@@ -845,49 +899,42 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
- pages = pages_array;
- if (nr_pages > ARRAY_SIZE(pages_array))
- pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
- if (!pages)
+ gpas = gpa_array;
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
+ if (!gpas)
return -ENOMEM;
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
+ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
- gpa = *(pages + idx) + (ga & ~PAGE_MASK);
- _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (mode == GACC_STORE)
- rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
- else
- rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
- len -= _len;
- ga += _len;
- data += _len;
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
+ rc = access_guest_page(vcpu->kvm, mode, gpas[idx], data, fragment_len);
+ len -= fragment_len;
+ data += fragment_len;
}
if (need_ipte_lock)
ipte_unlock(vcpu);
- if (nr_pages > ARRAY_SIZE(pages_array))
- vfree(pages);
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ vfree(gpas);
return rc;
}
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, enum gacc_mode mode)
{
- unsigned long _len, gpa;
+ unsigned int fragment_len;
+ unsigned long gpa;
int rc = 0;
while (len && !rc) {
gpa = kvm_s390_real_to_abs(vcpu, gra);
- _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (mode)
- rc = write_guest_abs(vcpu, gpa, data, _len);
- else
- rc = read_guest_abs(vcpu, gpa, data, _len);
- len -= _len;
- gra += _len;
- data += _len;
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
+ rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
+ len -= fragment_len;
+ gra += fragment_len;
+ data += fragment_len;
}
return rc;
}
@@ -909,8 +956,6 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long *gpa, enum gacc_mode mode)
{
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- enum prot_type prot;
union asce asce;
int rc;
@@ -918,23 +963,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
- if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (mode == GACC_STORE)
- return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
- mode, PROT_TYPE_LA);
- }
-
- if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
- if (rc > 0)
- return trans_exc(vcpu, rc, gva, 0, mode, prot);
- } else {
- *gpa = kvm_s390_real_to_abs(vcpu, gva);
- if (kvm_is_error_gpa(vcpu->kvm, *gpa))
- return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
- }
-
- return rc;
+ return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
}
/**
@@ -948,17 +977,14 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long length, enum gacc_mode mode)
{
- unsigned long gpa;
- unsigned long currlen;
+ union asce asce;
int rc = 0;
+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+ if (rc)
+ return rc;
ipte_lock(vcpu);
- while (length > 0 && !rc) {
- currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
- rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
- gva += currlen;
- length -= currlen;
- }
+ rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
ipte_unlock(vcpu);
return rc;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c3bd993fdd0c..db933c252dbc 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1335,7 +1335,8 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
no_timer:
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
+ vcpu->valid_wakeup = false;
__unset_cpu_idle(vcpu);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -2115,6 +2116,13 @@ int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu)
return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
}
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ return test_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+}
+
void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -2659,7 +2667,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
- unsigned int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
switch (attr->group) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 14a18ba5ff2c..577f1ead6a51 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -295,7 +295,7 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
unsigned long long *delta = v;
list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -682,7 +682,7 @@ out:
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
- unsigned int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -936,7 +936,7 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_s390_vcpu_block_all(kvm);
@@ -1021,7 +1021,7 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
- int cx;
+ unsigned long cx;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(cx, vcpu, kvm)
@@ -1037,13 +1037,13 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
struct kvm_memory_slot *ms;
struct kvm_memslots *slots;
unsigned long ram_pages = 0;
- int slotnr;
+ int bkt;
/* migration mode already enabled */
if (kvm->arch.migration_mode)
return 0;
slots = kvm_memslots(kvm);
- if (!slots || !slots->used_slots)
+ if (!slots || kvm_memslots_empty(slots))
return -EINVAL;
if (!kvm->arch.use_cmma) {
@@ -1051,8 +1051,7 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
return 0;
}
/* mark all the pages in active slots as dirty */
- for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
- ms = slots->memslots + slotnr;
+ kvm_for_each_memslot(ms, bkt, slots) {
if (!ms->dirty_bitmap)
return -EINVAL;
/*
@@ -1943,41 +1942,6 @@ out:
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
-/*
- * Similar to gfn_to_memslot, but returns the index of a memslot also when the
- * address falls in a hole. In that case the index of one of the memslots
- * bordering the hole is returned.
- */
-static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
-{
- int start = 0, end = slots->used_slots;
- int slot = atomic_read(&slots->last_used_slot);
- struct kvm_memory_slot *memslots = slots->memslots;
-
- if (gfn >= memslots[slot].base_gfn &&
- gfn < memslots[slot].base_gfn + memslots[slot].npages)
- return slot;
-
- while (start < end) {
- slot = start + (end - start) / 2;
-
- if (gfn >= memslots[slot].base_gfn)
- end = slot;
- else
- start = slot + 1;
- }
-
- if (start >= slots->used_slots)
- return slots->used_slots - 1;
-
- if (gfn >= memslots[start].base_gfn &&
- gfn < memslots[start].base_gfn + memslots[start].npages) {
- atomic_set(&slots->last_used_slot, start);
- }
-
- return start;
-}
-
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
u8 *res, unsigned long bufsize)
{
@@ -2001,27 +1965,32 @@ static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
return 0;
}
+static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
+ gfn_t gfn)
+{
+ return ____gfn_to_memslot(slots, gfn, true);
+}
+
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
unsigned long cur_gfn)
{
- int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
- struct kvm_memory_slot *ms = slots->memslots + slotidx;
+ struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
unsigned long ofs = cur_gfn - ms->base_gfn;
+ struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
if (ms->base_gfn + ms->npages <= cur_gfn) {
- slotidx--;
+ mnode = rb_next(mnode);
/* If we are above the highest slot, wrap around */
- if (slotidx < 0)
- slotidx = slots->used_slots - 1;
+ if (!mnode)
+ mnode = rb_first(&slots->gfn_tree);
- ms = slots->memslots + slotidx;
+ ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
ofs = 0;
}
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
- while ((slotidx > 0) && (ofs >= ms->npages)) {
- slotidx--;
- ms = slots->memslots + slotidx;
- ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+ while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
+ ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
+ ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
}
return ms->base_gfn + ofs;
}
@@ -2033,7 +2002,7 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *ms;
- if (unlikely(!slots->used_slots))
+ if (unlikely(kvm_memslots_empty(slots)))
return 0;
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
@@ -2043,7 +2012,7 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
if (!ms)
return 0;
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
- mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+ mem_end = kvm_s390_get_gfn_end(slots);
while (args->count < bufsize) {
hva = gfn_to_hva(kvm, cur_gfn);
@@ -2206,7 +2175,7 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
struct kvm_vcpu *vcpu;
u16 rc, rrc;
int ret = 0;
- int i;
+ unsigned long i;
/*
* We ignore failures and try to destroy as many CPUs as possible.
@@ -2230,7 +2199,8 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
- int i, r = 0;
+ unsigned long i;
+ int r = 0;
u16 dummy;
struct kvm_vcpu *vcpu;
@@ -2821,27 +2791,11 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
free_page((unsigned long)(vcpu->arch.sie_block));
}
-static void kvm_free_vcpus(struct kvm *kvm)
-{
- unsigned int i;
- struct kvm_vcpu *vcpu;
-
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vcpu_destroy(vcpu);
-
- mutex_lock(&kvm->lock);
- for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
- kvm->vcpus[i] = NULL;
-
- atomic_set(&kvm->online_vcpus, 0);
- mutex_unlock(&kvm->lock);
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
u16 rc, rrc;
- kvm_free_vcpus(kvm);
+ kvm_destroy_vcpus(kvm);
sca_dispose(kvm);
kvm_s390_gisa_destroy(kvm);
/*
@@ -2945,7 +2899,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
struct bsca_block *old_sca = kvm->arch.sca;
struct esca_block *new_sca;
struct kvm_vcpu *vcpu;
- unsigned int vcpu_idx;
+ unsigned long vcpu_idx;
u32 scaol, scaoh;
if (kvm->arch.use_esca)
@@ -3427,7 +3381,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
struct kvm *kvm = gmap->private;
struct kvm_vcpu *vcpu;
unsigned long prefix;
- int i;
+ unsigned long i;
if (gmap_is_shadow(gmap))
return;
@@ -3449,7 +3403,7 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
/* do not poll with more than halt_poll_max_steal percent of steal time */
if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
- halt_poll_max_steal) {
+ READ_ONCE(halt_poll_max_steal)) {
vcpu->stat.halt_no_poll_steal++;
return true;
}
@@ -3920,7 +3874,7 @@ void kvm_s390_set_tod_clock(struct kvm *kvm,
{
struct kvm_vcpu *vcpu;
union tod_clock clk;
- int i;
+ unsigned long i;
mutex_lock(&kvm->lock);
preempt_disable();
@@ -4552,7 +4506,7 @@ static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
- unsigned int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -4590,7 +4544,7 @@ int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
}
for (i = 0; i < online_vcpus; i++) {
- if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+ if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
started_vcpus++;
}
@@ -4645,16 +4599,23 @@ int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
}
}
- /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
+ /*
+ * Set the VCPU to STOPPED and THEN clear the interrupt flag,
+ * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
+ * have been fully processed. This will ensure that the VCPU
+ * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
+ */
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
kvm_s390_clear_stop_irq(vcpu);
- kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
- if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+ struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
+
+ if (!is_vcpu_stopped(tmp)) {
started_vcpus++;
- started_vcpu = vcpu->kvm->vcpus[i];
+ started_vcpu = tmp;
}
}
@@ -5020,32 +4981,38 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
+ gpa_t size;
+
+ /* When we are protected, we should not change the memory slots */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
+
+ if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
+ return 0;
+
/* A few sanity checks. We can have memory slots which have to be
located/ended at a segment boundary (1MB). The memory in userland is
ok to be fragmented into various different vmas. It is okay to mmap()
and munmap() stuff in this slot after doing this call at any time */
- if (mem->userspace_addr & 0xffffful)
+ if (new->userspace_addr & 0xffffful)
return -EINVAL;
- if (mem->memory_size & 0xffffful)
+ size = new->npages * PAGE_SIZE;
+ if (size & 0xffffful)
return -EINVAL;
- if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
+ if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
return -EINVAL;
- /* When we are protected, we should not change the memory slots */
- if (kvm_s390_pv_get_handle(kvm))
- return -EINVAL;
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -5064,8 +5031,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
break;
fallthrough;
case KVM_MR_CREATE:
- rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
- mem->guest_phys_addr, mem->memory_size);
+ rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
+ new->base_gfn * PAGE_SIZE,
+ new->npages * PAGE_SIZE);
break;
case KVM_MR_FLAGS_ONLY:
break;
@@ -5084,11 +5052,6 @@ static inline unsigned long nonhyp_mask(int i)
return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
-void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
-{
- vcpu->valid_wakeup = false;
-}
-
static int __init kvm_s390_init(void)
{
int i;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c07a050d757d..098831e815e6 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -217,6 +217,20 @@ static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm)
kvm->arch.user_cpu_state_ctrl = 1;
}
+/* get the end gfn of the last (highest gfn) memslot */
+static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
+{
+ struct rb_node *node;
+ struct kvm_memory_slot *ms;
+
+ if (WARN_ON(kvm_memslots_empty(slots)))
+ return 0;
+
+ node = rb_last(&slots->gfn_tree);
+ ms = container_of(node, struct kvm_memory_slot, gfn_node[slots->node_idx]);
+ return ms->base_gfn + ms->npages;
+}
+
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
@@ -357,7 +371,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
WARN_ON(!mutex_is_locked(&kvm->lock));
@@ -367,7 +381,7 @@ static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -427,6 +441,7 @@ void kvm_s390_destroy_adapters(struct kvm *kvm);
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
extern struct kvm_device_ops kvm_flic_ops;
int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu);
void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
void __user *buf, int len);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 00d272d134c2..7f7c0d6af2ce 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -116,7 +116,6 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
unsigned long base = uv_info.guest_base_stor_len;
unsigned long virt = uv_info.guest_virt_var_stor_len;
unsigned long npages = 0, vlen = 0;
- struct kvm_memory_slot *memslot;
kvm->arch.pv.stor_var = NULL;
kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
@@ -130,8 +129,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
* Slots are sorted by GFN
*/
mutex_lock(&kvm->slots_lock);
- memslot = kvm_memslots(kvm)->memslots;
- npages = memslot->base_gfn + memslot->npages;
+ npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
mutex_unlock(&kvm->slots_lock);
kvm->arch.pv.guest_len = npages * PAGE_SIZE;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index cf4de80bd541..8aaee2892ec3 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -276,6 +276,34 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
+ /*
+ * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders
+ * are processed asynchronously. Until the affected VCPU finishes
+ * its work and calls back into KVM to clear the (RESTART or STOP)
+ * interrupt, we need to return any new non-reset orders "busy".
+ *
+ * This is important because a single VCPU could issue:
+ * 1) SIGP STOP $DESTINATION
+ * 2) SIGP SENSE $DESTINATION
+ *
+ * If the SIGP SENSE would not be rejected as "busy", it could
+ * return an incorrect answer as to whether the VCPU is STOPPED
+ * or OPERATING.
+ */
+ if (order_code != SIGP_INITIAL_CPU_RESET &&
+ order_code != SIGP_CPU_RESET) {
+ /*
+ * Lockless check. Both SIGP STOP and SIGP (RE)START
+ * properly synchronize everything while processing
+ * their orders, while the guest cannot observe a
+ * difference when issuing other orders from two
+ * different VCPUs.
+ */
+ if (kvm_s390_is_stop_irq_pending(dst_vcpu) ||
+ kvm_s390_is_restart_irq_pending(dst_vcpu))
+ return SIGP_CC_BUSY;
+ }
+
switch (order_code) {
case SIGP_SENSE:
vcpu->stat.instruction_sigp_sense++;
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index a596e69d3c47..8a5d21461889 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -62,10 +62,14 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
unsigned long size)
{
unsigned long tmp1, tmp2;
+ union oac spec = {
+ .oac2.as = PSW_BITS_AS_SECONDARY,
+ .oac2.a = 1,
+ };
tmp1 = -4096UL;
asm volatile(
- " lghi 0,%[spec]\n"
+ " lr 0,%[spec]\n"
"0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
"6: jz 4f\n"
"1: algr %0,%3\n"
@@ -84,7 +88,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
"5:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : [spec] "K" (0x81UL)
+ : [spec] "d" (spec.val)
: "cc", "memory", "0");
return size;
}
@@ -135,10 +139,14 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
unsigned long size)
{
unsigned long tmp1, tmp2;
+ union oac spec = {
+ .oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.a = 1,
+ };
tmp1 = -4096UL;
asm volatile(
- " llilh 0,%[spec]\n"
+ " lr 0,%[spec]\n"
"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
"6: jz 4f\n"
"1: algr %0,%3\n"
@@ -157,7 +165,7 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
"5:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : [spec] "K" (0x81UL)
+ : [spec] "d" (spec.val)
: "cc", "memory", "0");
return size;
}
@@ -207,10 +215,14 @@ EXPORT_SYMBOL(raw_copy_to_user);
static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
{
unsigned long tmp1, tmp2;
+ union oac spec = {
+ .oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.a = 1,
+ };
tmp1 = -4096UL;
asm volatile(
- " llilh 0,%[spec]\n"
+ " lr 0,%[spec]\n"
"0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
" jz 4f\n"
"1: algr %0,%2\n"
@@ -228,7 +240,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
"5:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,5b)
: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
- : "a" (empty_zero_page), [spec] "K" (0x81UL)
+ : "a" (empty_zero_page), [spec] "d" (spec.val)
: "cc", "memory", "0");
return size;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 6ed2886fc014..ff16ce0d04ee 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -452,21 +452,21 @@ retry:
if (unlikely(fault & VM_FAULT_ERROR))
goto out_up;
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
- (flags & FAULT_FLAG_RETRY_NOWAIT)) {
- /* FAULT_FLAG_RETRY_NOWAIT has been set,
- * mmap_lock has not been released */
- current->thread.gmap_pfault = 1;
- fault = VM_FAULT_PFAULT;
- goto out_up;
- }
- flags &= ~FAULT_FLAG_RETRY_NOWAIT;
- flags |= FAULT_FLAG_TRIED;
- mmap_read_lock(mm);
- goto retry;
+ if (fault & VM_FAULT_RETRY) {
+ if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
+ (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ /*
+ * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
+ * not been released
+ */
+ current->thread.gmap_pfault = 1;
+ fault = VM_FAULT_PFAULT;
+ goto out_up;
}
+ flags &= ~FAULT_FLAG_RETRY_NOWAIT;
+ flags |= FAULT_FLAG_TRIED;
+ mmap_read_lock(mm);
+ goto retry;
}
if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
address = __gmap_link(gmap, current->thread.gmap_addr,
diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile
index 5c123f5b2797..1f5d2df3c7e0 100644
--- a/arch/sh/boot/Makefile
+++ b/arch/sh/boot/Makefile
@@ -19,12 +19,12 @@ CONFIG_ZERO_PAGE_OFFSET ?= 0x00001000
CONFIG_ENTRY_OFFSET ?= 0x00001000
CONFIG_PHYSICAL_START ?= $(CONFIG_MEMORY_START)
-suffix-y := bin
-suffix-$(CONFIG_KERNEL_GZIP) := gz
-suffix-$(CONFIG_KERNEL_BZIP2) := bz2
-suffix-$(CONFIG_KERNEL_LZMA) := lzma
-suffix-$(CONFIG_KERNEL_XZ) := xz
-suffix-$(CONFIG_KERNEL_LZO) := lzo
+suffix_y := bin
+suffix_$(CONFIG_KERNEL_GZIP) := gz
+suffix_$(CONFIG_KERNEL_BZIP2) := bz2
+suffix_$(CONFIG_KERNEL_LZMA) := lzma
+suffix_$(CONFIG_KERNEL_XZ) := xz
+suffix_$(CONFIG_KERNEL_LZO) := lzo
targets := zImage vmlinux.srec romImage uImage uImage.srec uImage.gz \
uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin \
@@ -106,10 +106,10 @@ OBJCOPYFLAGS_uImage.srec := -I binary -O srec
$(obj)/uImage.srec: $(obj)/uImage FORCE
$(call if_changed,objcopy)
-$(obj)/uImage: $(obj)/uImage.$(suffix-y)
+$(obj)/uImage: $(obj)/uImage.$(suffix_y)
@ln -sf $(notdir $<) $@
@echo ' Image $@ is ready'
export CONFIG_PAGE_OFFSET CONFIG_MEMORY_START CONFIG_BOOT_LINK_OFFSET \
CONFIG_PHYSICAL_START CONFIG_ZERO_PAGE_OFFSET CONFIG_ENTRY_OFFSET \
- KERNEL_MEMORY suffix-y
+ KERNEL_MEMORY suffix_y
diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile
index cf3174df7859..591125c42d49 100644
--- a/arch/sh/boot/compressed/Makefile
+++ b/arch/sh/boot/compressed/Makefile
@@ -47,22 +47,20 @@ $(obj)/vmlinux: $(addprefix $(obj)/, $(OBJECTS)) FORCE
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-vmlinux.bin.all-y := $(obj)/vmlinux.bin
-
-$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzma)
-$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,xzkern)
-$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzo)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,bzip2_with_size)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzma_with_size)
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,xzkern_with_size)
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzo_with_size)
OBJCOPYFLAGS += -R .empty_zero_page
LDFLAGS_piggy.o := -r --format binary --oformat $(ld-bfd) -T
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE
$(call if_changed,ld)
diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile
index c17d65b82abe..4a6dec9714a9 100644
--- a/arch/sh/boot/dts/Makefile
+++ b/arch/sh/boot/dts/Makefile
@@ -1,4 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"")
-obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o
-endif
+obj-$(CONFIG_USE_BUILTIN_DTB) += $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_SOURCE))
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index 3b6c7b5b7ec9..10ceb0d6b5a9 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -68,6 +68,5 @@ static inline unsigned long __ffs(unsigned long word)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/le.h>
-#include <asm-generic/bitops/find.h>
#endif /* __ASM_SH_BITOPS_H */
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index d9539d28bdaa..2de85c977f54 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -452,3 +452,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index cbe3201d4f21..01884054aeb2 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -57,7 +57,7 @@ void __noreturn die(const char *str, struct pt_regs *regs, long err)
if (panic_on_oops)
panic("Fatal exception");
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
void die_if_kernel(const char *str, struct pt_regs *regs, long err)
diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c
index fb517b82a87b..3a76a766f423 100644
--- a/arch/sh/mm/alignment.c
+++ b/arch/sh/mm/alignment.c
@@ -140,7 +140,7 @@ static int alignment_proc_open(struct inode *inode, struct file *file)
static ssize_t alignment_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
- int *data = PDE_DATA(file_inode(file));
+ int *data = pde_data(file_inode(file));
char mode;
if (count > 0) {
@@ -161,7 +161,7 @@ static const struct proc_ops alignment_proc_ops = {
};
/*
- * This needs to be done after sysctl_init, otherwise sys/ will be
+ * This needs to be done after sysctl_init_bases(), otherwise sys/ will be
* overwritten. Actually, this shouldn't be in sys/ at all since
* it isn't a sysctl, and it doesn't contain sysctl information.
* We now locate it in /proc/cpu/alignment instead.
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 1e1aa75df3ca..e175667b1363 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -485,17 +485,15 @@ good_area:
if (mm_fault_error(regs, error_code, address, fault))
return;
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
-
- /*
- * No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
- goto retry;
- }
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
+
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 66fc08646be5..1cab1b284f1a 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -97,6 +97,9 @@ config SPARC64
select PCI_DOMAINS if PCI
select ARCH_HAS_GIGANTIC_PAGE
select HAVE_SOFTIRQ_ON_OWN_STACK
+ select HAVE_SETUP_PER_CPU_AREA
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK
config ARCH_PROC_KCORE_TEXT
def_bool y
@@ -123,15 +126,6 @@ config AUDIT_ARCH
bool
default y
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y if SPARC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y if SPARC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y if SPARC64
-
config MMU
bool
default y
diff --git a/arch/sparc/include/asm/bitops_32.h b/arch/sparc/include/asm/bitops_32.h
index 0ceff3b915a8..889afa9f990f 100644
--- a/arch/sparc/include/asm/bitops_32.h
+++ b/arch/sparc/include/asm/bitops_32.h
@@ -100,7 +100,6 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic.h>
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index ca7ea5913494..005a8ae858f1 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -52,8 +52,6 @@ unsigned int __arch_hweight8(unsigned int w);
#include <asm-generic/bitops/lock.h>
#endif /* __KERNEL__ */
-#include <asm-generic/bitops/find.h>
-
#ifdef __KERNEL__
#include <asm-generic/bitops/le.h>
diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c
index 3a66e62eb2a0..ab657b359789 100644
--- a/arch/sparc/kernel/led.c
+++ b/arch/sparc/kernel/led.c
@@ -114,18 +114,16 @@ static const struct proc_ops led_proc_ops = {
};
#endif
-static struct proc_dir_entry *led;
-
#define LED_VERSION "0.1"
static int __init led_init(void)
{
timer_setup(&led_blink_timer, led_blink, 0);
- led = proc_create("led", 0, NULL, &led_proc_ops);
- if (!led)
+#ifdef CONFIG_PROC_FS
+ if (!proc_create("led", 0, NULL, &led_proc_ops))
return -ENOMEM;
-
+#endif
printk(KERN_INFO
"led: version %s, Lars Kotthoff <metalhead@metalhead.ws>\n",
LED_VERSION);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b98a7bbe6728..a1f78e9ddaf3 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1526,50 +1526,6 @@ void smp_send_stop(void)
smp_call_function(stop_this_cpu, NULL, 0);
}
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
- size_t align)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE, node);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (cpu_to_node(from) == cpu_to_node(to))
@@ -1578,57 +1534,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
-static void __init pcpu_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- if (pgd_none(*pgd)) {
- pud_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pgd_populate(&init_mm, pgd, new);
- }
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ return cpu_to_node(cpu);
}
void __init setup_per_cpu_areas(void)
@@ -1641,8 +1549,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, 4 << 20,
pcpu_cpu_distance,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem);
+ pcpu_cpu_to_node);
if (rc)
pr_warn("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
@@ -1650,9 +1557,7 @@ void __init setup_per_cpu_areas(void)
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- pcpu_alloc_bootmem,
- pcpu_free_bootmem,
- pcpu_populate_pte);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 46adabcb1720..4398cc6fb68d 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -495,3 +495,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index 5630e5a395e0..179aabfa712e 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -86,9 +86,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
}
printk("Instruction DUMP:");
instruction_dump ((unsigned long *) regs->pc);
- if(regs->psr & PSR_PS)
- do_exit(SIGKILL);
- do_exit(SIGSEGV);
+ make_task_dead((regs->psr & PSR_PS) ? SIGKILL : SIGSEGV);
}
void do_hw_interrupt(struct pt_regs *regs, unsigned long type)
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 6863025ed56d..21077821f427 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2559,9 +2559,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
}
if (panic_on_oops)
panic("Fatal exception");
- if (regs->tstate & TSTATE_PRIV)
- do_exit(SIGKILL);
- do_exit(SIGSEGV);
+ make_task_dead((regs->tstate & TSTATE_PRIV)? SIGKILL : SIGSEGV);
}
EXPORT_SYMBOL(die_if_kernel);
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 90dc4ae315c8..ad569d9bd124 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -200,17 +200,15 @@ good_area:
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- /* No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
+ /* No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
- goto retry;
- }
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 9a9652a15fed..253e07043298 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -437,17 +437,15 @@ good_area:
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- /* No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
+ /* No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
- goto retry;
- }
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
index 0ab58016db22..5c092a9153ea 100644
--- a/arch/um/drivers/virt-pci.c
+++ b/arch/um/drivers/virt-pci.c
@@ -616,7 +616,7 @@ static void um_pci_virtio_remove(struct virtio_device *vdev)
int i;
/* Stop all virtqueues */
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
device_set_wakeup_enable(&vdev->dev, false);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 561a2b03c3cf..d1d5d0be0308 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -87,12 +87,10 @@ good_area:
}
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ goto retry;
}
pmd = pmd_off(mm, address);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 976dd6b532bf..ebe8fc76949a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -78,7 +78,7 @@ config X86
select ARCH_HAS_FILTER_PGPROT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
- select ARCH_HAS_KCOV if X86_64 && STACK_VALIDATION
+ select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
@@ -104,6 +104,7 @@ config X86
select ARCH_SUPPORTS_ACPI
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096
select ARCH_SUPPORTS_LTO_CLANG
@@ -136,7 +137,6 @@ config X86
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_ENTRY
- select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC
@@ -239,6 +239,7 @@ config X86
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_FUNCTION_ARG_ACCESS_API
+ select HAVE_SETUP_PER_CPU_AREA
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
select HAVE_STACK_VALIDATION if X86_64
@@ -252,6 +253,8 @@ config X86
select HAVE_GENERIC_VDSO
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG if PCI
@@ -332,15 +335,6 @@ config ARCH_HAS_CPU_RELAX
config ARCH_HAS_FILTER_PGPROT
def_bool y
-config HAVE_SETUP_PER_CPU_AREA
- def_bool y
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
- def_bool y
-
config ARCH_HIBERNATION_POSSIBLE
def_bool y
@@ -1574,6 +1568,7 @@ config NUMA
depends on SMP
depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
default y if X86_BIGSMP
+ select USE_PERCPU_NUMA_NODE_ID
help
Enable NUMA (Non-Uniform Memory Access) support.
@@ -2449,10 +2444,6 @@ config ARCH_HAS_ADD_PAGES
config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
def_bool y
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
menu "Power management and ACPI options"
config ARCH_HIBERNATION_HEADER
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e11813646051..6115274fe10f 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -126,17 +126,17 @@ vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,gzip)
$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,bzip2)
+ $(call if_changed,bzip2_with_size)
$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzma)
+ $(call if_changed,lzma_with_size)
$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,xzkern)
+ $(call if_changed,xzkern_with_size)
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzo)
+ $(call if_changed,lzo_with_size)
$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lz4)
+ $(call if_changed,lz4_with_size)
$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,zstd22)
+ $(call if_changed,zstd22_with_size)
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a7ec22b1d06c..887420844066 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1241,14 +1241,14 @@ SYM_CODE_START(asm_exc_nmi)
SYM_CODE_END(asm_exc_nmi)
.pushsection .text, "ax"
-SYM_CODE_START(rewind_stack_do_exit)
+SYM_CODE_START(rewind_stack_and_make_dead)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
- call do_exit
+ call make_task_dead
1: jmp 1b
-SYM_CODE_END(rewind_stack_do_exit)
+SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ffdbfaad2e2..466df3e50276 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1427,7 +1427,7 @@ SYM_CODE_END(ignore_sysret)
#endif
.pushsection .text, "ax"
-SYM_CODE_START(rewind_stack_do_exit)
+SYM_CODE_START(rewind_stack_and_make_dead)
UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
@@ -1436,6 +1436,6 @@ SYM_CODE_START(rewind_stack_do_exit)
leaq -PTREGS_SIZE(%rax), %rsp
UNWIND_HINT_REGS
- call do_exit
-SYM_CODE_END(rewind_stack_do_exit)
+ call make_task_dead
+SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7e25543693de..320480a8db4f 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -454,3 +454,4 @@
447 i386 memfd_secret sys_memfd_secret
448 i386 process_mrelease sys_process_mrelease
449 i386 futex_waitv sys_futex_waitv
+450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index fe8f8dd157b4..c84d12608cd2 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,6 +371,7 @@
447 common memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fd9f908debe5..c91434056c29 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6236,6 +6236,19 @@ __init int intel_pmu_init(void)
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
}
+
+ /*
+ * Quirk: For some Alder Lake machine, when all E-cores are disabled in
+ * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However,
+ * the X86_FEATURE_HYBRID_CPU is still set. The above codes will
+ * mistakenly add extra counters for P-cores. Correct the number of
+ * counters here.
+ */
+ if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
+ pmu->num_counters = x86_pmu.num_counters;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ }
+
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
@@ -6340,6 +6353,8 @@ __init int intel_pmu_init(void)
}
if (x86_pmu.lbr_nr) {
+ intel_pmu_lbr_init();
+
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
/* only support branch_stack snapshot for perfmon >= v2 */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8043213b75a5..669c2be14784 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -8,14 +8,6 @@
#include "../perf_event.h"
-static const enum {
- LBR_EIP_FLAGS = 1,
- LBR_TSX = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
- [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
- [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
/*
* Intel LBR_SELECT bits
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (x86_pmu.lbr_has_info)
wrmsrl(x86_pmu.lbr_info + i, 0);
}
}
@@ -305,11 +297,10 @@ enum {
*/
static inline bool lbr_from_signext_quirk_needed(void)
{
- int lbr_format = x86_pmu.intel_cap.lbr_format;
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+ return !tsx_support && x86_pmu.lbr_has_tsx;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
void intel_pmu_lbr_restore(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
- int i;
- unsigned lbr_idx, mask;
+ bool need_info = x86_pmu.lbr_has_info;
u64 tos = task_ctx->tos;
+ unsigned lbr_idx, mask;
+ int i;
mask = x86_pmu.lbr_nr - 1;
for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, 0);
wrlbr_to(lbr_idx, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (need_info)
wrlbr_info(lbr_idx, 0);
}
@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
void intel_pmu_lbr_save(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
+ bool need_info = x86_pmu.lbr_has_info;
unsigned lbr_idx, mask;
u64 tos;
int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
- int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
- int skip = 0;
u16 cycles = 0;
- int lbr_flags = lbr_desc[lbr_format];
from = rdlbr_from(lbr_idx, NULL);
to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (call_stack && !from)
break;
- if (lbr_format == LBR_FORMAT_INFO && need_info) {
- u64 info;
-
- info = rdlbr_info(lbr_idx, NULL);
- mis = !!(info & LBR_INFO_MISPRED);
- pred = !mis;
- in_tx = !!(info & LBR_INFO_IN_TX);
- abort = !!(info & LBR_INFO_ABORT);
- cycles = (info & LBR_INFO_CYCLES);
- }
-
- if (lbr_format == LBR_FORMAT_TIME) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- cycles = ((to >> 48) & LBR_INFO_CYCLES);
-
- to = (u64)((((s64)to) << 16) >> 16);
- }
-
- if (lbr_flags & LBR_EIP_FLAGS) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- }
- if (lbr_flags & LBR_TSX) {
- in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
- abort = !!(from & LBR_FROM_FLAG_ABORT);
- skip = 3;
+ if (x86_pmu.lbr_has_info) {
+ if (need_info) {
+ u64 info;
+
+ info = rdlbr_info(lbr_idx, NULL);
+ mis = !!(info & LBR_INFO_MISPRED);
+ pred = !mis;
+ cycles = (info & LBR_INFO_CYCLES);
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(info & LBR_INFO_IN_TX);
+ abort = !!(info & LBR_INFO_ABORT);
+ }
+ }
+ } else {
+ int skip = 0;
+
+ if (x86_pmu.lbr_from_flags) {
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
+ skip = 1;
+ }
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
+ }
+ from = (u64)((((s64)from) << skip) >> skip);
+
+ if (x86_pmu.lbr_to_cycles) {
+ cycles = ((to >> 48) & LBR_INFO_CYCLES);
+ to = (u64)((((s64)to) << 16) >> 16);
+ }
}
- from = (u64)((((s64)from) << skip) >> skip);
/*
* Some CPUs report duplicated abort records,
@@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_stack.hw_idx = tos;
}
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
+
static __always_inline int get_lbr_br_type(u64 info)
{
- if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
- return 0;
+ int type = 0;
- return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+ if (static_branch_likely(&x86_lbr_type))
+ type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+
+ return type;
}
static __always_inline bool get_lbr_mispred(u64 info)
{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
+ bool mispred = 0;
- return !!(info & LBR_INFO_MISPRED);
-}
+ if (static_branch_likely(&x86_lbr_mispred))
+ mispred = !!(info & LBR_INFO_MISPRED);
-static __always_inline bool get_lbr_predicted(u64 info)
-{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
-
- return !(info & LBR_INFO_MISPRED);
+ return mispred;
}
static __always_inline u16 get_lbr_cycles(u64 info)
{
+ u16 cycles = info & LBR_INFO_CYCLES;
+
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
- !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
- return 0;
+ (!static_branch_likely(&x86_lbr_cycles) ||
+ !(info & LBR_INFO_CYC_CNT_VALID)))
+ cycles = 0;
- return info & LBR_INFO_CYCLES;
+ return cycles;
}
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
@@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->from = from;
e->to = to;
e->mispred = get_lbr_mispred(info);
- e->predicted = get_lbr_predicted(info);
+ e->predicted = !e->mispred;
e->in_tx = !!(info & LBR_INFO_IN_TX);
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
@@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
- (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ x86_pmu.lbr_has_info)
reg->config |= LBR_NO_INFO;
return 0;
@@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
+void intel_pmu_lbr_init(void)
+{
+ switch (x86_pmu.intel_cap.lbr_format) {
+ case LBR_FORMAT_EIP_FLAGS2:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_EIP_FLAGS:
+ x86_pmu.lbr_from_flags = 1;
+ break;
+
+ case LBR_FORMAT_INFO:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_INFO2:
+ x86_pmu.lbr_has_info = 1;
+ break;
+
+ case LBR_FORMAT_TIME:
+ x86_pmu.lbr_from_flags = 1;
+ x86_pmu.lbr_to_cycles = 1;
+ break;
+ }
+
+ if (x86_pmu.lbr_has_info) {
+ /*
+ * Only used in combination with baseline pebs.
+ */
+ static_branch_enable(&x86_lbr_mispred);
+ static_branch_enable(&x86_lbr_cycles);
+ }
+}
+
/*
* LBR state size is variable based on the max number of registers.
* This calculates the expected state size, which should match
@@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void)
* Check the LBR state with the corresponding software structure.
* Disable LBR XSAVES support if the size doesn't match.
*/
+ if (xfeature_size(XFEATURE_LBR) == 0)
+ return false;
+
if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
return false;
@@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
x86_pmu.lbr_nr = lbr_nr;
+ if (x86_pmu.lbr_mispred)
+ static_branch_enable(&x86_lbr_mispred);
+ if (x86_pmu.lbr_timed_lbr)
+ static_branch_enable(&x86_lbr_cycles);
+ if (x86_pmu.lbr_br_type)
+ static_branch_enable(&x86_lbr_type);
arch_lbr_xsave = is_arch_lbr_xsave_available();
if (arch_lbr_xsave) {
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index f1ba6ab2e97e..e497da9bf427 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
.cpu_init = adl_uncore_cpu_init,
- .mmio_init = tgl_uncore_mmio_init,
+ .mmio_init = adl_uncore_mmio_init,
};
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index b9687980aab6..2adeaf4de4df 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
-void adl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
+void adl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
+void adl_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 3049c646fa20..6ddadb482f68 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -494,8 +494,8 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
writel(0, box->io_addr);
}
-static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 6d735611c281..cfaf558bdb6b 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -139,6 +139,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
struct perf_event *event);
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 0f63706cdadf..f698a55bde81 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */
#include "uncore.h"
+#include "uncore_discovery.h"
/* Uncore IMC PCI IDs */
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
@@ -64,6 +65,20 @@
#define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53
#define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660
#define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641
+#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601
+#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602
+#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609
+#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a
+#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621
+#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623
+#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629
+#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637
+#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b
+#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648
+#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649
+#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650
+#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668
+#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -155,6 +170,7 @@
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
@@ -1334,6 +1350,62 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_3_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_4_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_5_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_6_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_7_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_8_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_9_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_10_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_11_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_12_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_13_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_14_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_15_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ }
};
@@ -1390,7 +1462,8 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void)
#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000
-static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+static void __uncore_imc_init_box(struct intel_uncore_box *box,
+ unsigned int base_offset)
{
struct pci_dev *pdev = tgl_uncore_get_mc_dev();
struct intel_uncore_pmu *pmu = box->pmu;
@@ -1417,11 +1490,17 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
addr |= ((resource_size_t)mch_bar << 32);
#endif
+ addr += base_offset;
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr)
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
}
+static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, 0);
+}
+
static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
.init_box = tgl_uncore_imc_freerunning_init_box,
.exit_box = uncore_mmio_exit_box,
@@ -1469,3 +1548,136 @@ void tgl_uncore_mmio_init(void)
}
/* end of Tiger Lake MMIO uncore support */
+
+/* Alder Lake MMIO uncore support */
+#define ADL_UNCORE_IMC_BASE 0xd900
+#define ADL_UNCORE_IMC_MAP_SIZE 0x200
+#define ADL_UNCORE_IMC_CTR 0xe8
+#define ADL_UNCORE_IMC_CTRL 0xd0
+#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0
+#define ADL_UNCORE_IMC_BOX_CTL 0xc4
+#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800
+#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100
+
+#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0)
+#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1)
+#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2)
+#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \
+ ADL_UNCORE_IMC_CTL_RST_CTRS)
+
+static void adl_uncore_imc_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE);
+
+ /* The global control in MC1 can control both MCs. */
+ if (box->io_addr && (box->pmu->pmu_idx == 1))
+ writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL);
+}
+
+static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(0, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static struct intel_uncore_ops adl_uncore_mmio_ops = {
+ .init_box = adl_uncore_imc_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = adl_uncore_mmio_disable_box,
+ .enable_box = adl_uncore_mmio_enable_box,
+ .disable_event = intel_generic_uncore_mmio_disable_event,
+ .enable_event = intel_generic_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00
+#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ ADL_UNC_CTL_CHMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET)
+
+static struct attribute *adl_uncore_imc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_chmask.attr,
+ &format_attr_edge.attr,
+ NULL,
+};
+
+static const struct attribute_group adl_uncore_imc_format_group = {
+ .name = "format",
+ .attrs = adl_uncore_imc_formats_attr,
+};
+
+static struct intel_uncore_type adl_uncore_imc = {
+ .name = "imc",
+ .num_counters = 5,
+ .num_boxes = 2,
+ .perf_ctr_bits = 64,
+ .perf_ctr = ADL_UNCORE_IMC_CTR,
+ .event_ctl = ADL_UNCORE_IMC_CTRL,
+ .event_mask = ADL_UNC_IMC_EVENT_MASK,
+ .box_ctl = ADL_UNCORE_IMC_BOX_CTL,
+ .mmio_offset = 0,
+ .mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE,
+ .ops = &adl_uncore_mmio_ops,
+ .format_group = &adl_uncore_imc_format_group,
+};
+
+enum perf_adl_uncore_imc_freerunning_types {
+ ADL_MMIO_UNCORE_IMC_DATA_TOTAL,
+ ADL_MMIO_UNCORE_IMC_DATA_READ,
+ ADL_MMIO_UNCORE_IMC_DATA_WRITE,
+ ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX
+};
+
+static struct freerunning_counters adl_uncore_imc_freerunning[] = {
+ [ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 },
+};
+
+static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE);
+}
+
+static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = {
+ .init_box = adl_uncore_imc_freerunning_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct intel_uncore_type adl_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+ .mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE,
+ .freerunning = adl_uncore_imc_freerunning,
+ .ops = &adl_uncore_imc_freerunning_ops,
+ .event_descs = tgl_uncore_imc_events,
+ .format_group = &tgl_uncore_imc_format_group,
+};
+
+static struct intel_uncore_type *adl_mmio_uncores[] = {
+ &adl_uncore_imc,
+ &adl_uncore_imc_free_running,
+ NULL
+};
+
+void adl_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = adl_mmio_uncores;
+}
+
+/* end of Alder Lake MMIO uncore support */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 3660f698fb2a..ed869443efb2 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -5482,7 +5482,7 @@ static struct intel_uncore_type icx_uncore_imc = {
.fixed_ctr_bits = 48,
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
- .event_descs = hswep_uncore_imc_events,
+ .event_descs = snr_uncore_imc_events,
.perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
.event_ctl = SNR_IMC_MMIO_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 9d376e528dfc..150261d929b9 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -215,7 +215,8 @@ enum {
LBR_FORMAT_EIP_FLAGS2 = 0x04,
LBR_FORMAT_INFO = 0x05,
LBR_FORMAT_TIME = 0x06,
- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
+ LBR_FORMAT_INFO2 = 0x07,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2,
};
enum {
@@ -840,6 +841,11 @@ struct x86_pmu {
bool lbr_double_abort; /* duplicated lbr aborts */
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
+ unsigned int lbr_has_info:1;
+ unsigned int lbr_has_tsx:1;
+ unsigned int lbr_from_flags:1;
+ unsigned int lbr_to_cycles:1;
+
/*
* Intel Architectural LBR CPUID Enumeration
*/
@@ -1392,6 +1398,8 @@ void intel_pmu_lbr_init_skl(void);
void intel_pmu_lbr_init_knl(void);
+void intel_pmu_lbr_init(void);
+
void intel_pmu_arch_lbr_init(void);
void intel_pmu_pebs_data_source_nhm(void);
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 85feafacc445..77e3a47af5ad 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
* - perf_msr_probe(PERF_RAPL_MAX)
* - want to use same event codes across both architectures
*/
-static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
- [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
+static struct perf_msr amd_rapl_msrs[] = {
+ [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 },
+ [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
+ [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 },
+ [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 },
+ [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 },
};
-
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 96eb7db31c8e..8b392b6b7b93 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -28,6 +28,7 @@
#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include <linux/highmem.h>
+#include <linux/swiotlb.h>
int hyperv_init_cpuhp;
u64 hv_current_partition_id = ~0ull;
@@ -36,7 +37,7 @@ EXPORT_SYMBOL_GPL(hv_current_partition_id);
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
-union hv_ghcb __percpu **hv_ghcb_pg;
+union hv_ghcb * __percpu *hv_ghcb_pg;
/* Storage to save the hypercall page temporarily for hibernation */
static void *hv_hypercall_pg_saved;
@@ -498,6 +499,17 @@ void __init hyperv_init(void)
/* Query the VMs extended capability once, so that it can be cached. */
hv_query_ext_cap(0);
+
+#ifdef CONFIG_SWIOTLB
+ /*
+ * Swiotlb bounce buffer needs to be mapped in extra address
+ * space. Map function doesn't work in the early place and so
+ * call swiotlb_update_mem_attributes() here.
+ */
+ if (hv_is_isolation_supported())
+ swiotlb_update_mem_attributes();
+#endif
+
return;
clean_guest_os_id:
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 69c7a57f3307..2b994117581e 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -287,3 +287,31 @@ int hv_set_mem_host_visibility(unsigned long kbuffer, int pagecount, bool visibl
kfree(pfn_array);
return ret;
}
+
+/*
+ * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM.
+ */
+void *hv_map_memory(void *addr, unsigned long size)
+{
+ unsigned long *pfns = kcalloc(size / PAGE_SIZE,
+ sizeof(unsigned long), GFP_KERNEL);
+ void *vaddr;
+ int i;
+
+ if (!pfns)
+ return NULL;
+
+ for (i = 0; i < size / PAGE_SIZE; i++)
+ pfns[i] = vmalloc_to_pfn(addr + i * PAGE_SIZE) +
+ (ms_hyperv.shared_gpa_boundary >> PAGE_SHIFT);
+
+ vaddr = vmap_pfn(pfns, size / PAGE_SIZE, PAGE_KERNEL_IO);
+ kfree(pfns);
+
+ return vaddr;
+}
+
+void hv_unmap_memory(void *addr)
+{
+ vunmap(addr);
+}
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index bd13736d0c05..0ad2378fe6ad 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -68,15 +68,6 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
local_irq_save(flags);
- /*
- * Only check the mask _after_ interrupt has been disabled to avoid the
- * mask changing under our feet.
- */
- if (cpumask_empty(cpus)) {
- local_irq_restore(flags);
- return;
- }
-
flush_pcpu = (struct hv_tlb_flush **)
this_cpu_ptr(hyperv_pcpu_input_arg);
@@ -115,7 +106,9 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
* must. We will also check all VP numbers when walking the
* supplied CPU set to remain correct in all cases.
*/
- if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
+ cpu = cpumask_last(cpus);
+
+ if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64)
goto do_ex_hypercall;
for_each_cpu(cpu, cpus) {
@@ -131,6 +124,12 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
__set_bit(vcpu, (unsigned long *)
&flush->processor_mask);
}
+
+ /* nothing to flush if 'processor_mask' ends up being empty */
+ if (!flush->processor_mask) {
+ local_irq_restore(flags);
+ return;
+ }
}
/*
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 0367efdc5b7a..a288ecd230ab 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -380,8 +380,6 @@ static __always_inline int fls64(__u64 x)
#include <asm-generic/bitops/fls64.h>
#endif
-#include <asm-generic/bitops/find.h>
-
#include <asm-generic/bitops/sched.h>
#include <asm/arch_hweight.h>
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 18de5f76f198..6db4e2932b3d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -299,7 +299,9 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index c2767a6a387e..c83b3020350a 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -132,10 +132,21 @@ static inline void fpstate_free(struct fpu *fpu) { }
/* fpstate-related functions which are exported to KVM */
extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);
+extern u64 xstate_get_guest_group_perm(void);
+
/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
+extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures);
+
+#ifdef CONFIG_X86_64
+extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd);
+extern void fpu_sync_guest_vmexit_xfd_state(void);
+#else
+static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { }
+static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
+#endif
extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c06c82ab355..eb7cd1139d97 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -387,6 +387,8 @@ struct fpstate {
/* @regs is dynamically sized! Don't add anything after @regs! */
} __aligned(64);
+#define FPU_GUEST_PERM_LOCKED BIT_ULL(63)
+
struct fpu_state_perm {
/*
* @__state_perm:
@@ -477,6 +479,13 @@ struct fpu {
struct fpu_state_perm perm;
/*
+ * @guest_perm:
+ *
+ * Permission related information for guest pseudo FPUs
+ */
+ struct fpu_state_perm guest_perm;
+
+ /*
* @__fpstate:
*
* Initial in-memory storage for FPU registers which are saved in
@@ -496,6 +505,29 @@ struct fpu {
*/
struct fpu_guest {
/*
+ * @xfeatures: xfeature bitmap of features which are
+ * currently enabled for the guest vCPU.
+ */
+ u64 xfeatures;
+
+ /*
+ * @perm: xfeature bitmap of features which are
+ * permitted to be enabled for the guest
+ * vCPU.
+ */
+ u64 perm;
+
+ /*
+ * @xfd_err: Save the guest value.
+ */
+ u64 xfd_err;
+
+ /*
+ * @uabi_size: Size required for save/restore
+ */
+ unsigned int uabi_size;
+
+ /*
* @fpstate: Pointer to the allocated guest fpstate
*/
struct fpstate *fpstate;
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 381e88122a5f..0a9407dc0859 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -602,6 +602,39 @@ enum hv_interrupt_type {
HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A,
};
+union hv_msi_address_register {
+ u32 as_uint32;
+ struct {
+ u32 reserved1:2;
+ u32 destination_mode:1;
+ u32 redirection_hint:1;
+ u32 reserved2:8;
+ u32 destination_id:8;
+ u32 msi_base:12;
+ };
+} __packed;
+
+union hv_msi_data_register {
+ u32 as_uint32;
+ struct {
+ u32 vector:8;
+ u32 delivery_mode:3;
+ u32 reserved1:3;
+ u32 level_assert:1;
+ u32 trigger_mode:1;
+ u32 reserved2:16;
+ };
+} __packed;
+
+/* HvRetargetDeviceInterrupt hypercall */
+union hv_msi_entry {
+ u64 as_uint64;
+ struct {
+ union hv_msi_address_register address;
+ union hv_msi_data_register data;
+ } __packed;
+};
+
#include <asm-generic/hyperv-tlfs.h>
#endif
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 9e50da3ed01a..631d5040b31e 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -35,6 +35,7 @@ KVM_X86_OP(get_cpl)
KVM_X86_OP(set_segment)
KVM_X86_OP_NULL(get_cs_db_l_bits)
KVM_X86_OP(set_cr0)
+KVM_X86_OP_NULL(post_set_cr3)
KVM_X86_OP(is_valid_cr4)
KVM_X86_OP(set_cr4)
KVM_X86_OP(set_efer)
@@ -54,6 +55,7 @@ KVM_X86_OP_NULL(tlb_remote_flush)
KVM_X86_OP_NULL(tlb_remote_flush_with_range)
KVM_X86_OP(tlb_flush_gva)
KVM_X86_OP(tlb_flush_guest)
+KVM_X86_OP(vcpu_pre_run)
KVM_X86_OP(run)
KVM_X86_OP_NULL(handle_exit)
KVM_X86_OP_NULL(skip_emulated_instruction)
@@ -97,8 +99,6 @@ KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP_NULL(request_immediate_exit)
KVM_X86_OP(sched_in)
KVM_X86_OP_NULL(update_cpu_dirty_logging)
-KVM_X86_OP_NULL(pre_block)
-KVM_X86_OP_NULL(post_block)
KVM_X86_OP_NULL(vcpu_blocking)
KVM_X86_OP_NULL(vcpu_unblocking)
KVM_X86_OP_NULL(update_pi_irte)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d0ad98ddd459..1384517d7709 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -135,7 +135,7 @@
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
-#define KVM_PERMILLE_MMU_PAGES 20
+#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50
#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
@@ -291,25 +291,31 @@ struct kvm_kernel_irq_routing_entry;
* the number of unique SPs that can theoretically be created is 2^n, where n
* is the number of bits that are used to compute the role.
*
- * But, even though there are 18 bits in the mask below, not all combinations
- * of modes and flags are possible. The maximum number of possible upper-level
- * shadow pages for a single gfn is in the neighborhood of 2^13.
+ * But, even though there are 19 bits in the mask below, not all combinations
+ * of modes and flags are possible:
*
- * - invalid shadow pages are not accounted.
- * - level is effectively limited to four combinations, not 16 as the number
- * bits would imply, as 4k SPs are not tracked (allowed to go unsync).
- * - level is effectively unused for non-PAE paging because there is exactly
- * one upper level (see 4k SP exception above).
- * - quadrant is used only for non-PAE paging and is exclusive with
- * gpte_is_8_bytes.
- * - execonly and ad_disabled are used only for nested EPT, which makes it
- * exclusive with quadrant.
+ * - invalid shadow pages are not accounted, so the bits are effectively 18
+ *
+ * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
+ * execonly and ad_disabled are only used for nested EPT which has
+ * has_4_byte_gpte=0. Therefore, 2 bits are always unused.
+ *
+ * - the 4 bits of level are effectively limited to the values 2/3/4/5,
+ * as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE
+ * paging has exactly one upper level, making level completely redundant
+ * when has_4_byte_gpte=1.
+ *
+ * - on top of this, smep_andnot_wp and smap_andnot_wp are only set if
+ * cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
+ *
+ * Therefore, the maximum number of possible upper-level shadow pages for a
+ * single gfn is a bit less than 2^13.
*/
union kvm_mmu_page_role {
u32 word;
struct {
unsigned level:4;
- unsigned gpte_is_8_bytes:1;
+ unsigned has_4_byte_gpte:1;
unsigned quadrant:2;
unsigned direct:1;
unsigned access:3;
@@ -420,10 +426,9 @@ struct kvm_mmu {
int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
- u32 access, struct x86_exception *exception);
- gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception);
+ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gpa_t gva_or_gpa, u32 access,
+ struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
@@ -490,6 +495,7 @@ struct kvm_pmc {
*/
u64 current_config;
bool is_paused;
+ bool intr;
};
struct kvm_pmu {
@@ -604,6 +610,7 @@ struct kvm_vcpu_xen {
u64 last_steal;
u64 runstate_entry_time;
u64 runstate_times[4];
+ unsigned long evtchn_pending_sel;
};
struct kvm_vcpu_arch {
@@ -640,6 +647,7 @@ struct kvm_vcpu_arch {
u64 smi_count;
bool tpr_access_reporting;
bool xsaves_enabled;
+ bool xfd_no_write_intercept;
u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
@@ -1015,7 +1023,7 @@ struct msr_bitmap_range {
struct kvm_xen {
bool long_mode;
u8 upcall_vector;
- gfn_t shinfo_gfn;
+ struct gfn_to_pfn_cache shinfo_cache;
};
enum kvm_irqchip_mode {
@@ -1338,6 +1346,7 @@ struct kvm_x86_ops {
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
+ void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
@@ -1372,6 +1381,7 @@ struct kvm_x86_ops {
*/
void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
+ int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
@@ -1445,18 +1455,6 @@ struct kvm_x86_ops {
const struct kvm_pmu_ops *pmu_ops;
const struct kvm_x86_nested_ops *nested_ops;
- /*
- * Architecture specific hooks for vCPU blocking due to
- * HLT instruction.
- * Returns for .pre_block():
- * - 0 means continue to block the vCPU.
- * - 1 means we cannot block the vCPU since some event
- * happens during this period, such as, 'ON' bit in
- * posted-interrupts descriptor is set.
- */
- int (*pre_block)(struct kvm_vcpu *vcpu);
- void (*post_block)(struct kvm_vcpu *vcpu);
-
void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
@@ -1592,10 +1590,9 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
-int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
@@ -1645,7 +1642,8 @@ extern u64 kvm_mce_cap_supported;
*
* EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
* decode the instruction length. For use *only* by
- * kvm_x86_ops.skip_emulated_instruction() implementations.
+ * kvm_x86_ops.skip_emulated_instruction() implementations if
+ * EMULTYPE_COMPLETE_USER_EXIT is not set.
*
* EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
* retry native execution under certain conditions,
@@ -1665,6 +1663,10 @@ extern u64 kvm_mce_cap_supported;
*
* EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
* case the CR2/GPA value pass on the stack is valid.
+ *
+ * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
+ * state and inject single-step #DBs after skipping
+ * an instruction (after completing userspace I/O).
*/
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
@@ -1673,6 +1675,7 @@ extern u64 kvm_mce_cap_supported;
#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
#define EMULTYPE_VMWARE_GP (1 << 5)
#define EMULTYPE_PF (1 << 6)
+#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
@@ -1697,7 +1700,7 @@ int kvm_emulate_monitor(struct kvm_vcpu *vcpu);
int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu);
int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
@@ -1763,12 +1766,9 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
void kvm_update_dr7(struct kvm_vcpu *vcpu);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
-void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
ulong roots_to_free);
void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu);
-gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1930,8 +1930,6 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
static_call_cond(kvm_x86_vcpu_unblocking)(vcpu);
}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
-
static inline int kvm_cpu_get_apicid(int mps_cpu)
{
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 9d4a3b1b25b9..eb186bc57f6a 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -63,9 +63,9 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
void kvm_slot_page_track_remove_page(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode);
+bool kvm_slot_page_track_is_active(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ gfn_t gfn, enum kvm_page_track_mode mode);
void
kvm_page_track_register_notifier(struct kvm *kvm,
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index da3972fe5a7a..a82f603d4312 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -30,7 +30,7 @@ extern void *hv_hypercall_pg;
extern u64 hv_current_partition_id;
-extern union hv_ghcb __percpu **hv_ghcb_pg;
+extern union hv_ghcb * __percpu *hv_ghcb_pg;
int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
@@ -169,13 +169,6 @@ bool hv_vcpu_is_preempted(int vcpu);
static inline void hv_apic_init(void) {}
#endif
-static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
- struct msi_desc *msi_desc)
-{
- msi_entry->address.as_uint32 = msi_desc->msg.address_lo;
- msi_entry->data.as_uint32 = msi_desc->msg.data;
-}
-
struct irq_domain *hv_create_pci_msi_domain(void);
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a34430b7af4a..8a9432fb3802 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -27,6 +27,7 @@
#include <asm/pkru.h>
#include <asm/fpu/api.h>
#include <asm-generic/pgtable_uffd.h>
+#include <linux/page_table_check.h>
extern pgd_t early_top_pgt[PTRS_PER_PGD];
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
@@ -753,7 +754,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
return true;
if ((pte_flags(a) & _PAGE_PROTNONE) &&
- mm_tlb_flush_pending(mm))
+ atomic_read(&mm->tlb_flush_pending))
return true;
return false;
@@ -1007,18 +1008,21 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
+ page_table_check_pte_set(mm, addr, ptep, pte);
set_pte(ptep, pte);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(mm, addr, pmdp, pmd);
set_pmd(pmdp, pmd);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
+ page_table_check_pud_set(mm, addr, pudp, pud);
native_set_pud(pudp, pud);
}
@@ -1049,6 +1053,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, addr, pte);
return pte;
}
@@ -1064,12 +1069,23 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
* care about updates and native needs no locking
*/
pte = native_local_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, addr, pte);
} else {
pte = ptep_get_and_clear(mm, addr, ptep);
}
return pte;
}
+#define __HAVE_ARCH_PTEP_CLEAR
+static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ if (IS_ENABLED(CONFIG_PAGE_TABLE_CHECK))
+ ptep_get_and_clear(mm, addr, ptep);
+ else
+ pte_clear(mm, addr, ptep);
+}
+
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
@@ -1110,14 +1126,22 @@ static inline int pmd_write(pmd_t pmd)
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
{
- return native_pmdp_get_and_clear(pmdp);
+ pmd_t pmd = native_pmdp_get_and_clear(pmdp);
+
+ page_table_check_pmd_clear(mm, addr, pmd);
+
+ return pmd;
}
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
- return native_pudp_get_and_clear(pudp);
+ pud_t pud = native_pudp_get_and_clear(pudp);
+
+ page_table_check_pud_clear(mm, addr, pud);
+
+ return pud;
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -1138,6 +1162,7 @@ static inline int pud_write(pud_t pud)
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
if (IS_ENABLED(CONFIG_SMP)) {
return xchg(pmdp, pmd);
} else {
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5a776a08f78c..2da3316bb559 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -373,9 +373,23 @@ struct kvm_debugregs {
__u64 reserved[9];
};
-/* for KVM_CAP_XSAVE */
+/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
+ /*
+ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
+ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * respectively, when invoked on the vm file descriptor.
+ *
+ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * will always be at least 4096. Currently, it is only greater
+ * than 4096 if a dynamic feature has been enabled with
+ * ``arch_prctl()``, but this may change in the future.
+ *
+ * The offsets of the state save areas in struct kvm_xsave follow
+ * the contents of CPUID leaf 0xD on the host.
+ */
__u32 region[1024];
+ __u32 extra[0];
};
#define KVM_MAX_XCRS 16
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 754a07856817..500b96e71f18 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -2,20 +2,22 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
-#define ARCH_GET_CPUID 0x1011
-#define ARCH_SET_CPUID 0x1012
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
-#define ARCH_GET_XCOMP_SUPP 0x1021
-#define ARCH_GET_XCOMP_PERM 0x1022
-#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_SUPP 0x1021
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
-#define ARCH_MAP_VDSO_X32 0x2001
-#define ARCH_MAP_VDSO_32 0x2002
-#define ARCH_MAP_VDSO_64 0x2003
+#define ARCH_MAP_VDSO_X32 0x2001
+#define ARCH_MAP_VDSO_32 0x2002
+#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index c132daabe615..3e6f6b448f6a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -760,9 +760,9 @@ void __init lapic_update_legacy_vectors(void)
void __init lapic_assign_system_vectors(void)
{
- unsigned int i, vector = 0;
+ unsigned int i, vector;
- for_each_set_bit_from(vector, system_vectors, NR_VECTORS)
+ for_each_set_bit(vector, system_vectors, NR_VECTORS)
irq_matrix_assign_system(vector_matrix, vector, false);
if (nr_legacy_irqs() > 1)
diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/kernel/cc_platform.c
index 8a25b1c0d480..6a6ffcd978f6 100644
--- a/arch/x86/kernel/cc_platform.c
+++ b/arch/x86/kernel/cc_platform.c
@@ -11,6 +11,7 @@
#include <linux/cc_platform.h>
#include <linux/mem_encrypt.h>
+#include <asm/mshyperv.h>
#include <asm/processor.h>
static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr)
@@ -66,12 +67,19 @@ static bool amd_cc_platform_has(enum cc_attr attr)
#endif
}
+static bool hyperv_cc_platform_has(enum cc_attr attr)
+{
+ return attr == CC_ATTR_GUEST_MEM_ENCRYPT;
+}
bool cc_platform_has(enum cc_attr attr)
{
if (sme_me_mask)
return amd_cc_platform_has(attr);
+ if (hv_is_isolation_supported())
+ return hyperv_cc_platform_has(attr);
+
return false;
}
EXPORT_SYMBOL_GPL(cc_platform_has);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 2a0f83678911..5a99f993e639 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -18,6 +18,7 @@
#include <linux/kexec.h>
#include <linux/i8253.h>
#include <linux/random.h>
+#include <linux/swiotlb.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
@@ -329,8 +330,20 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
- if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
+ if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
static_branch_enable(&isolation_type_snp);
+#ifdef CONFIG_SWIOTLB
+ swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
+#endif
+ }
+
+#ifdef CONFIG_SWIOTLB
+ /*
+ * Enable swiotlb force mode in Isolation VM to
+ * use swiotlb bounce buffer for dma transaction.
+ */
+ swiotlb_force = SWIOTLB_FORCE;
+#endif
}
if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ea4fe192189d..53de044e5654 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -351,7 +351,7 @@ unsigned long oops_begin(void)
}
NOKPROBE_SYMBOL(oops_begin);
-void __noreturn rewind_stack_do_exit(int signr);
+void __noreturn rewind_stack_and_make_dead(int signr);
void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
@@ -386,7 +386,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
* reuse the task stack and that existing poisons are invalid.
*/
kasan_unpoison_task_stack(current);
- rewind_stack_do_exit(signr);
+ rewind_stack_and_make_dead(signr);
}
NOKPROBE_SYMBOL(oops_end);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index fd2d3ab38ebb..dc7da08bc700 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -515,6 +515,7 @@ static const struct intel_early_ops gen11_early_ops __initconst = {
.stolen_size = gen9_stolen_size,
};
+/* Intel integrated GPUs for which we need to reserve "stolen memory" */
static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_I830_IDS(&i830_early_ops),
INTEL_I845G_IDS(&i845_early_ops),
@@ -592,6 +593,13 @@ static void __init intel_graphics_quirks(int num, int slot, int func)
u16 device;
int i;
+ /*
+ * Reserve "stolen memory" for an integrated GPU. If we've already
+ * found one, there's nothing to do for other (discrete) GPUs.
+ */
+ if (resource_size(&intel_graphics_stolen_res))
+ return;
+
device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) {
@@ -704,7 +712,7 @@ static struct chipset early_qrk[] __initdata = {
{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
- QFLAG_APPLY_ONCE, intel_graphics_quirks },
+ 0, intel_graphics_quirks },
/*
* HPET on the current version of the Baytrail platform has accuracy
* problems: it will halt in deep idle state - so we disable it.
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index dd3777ac0443..8dea01ffc5c1 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -206,7 +206,27 @@ void fpu_reset_from_exception_fixup(void)
}
#if IS_ENABLED(CONFIG_KVM)
-static void __fpstate_reset(struct fpstate *fpstate);
+static void __fpstate_reset(struct fpstate *fpstate, u64 xfd);
+
+static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
+{
+ struct fpu_state_perm *fpuperm;
+ u64 perm;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ spin_lock_irq(&current->sighand->siglock);
+ fpuperm = &current->group_leader->thread.fpu.guest_perm;
+ perm = fpuperm->__state_perm;
+
+ /* First fpstate allocation locks down permissions. */
+ WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED);
+
+ spin_unlock_irq(&current->sighand->siglock);
+
+ gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED;
+}
bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
{
@@ -218,12 +238,18 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
if (!fpstate)
return false;
- __fpstate_reset(fpstate);
+ /* Leave xfd to 0 (the reset value defined by spec) */
+ __fpstate_reset(fpstate, 0);
fpstate_init_user(fpstate);
fpstate->is_valloc = true;
fpstate->is_guest = true;
- gfpu->fpstate = fpstate;
+ gfpu->fpstate = fpstate;
+ gfpu->xfeatures = fpu_user_cfg.default_features;
+ gfpu->perm = fpu_user_cfg.default_features;
+ gfpu->uabi_size = fpu_user_cfg.default_size;
+ fpu_init_guest_permissions(gfpu);
+
return true;
}
EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
@@ -243,6 +269,64 @@ void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
}
EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
+/*
+ * fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable
+ * @guest_fpu: Pointer to the guest FPU container
+ * @xfeatures: Features requested by guest CPUID
+ *
+ * Enable all dynamic xfeatures according to guest perm and requested CPUID.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures)
+{
+ lockdep_assert_preemption_enabled();
+
+ /* Nothing to do if all requested features are already enabled. */
+ xfeatures &= ~guest_fpu->xfeatures;
+ if (!xfeatures)
+ return 0;
+
+ return __xfd_enable_feature(xfeatures, guest_fpu);
+}
+EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features);
+
+#ifdef CONFIG_X86_64
+void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)
+{
+ fpregs_lock();
+ guest_fpu->fpstate->xfd = xfd;
+ if (guest_fpu->fpstate->in_use)
+ xfd_update_state(guest_fpu->fpstate);
+ fpregs_unlock();
+}
+EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
+
+/**
+ * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state
+ *
+ * Must be invoked from KVM after a VMEXIT before enabling interrupts when
+ * XFD write emulation is disabled. This is required because the guest can
+ * freely modify XFD and the state at VMEXIT is not guaranteed to be the
+ * same as the state on VMENTER. So software state has to be udpated before
+ * any operation which depends on it can take place.
+ *
+ * Note: It can be invoked unconditionally even when write emulation is
+ * enabled for the price of a then pointless MSR read.
+ */
+void fpu_sync_guest_vmexit_xfd_state(void)
+{
+ struct fpstate *fps = current->thread.fpu.fpstate;
+
+ lockdep_assert_irqs_disabled();
+ if (fpu_state_size_dynamic()) {
+ rdmsrl(MSR_IA32_XFD, fps->xfd);
+ __this_cpu_write(xfd_state, fps->xfd);
+ }
+}
+EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
+#endif /* CONFIG_X86_64 */
+
int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
{
struct fpstate *guest_fps = guest_fpu->fpstate;
@@ -437,26 +521,28 @@ void fpstate_init_user(struct fpstate *fpstate)
fpstate_init_fstate(fpstate);
}
-static void __fpstate_reset(struct fpstate *fpstate)
+static void __fpstate_reset(struct fpstate *fpstate, u64 xfd)
{
/* Initialize sizes and feature masks */
fpstate->size = fpu_kernel_cfg.default_size;
fpstate->user_size = fpu_user_cfg.default_size;
fpstate->xfeatures = fpu_kernel_cfg.default_features;
fpstate->user_xfeatures = fpu_user_cfg.default_features;
- fpstate->xfd = init_fpstate.xfd;
+ fpstate->xfd = xfd;
}
void fpstate_reset(struct fpu *fpu)
{
/* Set the fpstate pointer to the default fpstate */
fpu->fpstate = &fpu->__fpstate;
- __fpstate_reset(fpu->fpstate);
+ __fpstate_reset(fpu->fpstate, init_fpstate.xfd);
/* Initialize the permission related info in fpu */
fpu->perm.__state_perm = fpu_kernel_cfg.default_features;
fpu->perm.__state_size = fpu_kernel_cfg.default_size;
fpu->perm.__user_state_size = fpu_user_cfg.default_size;
+ /* Same defaults for guests */
+ fpu->guest_perm = fpu->perm;
}
static inline void fpu_inherit_perms(struct fpu *dst_fpu)
@@ -467,6 +553,7 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu)
spin_lock_irq(&current->sighand->siglock);
/* Fork also inherits the permissions of the parent */
dst_fpu->perm = src_fpu->perm;
+ dst_fpu->guest_perm = src_fpu->guest_perm;
spin_unlock_irq(&current->sighand->siglock);
}
}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d28829403ed0..02b3ddaf4f75 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1500,35 +1500,13 @@ void fpstate_free(struct fpu *fpu)
}
/**
- * fpu_install_fpstate - Update the active fpstate in the FPU
- *
- * @fpu: A struct fpu * pointer
- * @newfps: A struct fpstate * pointer
- *
- * Returns: A null pointer if the last active fpstate is the embedded
- * one or the new fpstate is already installed;
- * otherwise, a pointer to the old fpstate which has to
- * be freed by the caller.
- */
-static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
- struct fpstate *newfps)
-{
- struct fpstate *oldfps = fpu->fpstate;
-
- if (fpu->fpstate == newfps)
- return NULL;
-
- fpu->fpstate = newfps;
- return oldfps != &fpu->__fpstate ? oldfps : NULL;
-}
-
-/**
* fpstate_realloc - Reallocate struct fpstate for the requested new features
*
* @xfeatures: A bitmap of xstate features which extend the enabled features
* of that task
* @ksize: The required size for the kernel buffer
* @usize: The required size for user space buffers
+ * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations
*
* Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
* terminates quickly, vfree()-induced IPIs may be a concern, but tasks
@@ -1537,13 +1515,13 @@ static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
* Returns: 0 on success, -ENOMEM on allocation error.
*/
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
- unsigned int usize)
+ unsigned int usize, struct fpu_guest *guest_fpu)
{
struct fpu *fpu = &current->thread.fpu;
struct fpstate *curfps, *newfps = NULL;
unsigned int fpsize;
+ bool in_use;
- curfps = fpu->fpstate;
fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
newfps = vzalloc(fpsize);
@@ -1553,28 +1531,56 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
newfps->user_size = usize;
newfps->is_valloc = true;
+ /*
+ * When a guest FPU is supplied, use @guest_fpu->fpstate
+ * as reference independent whether it is in use or not.
+ */
+ curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
+
+ /* Determine whether @curfps is the active fpstate */
+ in_use = fpu->fpstate == curfps;
+
+ if (guest_fpu) {
+ newfps->is_guest = true;
+ newfps->is_confidential = curfps->is_confidential;
+ newfps->in_use = curfps->in_use;
+ guest_fpu->xfeatures |= xfeatures;
+ guest_fpu->uabi_size = usize;
+ }
+
fpregs_lock();
/*
- * Ensure that the current state is in the registers before
- * swapping fpstate as that might invalidate it due to layout
- * changes.
+ * If @curfps is in use, ensure that the current state is in the
+ * registers before swapping fpstate as that might invalidate it
+ * due to layout changes.
*/
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->xfd = curfps->xfd & ~xfeatures;
- curfps = fpu_install_fpstate(fpu, newfps);
-
/* Do the final updates within the locked region */
xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
- xfd_update_state(newfps);
+ if (guest_fpu) {
+ guest_fpu->fpstate = newfps;
+ /* If curfps is active, update the FPU fpstate pointer */
+ if (in_use)
+ fpu->fpstate = newfps;
+ } else {
+ fpu->fpstate = newfps;
+ }
+
+ if (in_use)
+ xfd_update_state(fpu->fpstate);
fpregs_unlock();
- vfree(curfps);
+ /* Only free valloc'ed state */
+ if (curfps && curfps->is_valloc)
+ vfree(curfps);
+
return 0;
}
@@ -1595,7 +1601,7 @@ static int validate_sigaltstack(unsigned int usize)
return 0;
}
-static int __xstate_request_perm(u64 permitted, u64 requested)
+static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
/*
* This deliberately does not exclude !XSAVES as we still might
@@ -1605,9 +1611,10 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
*/
bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu_state_perm *perm;
unsigned int ksize, usize;
u64 mask;
- int ret;
+ int ret = 0;
/* Check whether fully enabled */
if ((permitted & requested) == requested)
@@ -1621,15 +1628,18 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
mask &= XFEATURE_MASK_USER_SUPPORTED;
usize = xstate_calculate_size(mask, false);
- ret = validate_sigaltstack(usize);
- if (ret)
- return ret;
+ if (!guest) {
+ ret = validate_sigaltstack(usize);
+ if (ret)
+ return ret;
+ }
+ perm = guest ? &fpu->guest_perm : &fpu->perm;
/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
- WRITE_ONCE(fpu->perm.__state_perm, requested);
+ WRITE_ONCE(perm->__state_perm, requested);
/* Protected by sighand lock */
- fpu->perm.__state_size = ksize;
- fpu->perm.__user_state_size = usize;
+ perm->__state_size = ksize;
+ perm->__user_state_size = usize;
return ret;
}
@@ -1640,7 +1650,7 @@ static const u64 xstate_prctl_req[XFEATURE_MAX] = {
[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};
-static int xstate_request_perm(unsigned long idx)
+static int xstate_request_perm(unsigned long idx, bool guest)
{
u64 permitted, requested;
int ret;
@@ -1661,26 +1671,33 @@ static int xstate_request_perm(unsigned long idx)
return -EOPNOTSUPP;
/* Lockless quick check */
- permitted = xstate_get_host_group_perm();
+ permitted = xstate_get_group_perm(guest);
if ((permitted & requested) == requested)
return 0;
/* Protect against concurrent modifications */
spin_lock_irq(&current->sighand->siglock);
- permitted = xstate_get_host_group_perm();
- ret = __xstate_request_perm(permitted, requested);
+ permitted = xstate_get_group_perm(guest);
+
+ /* First vCPU allocation locks the permissions. */
+ if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
+ ret = -EBUSY;
+ else
+ ret = __xstate_request_perm(permitted, requested, guest);
spin_unlock_irq(&current->sighand->siglock);
return ret;
}
-int xfd_enable_feature(u64 xfd_err)
+int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
+ struct fpu_state_perm *perm;
unsigned int ksize, usize;
struct fpu *fpu;
if (!xfd_event) {
- pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
+ if (!guest_fpu)
+ pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
return 0;
}
@@ -1688,14 +1705,16 @@ int xfd_enable_feature(u64 xfd_err)
spin_lock_irq(&current->sighand->siglock);
/* If not permitted let it die */
- if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) {
+ if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
spin_unlock_irq(&current->sighand->siglock);
return -EPERM;
}
fpu = &current->group_leader->thread.fpu;
- ksize = fpu->perm.__state_size;
- usize = fpu->perm.__user_state_size;
+ perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
+ ksize = perm->__state_size;
+ usize = perm->__user_state_size;
+
/*
* The feature is permitted. State size is sufficient. Dropping
* the lock is safe here even if more features are added from
@@ -1708,17 +1727,29 @@ int xfd_enable_feature(u64 xfd_err)
* Try to allocate a new fpstate. If that fails there is no way
* out.
*/
- if (fpstate_realloc(xfd_event, ksize, usize))
+ if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
return -EFAULT;
return 0;
}
+
+int xfd_enable_feature(u64 xfd_err)
+{
+ return __xfd_enable_feature(xfd_err, NULL);
+}
+
#else /* CONFIG_X86_64 */
-static inline int xstate_request_perm(unsigned long idx)
+static inline int xstate_request_perm(unsigned long idx, bool guest)
{
return -EPERM;
}
#endif /* !CONFIG_X86_64 */
+u64 xstate_get_guest_group_perm(void)
+{
+ return xstate_get_group_perm(true);
+}
+EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
+
/**
* fpu_xstate_prctl - xstate permission operations
* @tsk: Redundant pointer to current
@@ -1742,6 +1773,7 @@ long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
u64 __user *uptr = (u64 __user *)arg2;
u64 permitted, supported;
unsigned long idx = arg2;
+ bool guest = false;
if (tsk != current)
return -EPERM;
@@ -1760,11 +1792,20 @@ long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
permitted &= XFEATURE_MASK_USER_SUPPORTED;
return put_user(permitted, uptr);
+ case ARCH_GET_XCOMP_GUEST_PERM:
+ permitted = xstate_get_guest_group_perm();
+ permitted &= XFEATURE_MASK_USER_SUPPORTED;
+ return put_user(permitted, uptr);
+
+ case ARCH_REQ_XCOMP_GUEST_PERM:
+ guest = true;
+ fallthrough;
+
case ARCH_REQ_XCOMP_PERM:
if (!IS_ENABLED(CONFIG_X86_64))
return -EOPNOTSUPP;
- return xstate_request_perm(idx);
+ return xstate_request_perm(idx, guest);
default:
return -EINVAL;
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index e0c9264b1dd0..d22ace092ca2 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -20,10 +20,19 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
}
-static inline u64 xstate_get_host_group_perm(void)
+static inline u64 xstate_get_group_perm(bool guest)
{
+ struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu_state_perm *perm;
+
/* Pairs with WRITE_ONCE() in xstate_request_perm() */
- return READ_ONCE(current->group_leader->thread.fpu.perm.__state_perm);
+ perm = guest ? &fpu->guest_perm : &fpu->perm;
+ return READ_ONCE(perm->__state_perm);
+}
+
+static inline u64 xstate_get_host_group_perm(void)
+{
+ return xstate_get_group_perm(false);
}
enum xstate_copy_mode {
@@ -145,8 +154,14 @@ static inline void xfd_update_state(struct fpstate *fpstate)
}
}
}
+
+extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
#else
static inline void xfd_update_state(struct fpstate *fpstate) { }
+
+static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) {
+ return -EPERM;
+}
#endif
/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 882213df3713..71f336425e58 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1435,8 +1435,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
hpet_rtc_timer_reinit();
memset(&curr_time, 0, sizeof(struct rtc_time));
- if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
- mc146818_get_time(&curr_time);
+ if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) {
+ if (unlikely(mc146818_get_time(&curr_time) < 0)) {
+ pr_err_ratelimited("unable to read current time from RTC\n");
+ return IRQ_HANDLED;
+ }
+ }
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 59abbdad7729..a438217cbfac 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -313,7 +313,7 @@ static void kvm_register_steal_time(void)
return;
wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
- pr_info("stealtime: cpu %d, msr %llx\n", cpu,
+ pr_debug("stealtime: cpu %d, msr %llx\n", cpu,
(unsigned long long) slow_virt_to_phys(st));
}
@@ -350,7 +350,7 @@ static void kvm_guest_cpu_init(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
- pr_info("setup async PF for cpu %d\n", smp_processor_id());
+ pr_debug("setup async PF for cpu %d\n", smp_processor_id());
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -376,7 +376,7 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
- pr_info("disable async PF for cpu %d\n", smp_processor_id());
+ pr_debug("disable async PF for cpu %d\n", smp_processor_id());
}
static void kvm_disable_steal_time(void)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 462dd8e9b03d..a35cbf9107af 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -174,7 +174,7 @@ static void kvm_register_clock(char *txt)
pa = slow_virt_to_phys(&src->pvti) | 0x01ULL;
wrmsrl(msr_kvm_system_time, pa);
- pr_info("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
+ pr_debug("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
}
static void kvm_save_sched_clock_state(void)
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 169fb6f4cd2e..95fa745e310a 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -67,6 +67,7 @@ static unsigned long int get_module_load_offset(void)
void *module_alloc(unsigned long size)
{
+ gfp_t gfp_mask = GFP_KERNEL;
void *p;
if (PAGE_ALIGN(size) > MODULES_LEN)
@@ -74,10 +75,10 @@ void *module_alloc(unsigned long size)
p = __vmalloc_node_range(size, MODULE_ALIGN,
MODULES_VADDR + get_module_load_offset(),
- MODULES_END, GFP_KERNEL,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
+ MODULES_END, gfp_mask,
+ PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
__builtin_return_address(0));
- if (p && (kasan_module_alloc(p, size) < 0)) {
+ if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
vfree(p);
return NULL;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5d481038fe0b..81d8ef036637 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -993,6 +993,8 @@ long do_arch_prctl_common(struct task_struct *task, int option,
case ARCH_GET_XCOMP_SUPP:
case ARCH_GET_XCOMP_PERM:
case ARCH_REQ_XCOMP_PERM:
+ case ARCH_GET_XCOMP_GUEST_PERM:
+ case ARCH_REQ_XCOMP_GUEST_PERM:
return fpu_xstate_prctl(task, option, arg2);
}
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index 9b9fb7882c20..9ae64f9af956 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/dmi.h>
#include <linux/ioport.h>
#include <asm/e820/api.h>
@@ -23,11 +24,31 @@ static void resource_clip(struct resource *res, resource_size_t start,
res->start = end + 1;
}
+/*
+ * Some BIOS-es contain a bug where they add addresses which map to
+ * system RAM in the PCI host bridge window returned by the ACPI _CRS
+ * method, see commit 4dc2287c1805 ("x86: avoid E820 regions when
+ * allocating address space"). To avoid this Linux by default excludes
+ * E820 reservations when allocating addresses since 2010.
+ * In 2019 some systems have shown-up with E820 reservations which cover
+ * the entire _CRS returned PCI host bridge window, causing all attempts
+ * to assign memory to PCI BARs to fail if Linux uses E820 reservations.
+ *
+ * Ideally Linux would fully stop using E820 reservations, but then
+ * the old systems this was added for will regress.
+ * Instead keep the old behavior for old systems, while ignoring the
+ * E820 reservations for any systems from now on.
+ */
static void remove_e820_regions(struct resource *avail)
{
- int i;
+ int i, year = dmi_get_bios_year();
struct e820_entry *entry;
+ if (year >= 2018)
+ return;
+
+ pr_info_once("PCI: Removing E820 reservations from host bridge windows\n");
+
for (i = 0; i < e820_table->nr_entries; i++) {
entry = &e820_table->entries[i];
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7b65275544b2..49325caa7307 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -84,60 +84,6 @@ static bool __init pcpu_need_numa(void)
}
#endif
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
- unsigned long align)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = early_cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE,
- node);
-
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
- cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-/*
- * Helpers for first chunk memory allocation
- */
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
-{
- return pcpu_alloc_bootmem(cpu, size, align);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
#ifdef CONFIG_NUMA
@@ -150,7 +96,12 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
#endif
}
-static void __init pcpup_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
+{
+ return early_cpu_to_node(cpu);
+}
+
+void __init pcpu_populate_pte(unsigned long addr)
{
populate_extra_pte(addr);
}
@@ -205,15 +156,14 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
dyn_size, atom_size,
pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ pcpu_cpu_to_node);
if (rc < 0)
pr_warn("%s allocator failed (%d), falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
- pcpu_fc_alloc, pcpu_fc_free,
- pcpup_populate_pte);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 47bdbe705a76..2b1548da00eb 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -26,7 +26,9 @@ config KVM
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_PFNCACHE
select HAVE_KVM_IRQFD
+ select HAVE_KVM_DIRTY_RING
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
@@ -44,6 +46,7 @@ config KVM
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
select SRCU
+ select INTERVAL_TREE
select HAVE_KVM_PM_NOTIFIER if PM
help
Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 75dfd27b6e8a..30f244b64523 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,12 +7,7 @@ ifeq ($(CONFIG_FRAME_POINTER),y)
OBJECT_FILES_NON_STANDARD_vmenter.o := y
endif
-KVM := ../../../virt/kvm
-
-kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
- $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \
- $(KVM)/dirty_ring.o $(KVM)/binary_stats.o
-kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
+include $(srctree)/virt/kvm/Makefile.kvm
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 07e9215e911d..3902c28fb6cb 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -32,7 +32,7 @@
u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
EXPORT_SYMBOL_GPL(kvm_cpu_caps);
-static u32 xstate_required_size(u64 xstate_bv, bool compacted)
+u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
int feature_bit = 0;
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
@@ -42,7 +42,11 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
if (xstate_bv & 0x1) {
u32 eax, ebx, ecx, edx, offset;
cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
- offset = compacted ? ret : ebx;
+ /* ECX[1]: 64B alignment in compacted form */
+ if (compacted)
+ offset = (ecx & 0x2) ? ALIGN(ret, 64) : ret;
+ else
+ offset = ebx;
ret = max(ret, offset + eax);
}
@@ -80,9 +84,12 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
return NULL;
}
-static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
+static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid_entry2 *entries,
+ int nent)
{
struct kvm_cpuid_entry2 *best;
+ u64 xfeatures;
/*
* The existing code assumes virtual address is 48-bit or 57-bit in the
@@ -96,6 +103,41 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
return -EINVAL;
}
+ /*
+ * Exposing dynamic xfeatures to the guest requires additional
+ * enabling in the FPU, e.g. to expand the guest XSAVE state size.
+ */
+ best = cpuid_entry2_find(entries, nent, 0xd, 0);
+ if (!best)
+ return 0;
+
+ xfeatures = best->eax | ((u64)best->edx << 32);
+ xfeatures &= XFEATURE_MASK_USER_DYNAMIC;
+ if (!xfeatures)
+ return 0;
+
+ return fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu, xfeatures);
+}
+
+/* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */
+static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
+ int nent)
+{
+ struct kvm_cpuid_entry2 *orig;
+ int i;
+
+ if (nent != vcpu->arch.cpuid_nent)
+ return -EINVAL;
+
+ for (i = 0; i < nent; i++) {
+ orig = &vcpu->arch.cpuid_entries[i];
+ if (e2[i].function != orig->function ||
+ e2[i].index != orig->index ||
+ e2[i].eax != orig->eax || e2[i].ebx != orig->ebx ||
+ e2[i].ecx != orig->ecx || e2[i].edx != orig->edx)
+ return -EINVAL;
+ }
+
return 0;
}
@@ -125,14 +167,21 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
}
}
-static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid_entry2 *entries, int nent)
{
u32 base = vcpu->arch.kvm_cpuid_base;
if (!base)
return NULL;
- return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
+ return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, 0);
+}
+
+static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+{
+ return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent);
}
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
@@ -147,11 +196,12 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
vcpu->arch.pv_cpuid.features = best->eax;
}
-void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
+static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
+ int nent)
{
struct kvm_cpuid_entry2 *best;
- best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ best = cpuid_entry2_find(entries, nent, 1, 0);
if (best) {
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE))
@@ -162,33 +212,38 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
}
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ best = cpuid_entry2_find(entries, nent, 7, 0);
if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
cpuid_entry_change(best, X86_FEATURE_OSPKE,
kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
- best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+ best = cpuid_entry2_find(entries, nent, 0xD, 0);
if (best)
best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
- best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
+ best = cpuid_entry2_find(entries, nent, 0xD, 1);
if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- best = kvm_find_kvm_cpuid_features(vcpu);
+ best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
- best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ best = cpuid_entry2_find(entries, nent, 0x1, 0);
if (best)
cpuid_entry_change(best, X86_FEATURE_MWAIT,
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}
}
+
+void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
+{
+ __kvm_update_cpuid_runtime(vcpu, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+}
EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
@@ -276,21 +331,36 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
{
- int r;
+ int r;
+
+ __kvm_update_cpuid_runtime(vcpu, e2, nent);
+
+ /*
+ * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
+ * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
+ * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
+ * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with
+ * the core vCPU model on the fly. It would've been better to forbid any
+ * KVM_SET_CPUID{,2} calls after KVM_RUN altogether but unfortunately
+ * some VMMs (e.g. QEMU) reuse vCPU fds for CPU hotplug/unplug and do
+ * KVM_SET_CPUID{,2} again. To support this legacy behavior, check
+ * whether the supplied CPUID data is equal to what's already set.
+ */
+ if (vcpu->arch.last_vmentry_cpu != -1)
+ return kvm_cpuid_check_equal(vcpu, e2, nent);
- r = kvm_check_cpuid(e2, nent);
- if (r)
- return r;
+ r = kvm_check_cpuid(vcpu, e2, nent);
+ if (r)
+ return r;
- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = nent;
+ kvfree(vcpu->arch.cpuid_entries);
+ vcpu->arch.cpuid_entries = e2;
+ vcpu->arch.cpuid_nent = nent;
- kvm_update_kvm_cpuid_base(vcpu);
- kvm_update_cpuid_runtime(vcpu);
- kvm_vcpu_after_set_cpuid(vcpu);
+ kvm_update_kvm_cpuid_base(vcpu);
+ kvm_vcpu_after_set_cpuid(vcpu);
- return 0;
+ return 0;
}
/* when an old userspace process fills a new kernel module */
@@ -422,9 +492,11 @@ void kvm_set_cpu_caps(void)
#ifdef CONFIG_X86_64
unsigned int f_gbpages = F(GBPAGES);
unsigned int f_lm = F(LM);
+ unsigned int f_xfd = F(XFD);
#else
unsigned int f_gbpages = 0;
unsigned int f_lm = 0;
+ unsigned int f_xfd = 0;
#endif
memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps));
@@ -492,7 +564,8 @@ void kvm_set_cpu_caps(void)
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
- F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16)
+ F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
+ F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16)
);
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
@@ -511,7 +584,7 @@ void kvm_set_cpu_caps(void)
);
kvm_cpu_cap_mask(CPUID_D_1_EAX,
- F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES)
+ F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
);
kvm_cpu_cap_init_scattered(CPUID_12_EAX,
@@ -523,7 +596,7 @@ void kvm_set_cpu_caps(void)
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
- F(TOPOEXT) | F(PERFCTR_CORE)
+ F(TOPOEXT) | 0 /* PERFCTR_CORE */
);
kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
@@ -637,6 +710,8 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
case 0x14:
case 0x17:
case 0x18:
+ case 0x1d:
+ case 0x1e:
case 0x1f:
case 0x8000001d:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
@@ -770,10 +845,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
perf_get_x86_pmu_capability(&cap);
/*
- * Only support guest architectural pmu on a host
- * with architectural pmu.
+ * The guest architecture pmu is only supported if the architecture
+ * pmu exists on the host and the module parameters allow it.
*/
- if (!cap.version)
+ if (!cap.version || !enable_pmu)
memset(&cap, 0, sizeof(cap));
eax.split.version_id = min(cap.version, 2);
@@ -811,11 +886,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
goto out;
}
break;
- case 0xd:
- entry->eax &= supported_xcr0;
+ case 0xd: {
+ u64 guest_perm = xstate_get_guest_group_perm();
+
+ entry->eax &= supported_xcr0 & guest_perm;
entry->ebx = xstate_required_size(supported_xcr0, false);
entry->ecx = entry->ebx;
- entry->edx &= supported_xcr0 >> 32;
+ entry->edx &= (supported_xcr0 & guest_perm) >> 32;
if (!supported_xcr0)
break;
@@ -859,9 +936,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
--array->nent;
continue;
}
+
+ if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
+ entry->ecx &= ~BIT_ULL(2);
entry->edx = 0;
}
break;
+ }
case 0x12:
/* Intel SGX */
if (!kvm_cpu_cap_has(X86_FEATURE_SGX)) {
@@ -906,6 +987,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
goto out;
}
break;
+ /* Intel AMX TILE */
+ case 0x1d:
+ if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+ break;
+ }
+
+ for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) {
+ if (!do_host_cpuid(array, function, i))
+ goto out;
+ }
+ break;
+ case 0x1e: /* TMUL information */
+ if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+ break;
+ }
+ break;
case KVM_CPUID_SIGNATURE: {
const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
entry->eax = KVM_CPUID_FEATURES;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c99edfff7f82..8a770b481d9d 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -30,6 +30,8 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool exact_only);
+u32 xstate_required_size(u64 xstate_bv, bool compacted);
+
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index f33c804a922a..9240b3b7f8dd 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -110,9 +110,10 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
write_lock(&kvm->mmu_lock);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ int bkt;
+
slots = __kvm_memslots(kvm, i);
- for (j = 0; j < slots->used_slots; j++) {
- slot = &slots->memslots[j];
+ kvm_for_each_memslot(slot, bkt, slots)
for (k = 0; k < KVM_NR_PAGE_SIZES; k++) {
rmap = slot->arch.rmap[k];
lpage_size = kvm_mmu_slot_lpages(slot, k + 1);
@@ -124,7 +125,6 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
cur[index]++;
}
}
- }
}
write_unlock(&kvm->mmu_lock);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1e19a4de441f..5719d8cfdbd9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -175,6 +175,7 @@
#define No16 ((u64)1 << 53) /* No 16 bit operand */
#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
#define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */
+#define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
@@ -191,8 +192,9 @@
#define FASTOP_SIZE 8
struct opcode {
- u64 flags : 56;
- u64 intercept : 8;
+ u64 flags;
+ u8 intercept;
+ u8 pad[7];
union {
int (*execute)(struct x86_emulate_ctxt *ctxt);
const struct opcode *group;
@@ -4356,10 +4358,10 @@ static const struct opcode group4[] = {
static const struct opcode group5[] = {
F(DstMem | SrcNone | Lock, em_inc),
F(DstMem | SrcNone | Lock, em_dec),
- I(SrcMem | NearBranch, em_call_near_abs),
- I(SrcMemFAddr | ImplicitOps, em_call_far),
- I(SrcMem | NearBranch, em_jmp_abs),
- I(SrcMemFAddr | ImplicitOps, em_jmp_far),
+ I(SrcMem | NearBranch | IsBranch, em_call_near_abs),
+ I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
+ I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
+ I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
};
@@ -4569,7 +4571,7 @@ static const struct opcode opcode_table[256] = {
I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
/* 0x70 - 0x7F */
- X16(D(SrcImmByte | NearBranch)),
+ X16(D(SrcImmByte | NearBranch | IsBranch)),
/* 0x80 - 0x87 */
G(ByteOp | DstMem | SrcImm, group1),
G(DstMem | SrcImm, group1),
@@ -4588,7 +4590,7 @@ static const struct opcode opcode_table[256] = {
DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
/* 0x98 - 0x9F */
D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
- I(SrcImmFAddr | No64, em_call_far), N,
+ I(SrcImmFAddr | No64 | IsBranch, em_call_far), N,
II(ImplicitOps | Stack, em_pushf, pushf),
II(ImplicitOps | Stack, em_popf, popf),
I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
@@ -4608,17 +4610,19 @@ static const struct opcode opcode_table[256] = {
X8(I(DstReg | SrcImm64 | Mov, em_mov)),
/* 0xC0 - 0xC7 */
G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
- I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
- I(ImplicitOps | NearBranch, em_ret),
+ I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm),
+ I(ImplicitOps | NearBranch | IsBranch, em_ret),
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
G(ByteOp, group11), G(0, group11),
/* 0xC8 - 0xCF */
- I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
- I(ImplicitOps | SrcImmU16, em_ret_far_imm),
- I(ImplicitOps, em_ret_far),
- D(ImplicitOps), DI(SrcImmByte, intn),
- D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
+ I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
+ I(Stack | IsBranch, em_leave),
+ I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
+ I(ImplicitOps | IsBranch, em_ret_far),
+ D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
+ D(ImplicitOps | No64 | IsBranch),
+ II(ImplicitOps | IsBranch, em_iret, iret),
/* 0xD0 - 0xD7 */
G(Src2One | ByteOp, group2), G(Src2One, group2),
G(Src2CL | ByteOp, group2), G(Src2CL, group2),
@@ -4629,14 +4633,15 @@ static const struct opcode opcode_table[256] = {
/* 0xD8 - 0xDF */
N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
/* 0xE0 - 0xE7 */
- X3(I(SrcImmByte | NearBranch, em_loop)),
- I(SrcImmByte | NearBranch, em_jcxz),
+ X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
+ I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
/* 0xE8 - 0xEF */
- I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
- I(SrcImmFAddr | No64, em_jmp_far),
- D(SrcImmByte | ImplicitOps | NearBranch),
+ I(SrcImm | NearBranch | IsBranch, em_call),
+ D(SrcImm | ImplicitOps | NearBranch | IsBranch),
+ I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
+ D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
/* 0xF0 - 0xF7 */
@@ -4652,7 +4657,7 @@ static const struct opcode opcode_table[256] = {
static const struct opcode twobyte_table[256] = {
/* 0x00 - 0x0F */
G(0, group6), GD(0, &group7), N, N,
- N, I(ImplicitOps | EmulateOnUD, em_syscall),
+ N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
II(ImplicitOps | Priv, em_clts, clts), N,
DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
@@ -4683,8 +4688,8 @@ static const struct opcode twobyte_table[256] = {
IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
II(ImplicitOps | Priv, em_rdmsr, rdmsr),
IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
- I(ImplicitOps | EmulateOnUD, em_sysenter),
- I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
+ I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
+ I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
N, N,
N, N, N, N, N, N, N, N,
/* 0x40 - 0x4F */
@@ -4702,7 +4707,7 @@ static const struct opcode twobyte_table[256] = {
N, N, N, N,
N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
/* 0x80 - 0x8F */
- X16(D(SrcImm | NearBranch)),
+ X16(D(SrcImm | NearBranch | IsBranch)),
/* 0x90 - 0x9F */
X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
/* 0xA0 - 0xA7 */
@@ -5216,6 +5221,8 @@ done_prefixes:
ctxt->d |= opcode.flags;
}
+ ctxt->is_branch = opcode.flags & IsBranch;
+
/* Unrecognised? */
if (ctxt->d == 0)
return EMULATION_FAILED;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 8d8c1cc7cb53..6e38a7d22e97 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -164,7 +164,7 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
{
struct kvm_vcpu *vcpu = NULL;
- int i;
+ unsigned long i;
if (vpidx >= KVM_MAX_VCPUS)
return NULL;
@@ -1716,7 +1716,8 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
{
struct kvm_hv *hv = to_kvm_hv(kvm);
struct kvm_vcpu *vcpu;
- int i, bank, sbank = 0;
+ int bank, sbank = 0;
+ unsigned long i;
memset(vp_bitmap, 0,
KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
@@ -1863,7 +1864,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
.vector = vector
};
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
@@ -2519,6 +2520,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
case HYPERV_CPUID_NESTED_FEATURES:
ent->eax = evmcs_ver;
+ if (evmcs_ver)
+ ent->eax |= HV_X64_NESTED_MSR_BITMAP;
break;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 5a69cce4d72d..0b65a764ed3a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -242,7 +242,7 @@ static void pit_do_work(struct kthread_work *work)
struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
struct kvm *kvm = pit->kvm;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
struct kvm_kpit_state *ps = &pit->pit_state;
if (atomic_read(&ps->reinject) && !atomic_xchg(&ps->irq_ack, 0))
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 0b80263d46d8..814064d06016 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -50,7 +50,7 @@ static void pic_unlock(struct kvm_pic *s)
{
bool wakeup = s->wakeup_needed;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
s->wakeup_needed = false;
@@ -270,7 +270,8 @@ int kvm_pic_read_irq(struct kvm *kvm)
static void kvm_pic_reset(struct kvm_kpic_state *s)
{
- int irq, i;
+ int irq;
+ unsigned long i;
struct kvm_vcpu *vcpu;
u8 edge_irr = s->irr & ~s->elcr;
bool found = false;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 816a82515dcd..decfa36b7891 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -149,7 +149,7 @@ void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
if (RTC_GSI >= IOAPIC_NUM_PINS)
return;
@@ -184,7 +184,7 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
static void ioapic_lazy_update_eoi(struct kvm_ioapic *ioapic, int irq)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index d5b72a08e566..6e0dab04320e 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -24,6 +24,7 @@
#include "hyperv.h"
#include "x86.h"
+#include "xen.h"
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
@@ -45,9 +46,9 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, struct dest_map *dest_map)
{
- int i, r = -1;
+ int r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
- unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
unsigned int dest_vcpus = 0;
if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
@@ -175,6 +176,13 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
return r;
break;
+#ifdef CONFIG_KVM_XEN
+ case KVM_IRQ_ROUTING_XEN_EVTCHN:
+ if (!level)
+ return -1;
+
+ return kvm_xen_set_evtchn_fast(e, kvm);
+#endif
default:
break;
}
@@ -310,6 +318,10 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
e->hv_sint.sint = ue->u.hv_sint.sint;
break;
+#ifdef CONFIG_KVM_XEN
+ case KVM_IRQ_ROUTING_XEN_EVTCHN:
+ return kvm_xen_setup_evtchn(kvm, e, ue);
+#endif
default:
return -EINVAL;
}
@@ -320,7 +332,8 @@ int kvm_set_routing_entry(struct kvm *kvm,
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu)
{
- int i, r = 0;
+ int r = 0;
+ unsigned long i;
struct kvm_vcpu *vcpu;
if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 90e1ffdc05b7..3febc342360c 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,6 +9,12 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
+#define X86_CR0_PDPTR_BITS (X86_CR0_CD | X86_CR0_NW | X86_CR0_PG)
+#define X86_CR4_TLBFLUSH_BITS (X86_CR4_PGE | X86_CR4_PCIDE | X86_CR4_PAE | X86_CR4_SMEP)
+#define X86_CR4_PDPTR_BITS (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP)
+
+static_assert(!(KVM_POSSIBLE_CR0_GUEST_BITS & X86_CR0_PDPTR_BITS));
+
#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
{ \
@@ -37,6 +43,13 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
BUILD_KVM_GPR_ACCESSORS(r15, R15)
#endif
+/*
+ * avail dirty
+ * 0 0 register in VMCS/VMCB
+ * 0 1 *INVALID*
+ * 1 0 register in vcpu->arch
+ * 1 1 register in vcpu->arch, needs to be stored back
+ */
static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
@@ -55,13 +68,6 @@ static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}
-static inline void kvm_register_clear_available(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
-{
- __clear_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
- __clear_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 68b420289d7e..39eded2426ff 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -369,6 +369,7 @@ struct x86_emulate_ctxt {
struct fetch_cache fetch;
struct read_cache io_read;
struct read_cache mem_read;
+ bool is_branch;
};
/* Repeat String Operation Prefix */
diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c
index c7db2df50a7a..b469f45e3fe4 100644
--- a/arch/x86/kvm/kvm_onhyperv.c
+++ b/arch/x86/kvm/kvm_onhyperv.c
@@ -33,7 +33,8 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm,
{
struct kvm_arch *kvm_arch = &kvm->arch;
struct kvm_vcpu *vcpu;
- int ret = 0, i, nr_unique_valid_roots;
+ int ret = 0, nr_unique_valid_roots;
+ unsigned long i;
hpa_t root;
spin_lock(&kvm_arch->hv_root_tdp_lock);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f206fc35deff..baca9fa37a91 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -185,7 +185,7 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
u32 max_id = 255; /* enough space for any xAPIC ID */
/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
@@ -673,35 +673,34 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}
-static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
-{
- u8 val;
- if (pv_eoi_get_user(vcpu, &val) < 0) {
- printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
- return false;
- }
- return val & KVM_PV_EOI_ENABLED;
-}
-
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
- if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
- printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+ if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
return;
- }
+
__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}
-static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
+static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
{
- if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
- printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
- return;
- }
+ u8 val;
+
+ if (pv_eoi_get_user(vcpu, &val) < 0)
+ return false;
+
+ val &= KVM_PV_EOI_ENABLED;
+
+ if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
+ return false;
+
+ /*
+ * Clear pending bit in any case: it will be set again on vmentry.
+ * While this might not be ideal from performance point of view,
+ * this makes sure pv eoi is only enabled when we know it's safe.
+ */
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
+
+ return val;
}
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
@@ -1101,6 +1100,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
kvm_lapic_set_irr(vector, apic);
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
+ } else {
+ trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
+ trig_mode, vector);
}
break;
@@ -1172,8 +1174,8 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_lapic *src = NULL;
struct kvm_apic_map *map;
struct kvm_vcpu *vcpu;
- unsigned long bitmap;
- int i, vcpu_idx;
+ unsigned long bitmap, i;
+ int vcpu_idx;
bool ret;
rcu_read_lock();
@@ -1931,7 +1933,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
/* If the preempt notifier has already run, it also called apic_timer_expired */
if (!apic->lapic_timer.hv_timer_in_use)
goto out;
- WARN_ON(rcuwait_active(&vcpu->wait));
+ WARN_ON(kvm_vcpu_is_blocking(vcpu));
apic_timer_expired(apic, false);
cancel_hv_timer(apic);
@@ -1948,7 +1950,6 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
restart_apic_timer(vcpu->arch.apic);
}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
{
@@ -1960,7 +1961,6 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
start_sw_timer(apic);
preempt_enable();
}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
{
@@ -2677,7 +2677,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
struct kvm_lapic *apic)
{
- bool pending;
int vector;
/*
* PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
@@ -2691,14 +2690,8 @@ static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
* -> host enabled PV EOI, guest executed EOI.
*/
BUG_ON(!pv_eoi_enabled(vcpu));
- pending = pv_eoi_get_pending(vcpu);
- /*
- * Clear pending bit in any case: it will be set again on vmentry.
- * While this might not be ideal from performance point of view,
- * this makes sure pv eoi is only enabled when we know it's safe.
- */
- pv_eoi_clr_pending(vcpu);
- if (pending)
+
+ if (pv_eoi_test_and_clr_pending(vcpu))
return;
vector = apic_set_eoi(apic);
trace_kvm_pv_eoi(apic, vector);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 9ae6168d381e..e9fbb2c8bbe2 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -71,7 +71,8 @@ void kvm_init_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
unsigned long cr4, u64 efer, gpa_t nested_cr3);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
- bool accessed_dirty, gpa_t new_eptp);
+ int huge_page_level, bool accessed_dirty,
+ gpa_t new_eptp);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
@@ -351,4 +352,17 @@ static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
{
atomic64_add(count, &kvm->stat.pages[level - 1]);
}
+
+gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception);
+
+static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu,
+ gpa_t gpa, u32 access,
+ struct x86_exception *exception)
+{
+ if (mmu != &vcpu->arch.nested_mmu)
+ return gpa;
+ return translate_nested_gpa(vcpu, gpa, access, exception);
+}
#endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index fcdf3f8bb59a..593093b52395 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -335,12 +335,6 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
return likely(kvm_gen == spte_gen);
}
-static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception)
-{
- return gpa;
-}
-
static int is_cpuid_PSE36(void)
{
return 1;
@@ -1454,7 +1448,7 @@ static bool kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
{
u64 *sptep;
struct rmap_iterator iter;
- int need_flush = 0;
+ bool need_flush = false;
u64 new_spte;
kvm_pfn_t new_pfn;
@@ -1466,7 +1460,7 @@ restart:
rmap_printk("spte %p %llx gfn %llx (%d)\n",
sptep, *sptep, gfn, level);
- need_flush = 1;
+ need_flush = true;
if (pte_write(pte)) {
pte_list_remove(kvm, rmap_head, sptep);
@@ -1482,7 +1476,7 @@ restart:
if (need_flush && kvm_available_flush_tlb_with_range()) {
kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
- return 0;
+ return false;
}
return need_flush;
@@ -1623,8 +1617,8 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
for_each_rmap_spte(rmap_head, &iter, sptep)
if (is_accessed_spte(*sptep))
- return 1;
- return 0;
+ return true;
+ return false;
}
#define RMAP_RECYCLE_THRESHOLD 1000
@@ -2086,10 +2080,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
role = vcpu->arch.mmu->mmu_role.base;
role.level = level;
role.direct = direct;
- if (role.direct)
- role.gpte_is_8_bytes = true;
role.access = access;
- if (!direct_mmu && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
+ if (role.has_4_byte_gpte) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
@@ -2565,10 +2557,10 @@ static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
return r;
}
-static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
trace_kvm_mmu_unsync_page(sp);
- ++vcpu->kvm->stat.mmu_unsync;
+ ++kvm->stat.mmu_unsync;
sp->unsync = 1;
kvm_mmu_mark_parents_unsync(sp);
@@ -2580,7 +2572,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
* were marked unsync (or if there is no shadow page), -EPERM if the SPTE must
* be write-protected.
*/
-int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
+int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
gfn_t gfn, bool can_unsync, bool prefetch)
{
struct kvm_mmu_page *sp;
@@ -2591,7 +2583,7 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
* track machinery is used to write-protect upper-level shadow pages,
* i.e. this guards the role.level == 4K assertion below!
*/
- if (kvm_slot_page_track_is_active(vcpu, slot, gfn, KVM_PAGE_TRACK_WRITE))
+ if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE))
return -EPERM;
/*
@@ -2600,7 +2592,7 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
* that case, KVM must complete emulation of the guest TLB flush before
* allowing shadow pages to become unsync (writable by the guest).
*/
- for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
+ for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
if (!can_unsync)
return -EPERM;
@@ -2619,7 +2611,7 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
*/
if (!locked) {
locked = true;
- spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
+ spin_lock(&kvm->arch.mmu_unsync_pages_lock);
/*
* Recheck after taking the spinlock, a different vCPU
@@ -2634,10 +2626,10 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
}
WARN_ON(sp->role.level != PG_LEVEL_4K);
- kvm_unsync_page(vcpu, sp);
+ kvm_unsync_page(kvm, sp);
}
if (locked)
- spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
+ spin_unlock(&kvm->arch.mmu_unsync_pages_lock);
/*
* We need to ensure that the marking of unsync pages is visible
@@ -3409,7 +3401,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
- int r = 0, i;
+ int r = 0, i, bkt;
/*
* Check if this is the first shadow root being allocated before
@@ -3434,7 +3426,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
- kvm_for_each_memslot(slot, slots) {
+ kvm_for_each_memslot(slot, bkt, slots) {
/*
* Both of these functions are no-ops if the target is
* already allocated, so unconditionally calling both
@@ -3734,21 +3726,13 @@ void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu)
kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
}
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr,
- u32 access, struct x86_exception *exception)
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gpa_t vaddr, u32 access,
+ struct x86_exception *exception)
{
if (exception)
exception->error_code = 0;
- return vaddr;
-}
-
-static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr,
- u32 access,
- struct x86_exception *exception)
-{
- if (exception)
- exception->error_code = 0;
- return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
+ return kvm_translate_gpa(vcpu, mmu, vaddr, access, exception);
}
static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
@@ -3888,7 +3872,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
* guest is writing the page which is write tracked which can
* not be fixed by page fault handler.
*/
- if (kvm_slot_page_track_is_active(vcpu, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
+ if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
return true;
return false;
@@ -4388,22 +4372,28 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
static void
__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
- u64 pa_bits_rsvd, bool execonly)
+ u64 pa_bits_rsvd, bool execonly, int huge_page_level)
{
u64 high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51);
+ u64 large_1g_rsvd = 0, large_2m_rsvd = 0;
u64 bad_mt_xwr;
+ if (huge_page_level < PG_LEVEL_1G)
+ large_1g_rsvd = rsvd_bits(7, 7);
+ if (huge_page_level < PG_LEVEL_2M)
+ large_2m_rsvd = rsvd_bits(7, 7);
+
rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | rsvd_bits(3, 7);
rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | rsvd_bits(3, 7);
- rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6);
- rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6);
+ rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6) | large_1g_rsvd;
+ rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6) | large_2m_rsvd;
rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd;
/* large page */
rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4];
rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
- rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29);
- rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20);
+ rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29) | large_1g_rsvd;
+ rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20) | large_2m_rsvd;
rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */
@@ -4419,10 +4409,11 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
}
static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context, bool execonly)
+ struct kvm_mmu *context, bool execonly, int huge_page_level)
{
__reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
- vcpu->arch.reserved_gpa_bits, execonly);
+ vcpu->arch.reserved_gpa_bits, execonly,
+ huge_page_level);
}
static inline u64 reserved_hpa_bits(void)
@@ -4498,7 +4489,8 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
false, true);
else
__reset_rsvds_bits_mask_ept(shadow_zero_check,
- reserved_hpa_bits(), false);
+ reserved_hpa_bits(), false,
+ max_huge_page_level);
if (!shadow_me_mask)
return;
@@ -4518,7 +4510,8 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
struct kvm_mmu *context, bool execonly)
{
__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
- reserved_hpa_bits(), execonly);
+ reserved_hpa_bits(), execonly,
+ max_huge_page_level);
}
#define BYTE_MASK(access) \
@@ -4767,7 +4760,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
role.base.ad_disabled = (shadow_accessed_mask == 0);
role.base.level = kvm_mmu_get_tdp_level(vcpu);
role.base.direct = true;
- role.base.gpte_is_8_bytes = true;
+ role.base.has_4_byte_gpte = false;
return role;
}
@@ -4812,7 +4805,7 @@ kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu,
role.base.smep_andnot_wp = role.ext.cr4_smep && !____is_cr0_wp(regs);
role.base.smap_andnot_wp = role.ext.cr4_smap && !____is_cr0_wp(regs);
- role.base.gpte_is_8_bytes = ____is_cr0_pg(regs) && ____is_cr4_pae(regs);
+ role.base.has_4_byte_gpte = ____is_cr0_pg(regs) && !____is_cr4_pae(regs);
return role;
}
@@ -4911,7 +4904,7 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;
role.base.level = level;
- role.base.gpte_is_8_bytes = true;
+ role.base.has_4_byte_gpte = false;
role.base.direct = false;
role.base.ad_disabled = !accessed_dirty;
role.base.guest_mode = true;
@@ -4926,7 +4919,8 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
}
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
- bool accessed_dirty, gpa_t new_eptp)
+ int huge_page_level, bool accessed_dirty,
+ gpa_t new_eptp)
{
struct kvm_mmu *context = &vcpu->arch.guest_mmu;
u8 level = vmx_eptp_page_walk_level(new_eptp);
@@ -4953,7 +4947,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
update_permission_bitmask(context, true);
context->pkru_mask = 0;
- reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+ reset_rsvds_bits_mask_ept(vcpu, context, execonly, huge_page_level);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
@@ -5017,13 +5011,13 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
* the gva_to_gpa functions between mmu and nested_mmu are swapped.
*/
if (!is_paging(vcpu))
- g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
+ g_context->gva_to_gpa = nonpaging_gva_to_gpa;
else if (is_long_mode(vcpu))
- g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
+ g_context->gva_to_gpa = paging64_gva_to_gpa;
else if (is_pae(vcpu))
- g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
+ g_context->gva_to_gpa = paging64_gva_to_gpa;
else
- g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
+ g_context->gva_to_gpa = paging32_gva_to_gpa;
reset_guest_paging_metadata(vcpu, g_context);
}
@@ -5188,7 +5182,7 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
gpa, bytes, sp->role.word);
offset = offset_in_page(gpa);
- pte_size = sp->role.gpte_is_8_bytes ? 8 : 4;
+ pte_size = sp->role.has_4_byte_gpte ? 4 : 8;
/*
* Sometimes, the OS only writes the last one bytes to update status
@@ -5212,7 +5206,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
page_offset = offset_in_page(gpa);
level = sp->role.level;
*nspte = 1;
- if (!sp->role.gpte_is_8_bytes) {
+ if (sp->role.has_4_byte_gpte) {
page_offset <<= 1; /* 32->64 */
/*
* A 32-bit pde maps 4MB while the shadow pdes map
@@ -5524,10 +5518,13 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
mmu->root_hpa = INVALID_PAGE;
mmu->root_pgd = 0;
- mmu->translate_gpa = translate_gpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+ /* vcpu->arch.guest_mmu isn't used when !tdp_enabled. */
+ if (!tdp_enabled && mmu == &vcpu->arch.guest_mmu)
+ return 0;
+
/*
* When using PAE paging, the four PDPTEs are treated as 'root' pages,
* while the PDP table is a per-vCPU construct that's allocated at MMU
@@ -5537,7 +5534,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
* generally doesn't use PAE paging and can skip allocating the PDP
* table. The main exception, handled here, is SVM's 32-bit NPT. The
* other exception is for shadowing L1's 32-bit or PAE NPT on 64-bit
- * KVM; that horror is handled on-demand by mmu_alloc_shadow_roots().
+ * KVM; that horror is handled on-demand by mmu_alloc_special_roots().
*/
if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
return 0;
@@ -5582,8 +5579,6 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu = &vcpu->arch.root_mmu;
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
- vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
-
ret = __kvm_mmu_create(vcpu, &vcpu->arch.guest_mmu);
if (ret)
return ret;
@@ -5742,6 +5737,7 @@ static bool __kvm_zap_rmaps(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
{
const struct kvm_memory_slot *memslot;
struct kvm_memslots *slots;
+ struct kvm_memslot_iter iter;
bool flush = false;
gfn_t start, end;
int i;
@@ -5751,13 +5747,16 @@ static bool __kvm_zap_rmaps(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
- kvm_for_each_memslot(memslot, slots) {
+
+ kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
+ memslot = iter.slot;
start = max(gfn_start, memslot->base_gfn);
end = min(gfn_end, memslot->base_gfn + memslot->npages);
- if (start >= end)
+ if (WARN_ON_ONCE(start >= end))
continue;
flush = slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
+
PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
start, end - 1, true, flush);
}
@@ -5775,6 +5774,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
bool flush;
int i;
+ if (WARN_ON_ONCE(gfn_end <= gfn_start))
+ return;
+
write_lock(&kvm->mmu_lock);
kvm_inc_notifier_count(kvm, gfn_start, gfn_end);
@@ -5824,15 +5826,27 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
}
/*
- * We can flush all the TLBs out of the mmu lock without TLB
- * corruption since we just change the spte from writable to
- * readonly so that we only need to care the case of changing
- * spte from present to present (changing the spte from present
- * to nonpresent will flush all the TLBs immediately), in other
- * words, the only case we care is mmu_spte_update() where we
- * have checked Host-writable | MMU-writable instead of
- * PT_WRITABLE_MASK, that means it does not depend on PT_WRITABLE_MASK
- * anymore.
+ * Flush TLBs if any SPTEs had to be write-protected to ensure that
+ * guest writes are reflected in the dirty bitmap before the memslot
+ * update completes, i.e. before enabling dirty logging is visible to
+ * userspace.
+ *
+ * Perform the TLB flush outside the mmu_lock to reduce the amount of
+ * time the lock is held. However, this does mean that another CPU can
+ * now grab mmu_lock and encounter a write-protected SPTE while CPUs
+ * still have a writable mapping for the associated GFN in their TLB.
+ *
+ * This is safe but requires KVM to be careful when making decisions
+ * based on the write-protection status of an SPTE. Specifically, KVM
+ * also write-protects SPTEs to monitor changes to guest page tables
+ * during shadow paging, and must guarantee no CPUs can write to those
+ * page before the lock is dropped. As mentioned in the previous
+ * paragraph, a write-protected SPTE is no guarantee that CPU cannot
+ * perform writes. So to determine if a TLB flush is truly required, KVM
+ * will clear a separate software-only bit (MMU-writable) and skip the
+ * flush if-and-only-if this bit was already clear.
+ *
+ * See DEFAULT_SPTE_MMU_WRITEABLE for more details.
*/
if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
@@ -6164,30 +6178,6 @@ out:
return ret;
}
-/*
- * Calculate mmu pages needed for kvm.
- */
-unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
-{
- unsigned long nr_mmu_pages;
- unsigned long nr_pages = 0;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
- int i;
-
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- slots = __kvm_memslots(kvm, i);
-
- kvm_for_each_memslot(memslot, slots)
- nr_pages += memslot->npages;
- }
-
- nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
- nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
-
- return nr_mmu_pages;
-}
-
void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
kvm_mmu_unload(vcpu);
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 52c6527b1a06..da6166b5c377 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -104,7 +104,7 @@ static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
return kvm_mmu_role_as_id(sp->role);
}
-static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
+static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
{
/*
* When using the EPT page-modification log, the GPAs in the CPU dirty
@@ -112,13 +112,12 @@ static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
* on write protection to record dirty pages, which bypasses PML, since
* writes now result in a vmexit. Note, the check on CPU dirty logging
* being enabled is mandatory as the bits used to denote WP-only SPTEs
- * are reserved for NPT w/ PAE (32-bit KVM).
+ * are reserved for PAE paging (32-bit KVM).
*/
- return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
- kvm_x86_ops.cpu_dirty_log_size;
+ return kvm_x86_ops.cpu_dirty_log_size && sp->role.guest_mode;
}
-int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
+int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
gfn_t gfn, bool can_unsync, bool prefetch);
void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index b8151bbca36a..de5e8e4e1aa7 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -35,7 +35,7 @@
" %snxe %sad root %u %s%c", \
__entry->mmu_valid_gen, \
__entry->gfn, role.level, \
- role.gpte_is_8_bytes ? 8 : 4, \
+ role.has_4_byte_gpte ? 4 : 8, \
role.quadrant, \
role.direct ? " direct" : "", \
access_str[role.access], \
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index cc4eb5b7fb76..68eb1fb548b6 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -173,9 +173,9 @@ EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
/*
* check if the corresponding access on the specified guest page is tracked.
*/
-bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode)
+bool kvm_slot_page_track_is_active(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ gfn_t gfn, enum kvm_page_track_mode mode)
{
int index;
@@ -186,7 +186,7 @@ bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
return false;
if (mode == KVM_PAGE_TRACK_WRITE &&
- !kvm_page_track_write_tracking_enabled(vcpu->kvm))
+ !kvm_page_track_write_tracking_enabled(kvm))
return false;
index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 708a5d297fe1..5b5bdac97c7b 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -403,9 +403,8 @@ retry_walk:
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
- real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
- nested_access,
- &walker->fault);
+ real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(table_gfn),
+ nested_access, &walker->fault);
/*
* FIXME: This can happen if emulation (for of an INS/OUTS
@@ -467,7 +466,7 @@ retry_walk:
if (PTTYPE == 32 && walker->level > PG_LEVEL_4K && is_cpuid_PSE36())
gfn += pse36_gfn_delta(pte);
- real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault);
+ real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(gfn), access, &walker->fault);
if (real_gpa == UNMAPPED_GVA)
return 0;
@@ -547,16 +546,6 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
access);
}
-#if PTTYPE != PTTYPE_EPT
-static int FNAME(walk_addr_nested)(struct guest_walker *walker,
- struct kvm_vcpu *vcpu, gva_t addr,
- u32 access)
-{
- return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
- addr, access);
-}
-#endif
-
static bool
FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, pt_element_t gpte, bool no_dirty_log)
@@ -1000,50 +989,29 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
}
/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access,
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gpa_t addr, u32 access,
struct x86_exception *exception)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;
- r = FNAME(walk_addr)(&walker, vcpu, addr, access);
-
- if (r) {
- gpa = gfn_to_gpa(walker.gfn);
- gpa |= addr & ~PAGE_MASK;
- } else if (exception)
- *exception = walker.fault;
-
- return gpa;
-}
-
-#if PTTYPE != PTTYPE_EPT
-/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */
-static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
- u32 access,
- struct x86_exception *exception)
-{
- struct guest_walker walker;
- gpa_t gpa = UNMAPPED_GVA;
- int r;
-
#ifndef CONFIG_X86_64
/* A 64-bit GVA should be impossible on 32-bit KVM. */
- WARN_ON_ONCE(vaddr >> 32);
+ WARN_ON_ONCE((addr >> 32) && mmu == vcpu->arch.walk_mmu);
#endif
- r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
+ r = FNAME(walk_addr_generic)(&walker, vcpu, mmu, addr, access);
if (r) {
gpa = gfn_to_gpa(walker.gfn);
- gpa |= vaddr & ~PAGE_MASK;
+ gpa |= addr & ~PAGE_MASK;
} else if (exception)
*exception = walker.fault;
return gpa;
}
-#endif
/*
* Using the cached information from sp->gfns is safe because:
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index fad546df0bba..73cfe62fdad1 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -91,7 +91,7 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
}
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- struct kvm_memory_slot *slot,
+ const struct kvm_memory_slot *slot,
unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
u64 old_spte, bool prefetch, bool can_unsync,
bool host_writable, u64 *new_spte)
@@ -102,7 +102,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
if (sp->role.ad_disabled)
spte |= SPTE_TDP_AD_DISABLED_MASK;
- else if (kvm_vcpu_ad_need_write_protect(vcpu))
+ else if (kvm_mmu_page_ad_need_write_protect(sp))
spte |= SPTE_TDP_AD_WRPROT_ONLY_MASK;
/*
@@ -162,7 +162,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* e.g. it's write-tracked (upper-level SPs) or has one or more
* shadow pages and unsync'ing pages is not allowed.
*/
- if (mmu_try_to_unsync_pages(vcpu, slot, gfn, can_unsync, prefetch)) {
+ if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) {
pgprintk("%s: found shadow page for %llx, marking ro\n",
__func__, gfn);
wrprot = true;
@@ -216,6 +216,7 @@ u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~shadow_host_writable_mask;
+ new_spte &= ~shadow_mmu_writable_mask;
new_spte = mark_spte_for_access_track(new_spte);
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index cc432f9a966b..be6a007a4af3 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -60,10 +60,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
-/* Bits 9 and 10 are ignored by all non-EPT PTEs. */
-#define DEFAULT_SPTE_HOST_WRITEABLE BIT_ULL(9)
-#define DEFAULT_SPTE_MMU_WRITEABLE BIT_ULL(10)
-
/*
* The mask/shift to use for saving the original R/X bits when marking the PTE
* as not-present for access tracking purposes. We do not save the W bit as the
@@ -79,6 +75,35 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
static_assert(!(SPTE_TDP_AD_MASK & SHADOW_ACC_TRACK_SAVED_MASK));
/*
+ * *_SPTE_HOST_WRITEABLE (aka Host-writable) indicates whether the host permits
+ * writes to the guest page mapped by the SPTE. This bit is cleared on SPTEs
+ * that map guest pages in read-only memslots and read-only VMAs.
+ *
+ * Invariants:
+ * - If Host-writable is clear, PT_WRITABLE_MASK must be clear.
+ *
+ *
+ * *_SPTE_MMU_WRITEABLE (aka MMU-writable) indicates whether the shadow MMU
+ * allows writes to the guest page mapped by the SPTE. This bit is cleared when
+ * the guest page mapped by the SPTE contains a page table that is being
+ * monitored for shadow paging. In this case the SPTE can only be made writable
+ * by unsyncing the shadow page under the mmu_lock.
+ *
+ * Invariants:
+ * - If MMU-writable is clear, PT_WRITABLE_MASK must be clear.
+ * - If MMU-writable is set, Host-writable must be set.
+ *
+ * If MMU-writable is set, PT_WRITABLE_MASK is normally set but can be cleared
+ * to track writes for dirty logging. For such SPTEs, KVM will locklessly set
+ * PT_WRITABLE_MASK upon the next write from the guest and record the write in
+ * the dirty log (see fast_page_fault()).
+ */
+
+/* Bits 9 and 10 are ignored by all non-EPT PTEs. */
+#define DEFAULT_SPTE_HOST_WRITEABLE BIT_ULL(9)
+#define DEFAULT_SPTE_MMU_WRITEABLE BIT_ULL(10)
+
+/*
* Low ignored bits are at a premium for EPT, use high ignored bits, taking care
* to not overlap the A/D type mask or the saved access bits of access-tracked
* SPTEs when A/D bits are disabled.
@@ -316,8 +341,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
static inline bool spte_can_locklessly_be_made_writable(u64 spte)
{
- return (spte & shadow_host_writable_mask) &&
- (spte & shadow_mmu_writable_mask);
+ if (spte & shadow_mmu_writable_mask) {
+ WARN_ON_ONCE(!(spte & shadow_host_writable_mask));
+ return true;
+ }
+
+ WARN_ON_ONCE(spte & PT_WRITABLE_MASK);
+ return false;
}
static inline u64 get_mmio_spte_generation(u64 spte)
@@ -330,7 +360,7 @@ static inline u64 get_mmio_spte_generation(u64 spte)
}
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- struct kvm_memory_slot *slot,
+ const struct kvm_memory_slot *slot,
unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
u64 old_spte, bool prefetch, bool can_unsync,
bool host_writable, u64 *new_spte);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 1beb4ca90560..bc9e3553fba2 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -165,7 +165,7 @@ static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
role = vcpu->arch.mmu->mmu_role.base;
role.level = level;
role.direct = true;
- role.gpte_is_8_bytes = true;
+ role.has_4_byte_gpte = false;
role.access = ACC_ALL;
role.ad_disabled = !shadow_accessed_mask;
@@ -1442,12 +1442,12 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
!is_last_spte(iter.old_spte, iter.level))
continue;
- if (!is_writable_pte(iter.old_spte))
- break;
-
new_spte = iter.old_spte &
~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
+ if (new_spte == iter.old_spte)
+ break;
+
tdp_mmu_set_spte(kvm, &iter, new_spte);
spte_set = true;
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 0c2133eb4cf6..f614f95acc6b 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -13,6 +13,8 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
@@ -55,43 +57,41 @@ static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
kvm_pmu_deliver_pmi(vcpu);
}
-static void kvm_perf_overflow(struct perf_event *perf_event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
- struct kvm_pmc *pmc = perf_event->overflow_handler_context;
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
- __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
- kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
- }
+ /* Ignore counters that have been reprogrammed already. */
+ if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
+ return;
+
+ __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+ kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+
+ if (!pmc->intr)
+ return;
+
+ /*
+ * Inject PMI. If vcpu was in a guest mode during NMI PMI
+ * can be ejected on a guest mode re-entry. Otherwise we can't
+ * be sure that vcpu wasn't executing hlt instruction at the
+ * time of vmexit and is not going to re-enter guest mode until
+ * woken up. So we should wake it, but this is impossible from
+ * NMI context. Do it from irq work instead.
+ */
+ if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
+ irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
+ else
+ kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
-static void kvm_perf_overflow_intr(struct perf_event *perf_event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static void kvm_perf_overflow(struct perf_event *perf_event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
- __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
- kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
-
- /*
- * Inject PMI. If vcpu was in a guest mode during NMI PMI
- * can be ejected on a guest mode re-entry. Otherwise we can't
- * be sure that vcpu wasn't executing hlt instruction at the
- * time of vmexit and is not going to re-enter guest mode until
- * woken up. So we should wake it, but this is impossible from
- * NMI context. Do it from irq work instead.
- */
- if (!kvm_handling_nmi_from_guest(pmc->vcpu))
- irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
- else
- kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
- }
+ __kvm_perf_overflow(pmc, true);
}
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
@@ -111,6 +111,9 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
.config = config,
};
+ if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
+ return;
+
attr.sample_period = get_sample_period(pmc, pmc->counter);
if (in_tx)
@@ -126,7 +129,6 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
}
event = perf_event_create_kernel_counter(&attr, -1, current,
- intr ? kvm_perf_overflow_intr :
kvm_perf_overflow, pmc);
if (IS_ERR(event)) {
pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
@@ -138,6 +140,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
pmc_to_pmu(pmc)->event_count++;
clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
pmc->is_paused = false;
+ pmc->intr = intr;
}
static void pmc_pause_counter(struct kvm_pmc *pmc)
@@ -171,13 +174,16 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
return true;
}
+static int cmp_u64(const void *a, const void *b)
+{
+ return *(__u64 *)a - *(__u64 *)b;
+}
+
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
unsigned config, type = PERF_TYPE_RAW;
- u8 event_select, unit_mask;
struct kvm *kvm = pmc->vcpu->kvm;
struct kvm_pmu_event_filter *filter;
- int i;
bool allow_event = true;
if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
@@ -192,31 +198,23 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
if (filter) {
- for (i = 0; i < filter->nevents; i++)
- if (filter->events[i] ==
- (eventsel & AMD64_RAW_EVENT_MASK_NB))
- break;
- if (filter->action == KVM_PMU_EVENT_ALLOW &&
- i == filter->nevents)
- allow_event = false;
- if (filter->action == KVM_PMU_EVENT_DENY &&
- i < filter->nevents)
- allow_event = false;
+ __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
+
+ if (bsearch(&key, filter->events, filter->nevents,
+ sizeof(__u64), cmp_u64))
+ allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
+ else
+ allow_event = filter->action == KVM_PMU_EVENT_DENY;
}
if (!allow_event)
return;
- event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
- unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-
if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
ARCH_PERFMON_EVENTSEL_INV |
ARCH_PERFMON_EVENTSEL_CMASK |
HSW_IN_TX |
HSW_IN_TX_CHECKPOINTED))) {
- config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
- event_select,
- unit_mask);
+ config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
if (config != PERF_COUNT_HW_MAX)
type = PERF_TYPE_HARDWARE;
}
@@ -268,7 +266,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
pmc->current_config = (u64)ctrl;
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
- kvm_x86_ops.pmu_ops->find_fixed_event(idx),
+ kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
!(en_field & 0x2), /* exclude user */
!(en_field & 0x1), /* exclude kernel */
pmi, false, false);
@@ -490,6 +488,66 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
kvm_pmu_reset(vcpu);
}
+static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+ u64 prev_count;
+
+ prev_count = pmc->counter;
+ pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
+
+ reprogram_counter(pmu, pmc->idx);
+ if (pmc->counter < prev_count)
+ __kvm_perf_overflow(pmc, false);
+}
+
+static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
+ unsigned int perf_hw_id)
+{
+ u64 old_eventsel = pmc->eventsel;
+ unsigned int config;
+
+ pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK);
+ config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
+ pmc->eventsel = old_eventsel;
+ return config == perf_hw_id;
+}
+
+static inline bool cpl_is_matched(struct kvm_pmc *pmc)
+{
+ bool select_os, select_user;
+ u64 config = pmc->current_config;
+
+ if (pmc_is_gp(pmc)) {
+ select_os = config & ARCH_PERFMON_EVENTSEL_OS;
+ select_user = config & ARCH_PERFMON_EVENTSEL_USR;
+ } else {
+ select_os = config & 0x1;
+ select_user = config & 0x2;
+ }
+
+ return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
+}
+
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc;
+ int i;
+
+ for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
+ pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);
+
+ if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
+ continue;
+
+ /* Ignore checks for edge detect, pin control, invert and CMASK bits */
+ if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
+ kvm_pmu_incr_counter(pmc);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
+
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
struct kvm_pmu_event_filter tmp, *filter;
@@ -521,6 +579,11 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
/* Ensure nevents can't be changed between the user copies. */
*filter = tmp;
+ /*
+ * Sort the in-kernel list so that we can search it with bsearch.
+ */
+ sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);
+
mutex_lock(&kvm->lock);
filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
mutex_is_locked(&kvm->lock));
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 59d6b76203d5..7a7b8d5b775e 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -24,9 +24,7 @@ struct kvm_event_hw_type_mapping {
};
struct kvm_pmu_ops {
- unsigned (*find_arch_event)(struct kvm_pmu *pmu, u8 event_select,
- u8 unit_mask);
- unsigned (*find_fixed_event)(int idx);
+ unsigned int (*pmc_perf_hw_id)(struct kvm_pmc *pmc);
bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
@@ -159,6 +157,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id);
bool is_vmware_backdoor_pmc(u32 pmc_idx);
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 8f9af7b7dbbe..90364d02f22a 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -293,15 +293,18 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
+ /*
+ * Wake any target vCPUs that are blocking, i.e. waiting for a wake
+ * event. There's no need to signal doorbells, as hardware has handled
+ * vCPUs that were in guest at the time of the IPI, and vCPUs that have
+ * since entered the guest will have processed pending IRQs at VMRUN.
+ */
kvm_for_each_vcpu(i, vcpu, kvm) {
- bool m = kvm_apic_match_dest(vcpu, source,
- icrl & APIC_SHORT_MASK,
- GET_APIC_DEST_FIELD(icrh),
- icrl & APIC_DEST_MASK);
-
- if (m && !avic_vcpu_is_running(vcpu))
+ if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
+ GET_APIC_DEST_FIELD(icrh),
+ icrl & APIC_DEST_MASK))
kvm_vcpu_wake_up(vcpu);
}
}
@@ -672,16 +675,42 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
return -1;
kvm_lapic_set_irr(vec, vcpu->arch.apic);
+
+ /*
+ * Pairs with the smp_mb_*() after setting vcpu->guest_mode in
+ * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
+ * the read of guest_mode, which guarantees that either VMRUN will see
+ * and process the new vIRR entry, or that the below code will signal
+ * the doorbell if the vCPU is already running in the guest.
+ */
smp_mb__after_atomic();
- if (avic_vcpu_is_running(vcpu)) {
- int cpuid = vcpu->cpu;
+ /*
+ * Signal the doorbell to tell hardware to inject the IRQ if the vCPU
+ * is in the guest. If the vCPU is not in the guest, hardware will
+ * automatically process AVIC interrupts at VMRUN.
+ */
+ if (vcpu->mode == IN_GUEST_MODE) {
+ int cpu = READ_ONCE(vcpu->cpu);
- if (cpuid != get_cpu())
- wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
+ /*
+ * Note, the vCPU could get migrated to a different pCPU at any
+ * point, which could result in signalling the wrong/previous
+ * pCPU. But if that happens the vCPU is guaranteed to do a
+ * VMRUN (after being migrated) and thus will process pending
+ * interrupts, i.e. a doorbell is not needed (and the spurious
+ * one is harmless).
+ */
+ if (cpu != get_cpu())
+ wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
put_cpu();
- } else
+ } else {
+ /*
+ * Wake the vCPU if it was blocking. KVM will then detect the
+ * pending IRQ when checking if the vCPU has a wake event.
+ */
kvm_vcpu_wake_up(vcpu);
+ }
return 0;
}
@@ -949,6 +978,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
+ lockdep_assert_preemption_disabled();
+
/*
* Since the host physical APIC id is 8 bits,
* we can support host APIC ID upto 255.
@@ -956,19 +987,25 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
+ /*
+ * No need to update anything if the vCPU is blocking, i.e. if the vCPU
+ * is being scheduled in after being preempted. The CPU entries in the
+ * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
+ * If the vCPU was migrated, its new CPU value will be stuffed when the
+ * vCPU unblocks.
+ */
+ if (kvm_vcpu_is_blocking(vcpu))
+ return;
+
entry = READ_ONCE(*(svm->avic_physical_id_cache));
WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
-
- entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
- if (svm->avic_is_running)
- entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+ entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
- avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
- svm->avic_is_running);
+ avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}
void avic_vcpu_put(struct kvm_vcpu *vcpu)
@@ -976,42 +1013,56 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
+ lockdep_assert_preemption_disabled();
+
entry = READ_ONCE(*(svm->avic_physical_id_cache));
- if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
- avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+
+ /* Nothing to do if IsRunning == '0' due to vCPU blocking. */
+ if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
+ return;
+
+ avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-/*
- * This function is called during VCPU halt/unhalt.
- */
-static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- struct vcpu_svm *svm = to_svm(vcpu);
- int cpu = get_cpu();
-
- WARN_ON(cpu != vcpu->cpu);
- svm->avic_is_running = is_run;
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
- if (kvm_vcpu_apicv_active(vcpu)) {
- if (is_run)
- avic_vcpu_load(vcpu, cpu);
- else
- avic_vcpu_put(vcpu);
- }
- put_cpu();
+ preempt_disable();
+
+ /*
+ * Unload the AVIC when the vCPU is about to block, _before_
+ * the vCPU actually blocks.
+ *
+ * Any IRQs that arrive before IsRunning=0 will not cause an
+ * incomplete IPI vmexit on the source, therefore vIRR will also
+ * be checked by kvm_vcpu_check_block() before blocking. The
+ * memory barrier implicit in set_current_state orders writing
+ * IsRunning=0 before reading the vIRR. The processor needs a
+ * matching memory barrier on interrupt delivery between writing
+ * IRR and reading IsRunning; the lack of this barrier might be
+ * the cause of errata #1235).
+ */
+ avic_vcpu_put(vcpu);
+
+ preempt_enable();
}
-void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
+void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- avic_set_running(vcpu, false);
-}
+ int cpu;
-void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
-{
- if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
- kvm_vcpu_update_apicv(vcpu);
- avic_set_running(vcpu, true);
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ cpu = get_cpu();
+ WARN_ON(cpu != vcpu->cpu);
+
+ avic_vcpu_load(vcpu, cpu);
+
+ put_cpu();
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index f8b7bc04b3e7..cf206855ebf0 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -58,8 +58,9 @@ static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_excep
struct vcpu_svm *svm = to_svm(vcpu);
WARN_ON(!is_guest_mode(vcpu));
- if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
- !svm->nested.nested_run_pending) {
+ if (vmcb12_is_intercept(&svm->nested.ctl,
+ INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
+ !svm->nested.nested_run_pending) {
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = fault->error_code;
@@ -121,7 +122,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
void recalc_intercepts(struct vcpu_svm *svm)
{
- struct vmcb_control_area *c, *h, *g;
+ struct vmcb_control_area *c, *h;
+ struct vmcb_ctrl_area_cached *g;
unsigned int i;
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -163,37 +165,6 @@ void recalc_intercepts(struct vcpu_svm *svm)
vmcb_set_intercept(c, INTERCEPT_VMSAVE);
}
-static void copy_vmcb_control_area(struct vmcb_control_area *dst,
- struct vmcb_control_area *from)
-{
- unsigned int i;
-
- for (i = 0; i < MAX_INTERCEPT; i++)
- dst->intercepts[i] = from->intercepts[i];
-
- dst->iopm_base_pa = from->iopm_base_pa;
- dst->msrpm_base_pa = from->msrpm_base_pa;
- dst->tsc_offset = from->tsc_offset;
- /* asid not copied, it is handled manually for svm->vmcb. */
- dst->tlb_ctl = from->tlb_ctl;
- dst->int_ctl = from->int_ctl;
- dst->int_vector = from->int_vector;
- dst->int_state = from->int_state;
- dst->exit_code = from->exit_code;
- dst->exit_code_hi = from->exit_code_hi;
- dst->exit_info_1 = from->exit_info_1;
- dst->exit_info_2 = from->exit_info_2;
- dst->exit_int_info = from->exit_int_info;
- dst->exit_int_info_err = from->exit_int_info_err;
- dst->nested_ctl = from->nested_ctl;
- dst->event_inj = from->event_inj;
- dst->event_inj_err = from->event_inj_err;
- dst->nested_cr3 = from->nested_cr3;
- dst->virt_ext = from->virt_ext;
- dst->pause_filter_count = from->pause_filter_count;
- dst->pause_filter_thresh = from->pause_filter_thresh;
-}
-
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
/*
@@ -203,7 +174,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
*/
int i;
- if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
+ if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
return true;
for (i = 0; i < MSRPM_OFFSETS; i++) {
@@ -250,10 +221,10 @@ static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
}
}
-static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
- struct vmcb_control_area *control)
+static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+ struct vmcb_ctrl_area_cached *control)
{
- if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN)))
+ if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
return false;
if (CC(control->asid == 0))
@@ -275,9 +246,20 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
return true;
}
-static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
- struct vmcb_save_area *save)
+/* Common checks that apply to both L1 and L2 state. */
+static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
+ struct vmcb_save_area_cached *save)
{
+ if (CC(!(save->efer & EFER_SVME)))
+ return false;
+
+ if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
+ CC(save->cr0 & ~0xffffffffULL))
+ return false;
+
+ if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
+ return false;
+
/*
* These checks are also performed by KVM_SET_SREGS,
* except that EFER.LMA is not checked by SVM against
@@ -293,48 +275,90 @@ static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
return false;
+ if (CC(!kvm_valid_efer(vcpu, save->efer)))
+ return false;
+
return true;
}
-/* Common checks that apply to both L1 and L2 state. */
-static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
- struct vmcb_save_area *save)
+static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
{
- /*
- * FIXME: these should be done after copying the fields,
- * to avoid TOC/TOU races. For these save area checks
- * the possible damage is limited since kvm_set_cr0 and
- * kvm_set_cr4 handle failure; EFER_SVME is an exception
- * so it is force-set later in nested_prepare_vmcb_save.
- */
- if (CC(!(save->efer & EFER_SVME)))
- return false;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_save_area_cached *save = &svm->nested.save;
- if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
- CC(save->cr0 & ~0xffffffffULL))
- return false;
+ return __nested_vmcb_check_save(vcpu, save);
+}
- if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
- return false;
+static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
- if (!nested_vmcb_check_cr3_cr4(vcpu, save))
- return false;
+ return __nested_vmcb_check_controls(vcpu, ctl);
+}
- if (CC(!kvm_valid_efer(vcpu, save->efer)))
- return false;
+static
+void __nested_copy_vmcb_control_to_cache(struct vmcb_ctrl_area_cached *to,
+ struct vmcb_control_area *from)
+{
+ unsigned int i;
- return true;
+ for (i = 0; i < MAX_INTERCEPT; i++)
+ to->intercepts[i] = from->intercepts[i];
+
+ to->iopm_base_pa = from->iopm_base_pa;
+ to->msrpm_base_pa = from->msrpm_base_pa;
+ to->tsc_offset = from->tsc_offset;
+ to->tlb_ctl = from->tlb_ctl;
+ to->int_ctl = from->int_ctl;
+ to->int_vector = from->int_vector;
+ to->int_state = from->int_state;
+ to->exit_code = from->exit_code;
+ to->exit_code_hi = from->exit_code_hi;
+ to->exit_info_1 = from->exit_info_1;
+ to->exit_info_2 = from->exit_info_2;
+ to->exit_int_info = from->exit_int_info;
+ to->exit_int_info_err = from->exit_int_info_err;
+ to->nested_ctl = from->nested_ctl;
+ to->event_inj = from->event_inj;
+ to->event_inj_err = from->event_inj_err;
+ to->nested_cr3 = from->nested_cr3;
+ to->virt_ext = from->virt_ext;
+ to->pause_filter_count = from->pause_filter_count;
+ to->pause_filter_thresh = from->pause_filter_thresh;
+
+ /* Copy asid here because nested_vmcb_check_controls will check it. */
+ to->asid = from->asid;
+ to->msrpm_base_pa &= ~0x0fffULL;
+ to->iopm_base_pa &= ~0x0fffULL;
}
-void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
- struct vmcb_control_area *control)
+void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
+ struct vmcb_control_area *control)
{
- copy_vmcb_control_area(&svm->nested.ctl, control);
+ __nested_copy_vmcb_control_to_cache(&svm->nested.ctl, control);
+}
+
+static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
+ struct vmcb_save_area *from)
+{
+ /*
+ * Copy only fields that are validated, as we need them
+ * to avoid TOC/TOU races.
+ */
+ to->efer = from->efer;
+ to->cr0 = from->cr0;
+ to->cr3 = from->cr3;
+ to->cr4 = from->cr4;
- /* Copy it here because nested_svm_check_controls will check it. */
- svm->nested.ctl.asid = control->asid;
- svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
- svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
+ to->dr6 = from->dr6;
+ to->dr7 = from->dr7;
+}
+
+void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
+ struct vmcb_save_area *save)
+{
+ __nested_copy_vmcb_save_to_cache(&svm->nested.save, save);
}
/*
@@ -437,14 +461,13 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
return -EINVAL;
if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
- CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
+ CC(!load_pdptrs(vcpu, cr3)))
return -EINVAL;
if (!nested_npt)
kvm_mmu_new_pgd(vcpu, cr3);
vcpu->arch.cr3 = cr3;
- kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
kvm_init_mmu(vcpu);
@@ -490,15 +513,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
- /*
- * Force-set EFER_SVME even though it is checked earlier on the
- * VMCB12, because the guest can flip the bit between the check
- * and now. Clearing EFER_SVME would call svm_free_nested.
- */
- svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME);
+ svm_set_efer(&svm->vcpu, svm->nested.save.efer);
- svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
- svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
+ svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
+ svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
svm->vcpu.arch.cr2 = vmcb12->save.cr2;
@@ -513,8 +531,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
/* These bits will be set properly on the first execution when new_vmc12 is true */
if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
- svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
- svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
+ svm->vmcb->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
+ svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
vmcb_mark_dirty(svm->vmcb, VMCB_DR);
}
}
@@ -628,7 +646,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
nested_vmcb02_prepare_control(svm);
nested_vmcb02_prepare_save(svm, vmcb12);
- ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
+ ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
nested_npt_enabled(svm), from_vmrun);
if (ret)
return ret;
@@ -678,10 +696,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
if (WARN_ON_ONCE(!svm->nested.initialized))
return -EINVAL;
- nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
+ nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
- if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
- !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
+ if (!nested_vmcb_check_save(vcpu) ||
+ !nested_vmcb_check_controls(vcpu)) {
vmcb12->control.exit_code = SVM_EXIT_ERR;
vmcb12->control.exit_code_hi = 0;
vmcb12->control.exit_info_1 = 0;
@@ -988,7 +1007,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
u32 offset, msr, value;
int write, mask;
- if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
+ if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
return NESTED_EXIT_HOST;
msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
@@ -1015,7 +1034,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
u8 start_bit;
u64 gpa;
- if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
+ if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
return NESTED_EXIT_HOST;
port = svm->vmcb->control.exit_info_1 >> 16;
@@ -1046,12 +1065,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
vmexit = nested_svm_intercept_ioio(svm);
break;
case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
- if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+ if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
- if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+ if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
break;
}
@@ -1069,7 +1088,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
break;
}
default: {
- if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+ if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
}
}
@@ -1147,7 +1166,7 @@ static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
static inline bool nested_exit_on_init(struct vcpu_svm *svm)
{
- return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
+ return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
}
static int svm_check_nested_events(struct kvm_vcpu *vcpu)
@@ -1251,11 +1270,47 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
}
+/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
+static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
+ struct vmcb_ctrl_area_cached *from)
+{
+ unsigned int i;
+
+ memset(dst, 0, sizeof(struct vmcb_control_area));
+
+ for (i = 0; i < MAX_INTERCEPT; i++)
+ dst->intercepts[i] = from->intercepts[i];
+
+ dst->iopm_base_pa = from->iopm_base_pa;
+ dst->msrpm_base_pa = from->msrpm_base_pa;
+ dst->tsc_offset = from->tsc_offset;
+ dst->asid = from->asid;
+ dst->tlb_ctl = from->tlb_ctl;
+ dst->int_ctl = from->int_ctl;
+ dst->int_vector = from->int_vector;
+ dst->int_state = from->int_state;
+ dst->exit_code = from->exit_code;
+ dst->exit_code_hi = from->exit_code_hi;
+ dst->exit_info_1 = from->exit_info_1;
+ dst->exit_info_2 = from->exit_info_2;
+ dst->exit_int_info = from->exit_int_info;
+ dst->exit_int_info_err = from->exit_int_info_err;
+ dst->nested_ctl = from->nested_ctl;
+ dst->event_inj = from->event_inj;
+ dst->event_inj_err = from->event_inj_err;
+ dst->nested_cr3 = from->nested_cr3;
+ dst->virt_ext = from->virt_ext;
+ dst->pause_filter_count = from->pause_filter_count;
+ dst->pause_filter_thresh = from->pause_filter_thresh;
+}
+
static int svm_get_nested_state(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
u32 user_data_size)
{
struct vcpu_svm *svm;
+ struct vmcb_control_area *ctl;
+ unsigned long r;
struct kvm_nested_state kvm_state = {
.flags = 0,
.format = KVM_STATE_NESTED_FORMAT_SVM,
@@ -1297,9 +1352,18 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
*/
if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
return -EFAULT;
- if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
- sizeof(user_vmcb->control)))
+
+ ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+ if (!ctl)
+ return -ENOMEM;
+
+ nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl);
+ r = copy_to_user(&user_vmcb->control, ctl,
+ sizeof(user_vmcb->control));
+ kfree(ctl);
+ if (r)
return -EFAULT;
+
if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
sizeof(user_vmcb->save)))
return -EFAULT;
@@ -1316,6 +1380,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
&user_kvm_nested_state->data.svm[0];
struct vmcb_control_area *ctl;
struct vmcb_save_area *save;
+ struct vmcb_save_area_cached save_cached;
+ struct vmcb_ctrl_area_cached ctl_cached;
unsigned long cr0;
int ret;
@@ -1368,7 +1434,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
goto out_free;
ret = -EINVAL;
- if (!nested_vmcb_check_controls(vcpu, ctl))
+ __nested_copy_vmcb_control_to_cache(&ctl_cached, ctl);
+ if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
goto out_free;
/*
@@ -1383,10 +1450,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
* Validate host state saved from before VMRUN (see
* nested_svm_check_permissions).
*/
+ __nested_copy_vmcb_save_to_cache(&save_cached, save);
if (!(save->cr0 & X86_CR0_PG) ||
!(save->cr0 & X86_CR0_PE) ||
(save->rflags & X86_EFLAGS_VM) ||
- !nested_vmcb_valid_sregs(vcpu, save))
+ !__nested_vmcb_check_save(vcpu, &save_cached))
goto out_free;
/*
@@ -1422,7 +1490,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
- nested_load_control_from_vmcb12(svm, ctl);
+ nested_copy_vmcb_control_to_cache(svm, ctl);
svm_switch_vmcb(svm, &svm->nested.vmcb02);
nested_vmcb02_prepare_control(svm);
@@ -1449,7 +1517,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
* the guest CR3 might be restored prior to setting the nested
* state which can lead to a load of wrong PDPTRs.
*/
- if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+ if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
return false;
if (!nested_svm_vmrun_msrpm(svm)) {
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index b4095dfeeee6..5aa45f13b16d 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -16,6 +16,7 @@
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"
+#include "svm.h"
enum pmu_type {
PMU_TYPE_COUNTER = 0,
@@ -100,6 +101,9 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
{
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+ if (!enable_pmu)
+ return NULL;
+
switch (msr) {
case MSR_F15H_PERF_CTL0:
case MSR_F15H_PERF_CTL1:
@@ -134,12 +138,16 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
return &pmu->gp_counters[msr_to_index(msr)];
}
-static unsigned amd_find_arch_event(struct kvm_pmu *pmu,
- u8 event_select,
- u8 unit_mask)
+static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
{
+ u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+ u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
int i;
+ /* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */
+ if (WARN_ON(pmc_is_fixed(pmc)))
+ return PERF_COUNT_HW_MAX;
+
for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
if (amd_event_mapping[i].eventsel == event_select
&& amd_event_mapping[i].unit_mask == unit_mask)
@@ -151,12 +159,6 @@ static unsigned amd_find_arch_event(struct kvm_pmu *pmu,
return amd_event_mapping[i].event_type;
}
-/* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */
-static unsigned amd_find_fixed_event(int idx)
-{
- return PERF_COUNT_HW_MAX;
-}
-
/* check if a PMC is enabled by comparing it against global_ctrl bits. Because
* AMD CPU doesn't have global_ctrl MSR, all PMCs are enabled (return TRUE).
*/
@@ -319,8 +321,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
}
struct kvm_pmu_ops amd_pmu_ops = {
- .find_arch_event = amd_find_arch_event,
- .find_fixed_event = amd_find_fixed_event,
+ .pmc_perf_hw_id = amd_pmc_perf_hw_id,
.pmc_is_enabled = amd_pmc_is_enabled,
.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index be2883141220..6a22798eaaee 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -636,7 +636,8 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_vcpu *vcpu;
- int i, ret;
+ unsigned long i;
+ int ret;
if (!sev_es_guest(kvm))
return -ENOTTY;
@@ -1593,7 +1594,7 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
static int sev_lock_vcpus_for_migration(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int i, j;
+ unsigned long i, j;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (mutex_lock_killable(&vcpu->mutex))
@@ -1615,7 +1616,7 @@ out_unlock:
static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
mutex_unlock(&vcpu->mutex);
@@ -1642,7 +1643,7 @@ static void sev_migrate_from(struct kvm_sev_info *dst,
static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *dst_vcpu, *src_vcpu;
struct vcpu_svm *dst_svm, *src_svm;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9079d2fdc12e..2c99b18d76c0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -265,7 +265,7 @@ u32 svm_msrpm_offset(u32 msr)
#define MAX_INST_SIZE 15
-static int get_max_npt_level(void)
+static int get_npt_level(void)
{
#ifdef CONFIG_X86_64
return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
@@ -585,12 +585,10 @@ static int svm_cpu_init(int cpu)
if (!sd)
return ret;
sd->cpu = cpu;
- sd->save_area = alloc_page(GFP_KERNEL);
+ sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!sd->save_area)
goto free_cpu_data;
- clear_page(page_address(sd->save_area));
-
ret = sev_cpu_init(sd);
if (ret)
goto free_save_area;
@@ -871,47 +869,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
}
}
-/*
- * The default MMIO mask is a single bit (excluding the present bit),
- * which could conflict with the memory encryption bit. Check for
- * memory encryption support and override the default MMIO mask if
- * memory encryption is enabled.
- */
-static __init void svm_adjust_mmio_mask(void)
-{
- unsigned int enc_bit, mask_bit;
- u64 msr, mask;
-
- /* If there is no memory encryption support, use existing mask */
- if (cpuid_eax(0x80000000) < 0x8000001f)
- return;
-
- /* If memory encryption is not enabled, use existing mask */
- rdmsrl(MSR_AMD64_SYSCFG, msr);
- if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
- return;
-
- enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
- mask_bit = boot_cpu_data.x86_phys_bits;
-
- /* Increment the mask bit if it is the same as the encryption bit */
- if (enc_bit == mask_bit)
- mask_bit++;
-
- /*
- * If the mask bit location is below 52, then some bits above the
- * physical addressing limit will always be reserved, so use the
- * rsvd_bits() function to generate the mask. This mask, along with
- * the present bit, will be used to generate a page fault with
- * PFER.RSV = 1.
- *
- * If the mask bit location is 52 (or above), then clear the mask.
- */
- mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
-
- kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
-}
-
static void svm_hardware_teardown(void)
{
int cpu;
@@ -926,191 +883,6 @@ static void svm_hardware_teardown(void)
iopm_base = 0;
}
-static __init void svm_set_cpu_caps(void)
-{
- kvm_set_cpu_caps();
-
- supported_xss = 0;
-
- /* CPUID 0x80000001 and 0x8000000A (SVM features) */
- if (nested) {
- kvm_cpu_cap_set(X86_FEATURE_SVM);
-
- if (nrips)
- kvm_cpu_cap_set(X86_FEATURE_NRIPS);
-
- if (npt_enabled)
- kvm_cpu_cap_set(X86_FEATURE_NPT);
-
- if (tsc_scaling)
- kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
-
- /* Nested VM can receive #VMEXIT instead of triggering #GP */
- kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
- }
-
- /* CPUID 0x80000008 */
- if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
- boot_cpu_has(X86_FEATURE_AMD_SSBD))
- kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
-
- /* CPUID 0x8000001F (SME/SEV features) */
- sev_set_cpu_caps();
-}
-
-static __init int svm_hardware_setup(void)
-{
- int cpu;
- struct page *iopm_pages;
- void *iopm_va;
- int r;
- unsigned int order = get_order(IOPM_SIZE);
-
- /*
- * NX is required for shadow paging and for NPT if the NX huge pages
- * mitigation is enabled.
- */
- if (!boot_cpu_has(X86_FEATURE_NX)) {
- pr_err_ratelimited("NX (Execute Disable) not supported\n");
- return -EOPNOTSUPP;
- }
- kvm_enable_efer_bits(EFER_NX);
-
- iopm_pages = alloc_pages(GFP_KERNEL, order);
-
- if (!iopm_pages)
- return -ENOMEM;
-
- iopm_va = page_address(iopm_pages);
- memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
- iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
-
- init_msrpm_offsets();
-
- supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
-
- if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
- kvm_enable_efer_bits(EFER_FFXSR);
-
- if (tsc_scaling) {
- if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
- tsc_scaling = false;
- } else {
- pr_info("TSC scaling supported\n");
- kvm_has_tsc_control = true;
- kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
- kvm_tsc_scaling_ratio_frac_bits = 32;
- }
- }
-
- tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
-
- /* Check for pause filtering support */
- if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
- pause_filter_count = 0;
- pause_filter_thresh = 0;
- } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
- pause_filter_thresh = 0;
- }
-
- if (nested) {
- printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
- kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
- }
-
- /*
- * KVM's MMU doesn't support using 2-level paging for itself, and thus
- * NPT isn't supported if the host is using 2-level paging since host
- * CR4 is unchanged on VMRUN.
- */
- if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
- npt_enabled = false;
-
- if (!boot_cpu_has(X86_FEATURE_NPT))
- npt_enabled = false;
-
- /* Force VM NPT level equal to the host's max NPT level */
- kvm_configure_mmu(npt_enabled, get_max_npt_level(),
- get_max_npt_level(), PG_LEVEL_1G);
- pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
-
- /* Note, SEV setup consumes npt_enabled. */
- sev_hardware_setup();
-
- svm_hv_hardware_setup();
-
- svm_adjust_mmio_mask();
-
- for_each_possible_cpu(cpu) {
- r = svm_cpu_init(cpu);
- if (r)
- goto err;
- }
-
- if (nrips) {
- if (!boot_cpu_has(X86_FEATURE_NRIPS))
- nrips = false;
- }
-
- enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
-
- if (enable_apicv) {
- pr_info("AVIC enabled\n");
-
- amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
- }
-
- if (vls) {
- if (!npt_enabled ||
- !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
- !IS_ENABLED(CONFIG_X86_64)) {
- vls = false;
- } else {
- pr_info("Virtual VMLOAD VMSAVE supported\n");
- }
- }
-
- if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
- svm_gp_erratum_intercept = false;
-
- if (vgif) {
- if (!boot_cpu_has(X86_FEATURE_VGIF))
- vgif = false;
- else
- pr_info("Virtual GIF supported\n");
- }
-
- if (lbrv) {
- if (!boot_cpu_has(X86_FEATURE_LBRV))
- lbrv = false;
- else
- pr_info("LBR virtualization supported\n");
- }
-
- svm_set_cpu_caps();
-
- /*
- * It seems that on AMD processors PTE's accessed bit is
- * being set by the CPU hardware before the NPF vmexit.
- * This is not expected behaviour and our tests fail because
- * of it.
- * A workaround here is to disable support for
- * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
- * In this case userspace can know if there is support using
- * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
- * it
- * If future AMD CPU models change the behaviour described above,
- * this variable can be changed accordingly
- */
- allow_smaller_maxphyaddr = !npt_enabled;
-
- return 0;
-
-err:
- svm_hardware_teardown();
- return r;
-}
-
static void init_seg(struct vmcb_seg *seg)
{
seg->selector = 0;
@@ -1435,12 +1207,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
if (err)
goto error_free_vmsa_page;
- /* We initialize this flag to true to make sure that the is_running
- * bit would be set the first time the vcpu is loaded.
- */
- if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
- svm->avic_is_running = true;
-
svm->msrpm = svm_vcpu_alloc_msrpm();
if (!svm->msrpm) {
err = -ENOMEM;
@@ -1596,10 +1362,16 @@ static bool svm_get_if_flag(struct kvm_vcpu *vcpu)
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
+ kvm_register_mark_available(vcpu, reg);
+
switch (reg) {
case VCPU_EXREG_PDPTR:
- BUG_ON(!npt_enabled);
- load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
+ /*
+ * When !npt_enabled, mmu->pdptrs[] is already available since
+ * it is always updated per SDM when moving to CRs.
+ */
+ if (npt_enabled)
+ load_pdptrs(vcpu, kvm_read_cr3(vcpu));
break;
default:
KVM_BUG_ON(1, vcpu->kvm);
@@ -1786,6 +1558,24 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
vmcb_mark_dirty(svm->vmcb, VMCB_DT);
}
+static void svm_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ /*
+ * For guests that don't set guest_state_protected, the cr3 update is
+ * handled via kvm_mmu_load() while entering the guest. For guests
+ * that do (SEV-ES/SEV-SNP), the cr3 update needs to be written to
+ * VMCB save area now, since the save area will become the initial
+ * contents of the VMSA, and future VMCB save area updates won't be
+ * seen.
+ */
+ if (sev_es_guest(vcpu->kvm)) {
+ svm->vmcb->save.cr3 = cr3;
+ vmcb_mark_dirty(svm->vmcb, VMCB_CR);
+ }
+}
+
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2517,7 +2307,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
bool ret = false;
if (!is_guest_mode(vcpu) ||
- (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
+ (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
return false;
cr0 &= ~SVM_CR0_SELECTIVE_MASK;
@@ -3800,6 +3590,11 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
svm_complete_interrupts(vcpu);
}
+static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
@@ -3931,6 +3726,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
}
+ vcpu->arch.regs_dirty = 0;
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
@@ -3965,8 +3761,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.apf.host_apf_flags =
kvm_read_and_reset_apf_flags();
- if (npt_enabled)
- kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
+ vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET;
/*
* We need to handle MC intercepts here before the vcpu has a chance to
@@ -3996,9 +3791,6 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
hv_track_root_tdp(vcpu, root_hpa);
- /* Loading L2's CR3 is handled by enter_svm_guest_mode. */
- if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
- return;
cr3 = vcpu->arch.cr3;
} else if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
cr3 = __sme_set(root_hpa) | kvm_get_active_pcid(vcpu);
@@ -4217,7 +4009,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
info->intercept == x86_intercept_clts)
break;
- if (!(vmcb_is_intercept(&svm->nested.ctl,
+ if (!(vmcb12_is_intercept(&svm->nested.ctl,
INTERCEPT_SELECTIVE_CR0)))
break;
@@ -4436,7 +4228,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
*/
vmcb12 = map.hva;
- nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
+ nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
unmap_save:
@@ -4598,8 +4391,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.prepare_guest_switch = svm_prepare_guest_switch,
.vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put,
- .vcpu_blocking = svm_vcpu_blocking,
- .vcpu_unblocking = svm_vcpu_unblocking,
+ .vcpu_blocking = avic_vcpu_blocking,
+ .vcpu_unblocking = avic_vcpu_unblocking,
.update_exception_bitmap = svm_update_exception_bitmap,
.get_msr_feature = svm_get_msr_feature,
@@ -4611,6 +4404,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
.set_cr0 = svm_set_cr0,
+ .post_set_cr3 = svm_post_set_cr3,
.is_valid_cr4 = svm_is_valid_cr4,
.set_cr4 = svm_set_cr4,
.set_efer = svm_set_efer,
@@ -4630,6 +4424,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.tlb_flush_gva = svm_flush_tlb_gva,
.tlb_flush_guest = svm_flush_tlb,
+ .vcpu_pre_run = svm_vcpu_pre_run,
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
@@ -4710,6 +4505,243 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
};
+/*
+ * The default MMIO mask is a single bit (excluding the present bit),
+ * which could conflict with the memory encryption bit. Check for
+ * memory encryption support and override the default MMIO mask if
+ * memory encryption is enabled.
+ */
+static __init void svm_adjust_mmio_mask(void)
+{
+ unsigned int enc_bit, mask_bit;
+ u64 msr, mask;
+
+ /* If there is no memory encryption support, use existing mask */
+ if (cpuid_eax(0x80000000) < 0x8000001f)
+ return;
+
+ /* If memory encryption is not enabled, use existing mask */
+ rdmsrl(MSR_AMD64_SYSCFG, msr);
+ if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
+ return;
+
+ enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
+ mask_bit = boot_cpu_data.x86_phys_bits;
+
+ /* Increment the mask bit if it is the same as the encryption bit */
+ if (enc_bit == mask_bit)
+ mask_bit++;
+
+ /*
+ * If the mask bit location is below 52, then some bits above the
+ * physical addressing limit will always be reserved, so use the
+ * rsvd_bits() function to generate the mask. This mask, along with
+ * the present bit, will be used to generate a page fault with
+ * PFER.RSV = 1.
+ *
+ * If the mask bit location is 52 (or above), then clear the mask.
+ */
+ mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
+
+ kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+}
+
+static __init void svm_set_cpu_caps(void)
+{
+ kvm_set_cpu_caps();
+
+ supported_xss = 0;
+
+ /* CPUID 0x80000001 and 0x8000000A (SVM features) */
+ if (nested) {
+ kvm_cpu_cap_set(X86_FEATURE_SVM);
+
+ if (nrips)
+ kvm_cpu_cap_set(X86_FEATURE_NRIPS);
+
+ if (npt_enabled)
+ kvm_cpu_cap_set(X86_FEATURE_NPT);
+
+ if (tsc_scaling)
+ kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
+
+ /* Nested VM can receive #VMEXIT instead of triggering #GP */
+ kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
+ }
+
+ /* CPUID 0x80000008 */
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
+ boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+
+ /* AMD PMU PERFCTR_CORE CPUID */
+ if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
+
+ /* CPUID 0x8000001F (SME/SEV features) */
+ sev_set_cpu_caps();
+}
+
+static __init int svm_hardware_setup(void)
+{
+ int cpu;
+ struct page *iopm_pages;
+ void *iopm_va;
+ int r;
+ unsigned int order = get_order(IOPM_SIZE);
+
+ /*
+ * NX is required for shadow paging and for NPT if the NX huge pages
+ * mitigation is enabled.
+ */
+ if (!boot_cpu_has(X86_FEATURE_NX)) {
+ pr_err_ratelimited("NX (Execute Disable) not supported\n");
+ return -EOPNOTSUPP;
+ }
+ kvm_enable_efer_bits(EFER_NX);
+
+ iopm_pages = alloc_pages(GFP_KERNEL, order);
+
+ if (!iopm_pages)
+ return -ENOMEM;
+
+ iopm_va = page_address(iopm_pages);
+ memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
+ iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
+
+ init_msrpm_offsets();
+
+ supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+ if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+ kvm_enable_efer_bits(EFER_FFXSR);
+
+ if (tsc_scaling) {
+ if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ tsc_scaling = false;
+ } else {
+ pr_info("TSC scaling supported\n");
+ kvm_has_tsc_control = true;
+ kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
+ kvm_tsc_scaling_ratio_frac_bits = 32;
+ }
+ }
+
+ tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
+
+ /* Check for pause filtering support */
+ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
+ pause_filter_count = 0;
+ pause_filter_thresh = 0;
+ } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
+ pause_filter_thresh = 0;
+ }
+
+ if (nested) {
+ printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
+ kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
+ }
+
+ /*
+ * KVM's MMU doesn't support using 2-level paging for itself, and thus
+ * NPT isn't supported if the host is using 2-level paging since host
+ * CR4 is unchanged on VMRUN.
+ */
+ if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
+ npt_enabled = false;
+
+ if (!boot_cpu_has(X86_FEATURE_NPT))
+ npt_enabled = false;
+
+ /* Force VM NPT level equal to the host's paging level */
+ kvm_configure_mmu(npt_enabled, get_npt_level(),
+ get_npt_level(), PG_LEVEL_1G);
+ pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
+
+ /* Note, SEV setup consumes npt_enabled. */
+ sev_hardware_setup();
+
+ svm_hv_hardware_setup();
+
+ svm_adjust_mmio_mask();
+
+ for_each_possible_cpu(cpu) {
+ r = svm_cpu_init(cpu);
+ if (r)
+ goto err;
+ }
+
+ if (nrips) {
+ if (!boot_cpu_has(X86_FEATURE_NRIPS))
+ nrips = false;
+ }
+
+ enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
+
+ if (enable_apicv) {
+ pr_info("AVIC enabled\n");
+
+ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+ } else {
+ svm_x86_ops.vcpu_blocking = NULL;
+ svm_x86_ops.vcpu_unblocking = NULL;
+ }
+
+ if (vls) {
+ if (!npt_enabled ||
+ !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
+ !IS_ENABLED(CONFIG_X86_64)) {
+ vls = false;
+ } else {
+ pr_info("Virtual VMLOAD VMSAVE supported\n");
+ }
+ }
+
+ if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
+ svm_gp_erratum_intercept = false;
+
+ if (vgif) {
+ if (!boot_cpu_has(X86_FEATURE_VGIF))
+ vgif = false;
+ else
+ pr_info("Virtual GIF supported\n");
+ }
+
+ if (lbrv) {
+ if (!boot_cpu_has(X86_FEATURE_LBRV))
+ lbrv = false;
+ else
+ pr_info("LBR virtualization supported\n");
+ }
+
+ if (!enable_pmu)
+ pr_info("PMU virtualization is disabled\n");
+
+ svm_set_cpu_caps();
+
+ /*
+ * It seems that on AMD processors PTE's accessed bit is
+ * being set by the CPU hardware before the NPF vmexit.
+ * This is not expected behaviour and our tests fail because
+ * of it.
+ * A workaround here is to disable support for
+ * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
+ * In this case userspace can know if there is support using
+ * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
+ * it
+ * If future AMD CPU models change the behaviour described above,
+ * this variable can be changed accordingly
+ */
+ allow_smaller_maxphyaddr = !npt_enabled;
+
+ return 0;
+
+err:
+ svm_hardware_teardown();
+ return r;
+}
+
+
static struct kvm_x86_init_ops svm_init_ops __initdata = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 1c7306c370fa..47ef8f4a9358 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -105,6 +105,40 @@ struct kvm_vmcb_info {
uint64_t asid_generation;
};
+struct vmcb_save_area_cached {
+ u64 efer;
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+};
+
+struct vmcb_ctrl_area_cached {
+ u32 intercepts[MAX_INTERCEPT];
+ u16 pause_filter_thresh;
+ u16 pause_filter_count;
+ u64 iopm_base_pa;
+ u64 msrpm_base_pa;
+ u64 tsc_offset;
+ u32 asid;
+ u8 tlb_ctl;
+ u32 int_ctl;
+ u32 int_vector;
+ u32 int_state;
+ u32 exit_code;
+ u32 exit_code_hi;
+ u64 exit_info_1;
+ u64 exit_info_2;
+ u32 exit_int_info;
+ u32 exit_int_info_err;
+ u64 nested_ctl;
+ u32 event_inj;
+ u32 event_inj_err;
+ u64 nested_cr3;
+ u64 virt_ext;
+};
+
struct svm_nested_state {
struct kvm_vmcb_info vmcb02;
u64 hsave_msr;
@@ -120,7 +154,13 @@ struct svm_nested_state {
bool nested_run_pending;
/* cache for control fields of the guest */
- struct vmcb_control_area ctl;
+ struct vmcb_ctrl_area_cached ctl;
+
+ /*
+ * Note: this struct is not kept up-to-date while L2 runs; it is only
+ * valid within nested_svm_vmrun.
+ */
+ struct vmcb_save_area_cached save;
bool initialized;
};
@@ -185,7 +225,6 @@ struct vcpu_svm {
u32 dfr_reg;
struct page *avic_backing_page;
u64 *avic_physical_id_cache;
- bool avic_is_running;
/*
* Per-vcpu list of struct amd_svm_iommu_ir:
@@ -285,6 +324,16 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_svm, vcpu);
}
+/*
+ * Only the PDPTRs are loaded on demand into the shadow MMU. All other
+ * fields are synchronized in handle_exit, because accessing the VMCB is cheap.
+ *
+ * CR3 might be out of date in the VMCB but it is not marked dirty; instead,
+ * KVM_REQ_LOAD_MMU_PGD is always requested when the cached vcpu->arch.cr3
+ * is changed. svm_load_mmu_pgd() then syncs the new CR3 value into the VMCB.
+ */
+#define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_EXREG_PDPTR)
+
static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
{
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
@@ -303,6 +352,12 @@ static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
return test_bit(bit, (unsigned long *)&control->intercepts);
}
+static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
+{
+ WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
+ return test_bit(bit, (unsigned long *)&control->intercepts);
+}
+
static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -455,17 +510,17 @@ static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
{
- return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
+ return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
}
static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
{
- return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
+ return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
}
static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
{
- return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
+ return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
}
int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
@@ -494,8 +549,10 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
int nested_svm_exit_special(struct vcpu_svm *svm);
void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier);
-void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
- struct vmcb_control_area *control);
+void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
+ struct vmcb_control_area *control);
+void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
+ struct vmcb_save_area *save);
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
@@ -515,17 +572,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- u64 *entry = svm->avic_physical_id_cache;
-
- if (!entry)
- return false;
-
- return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
-}
-
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
@@ -546,8 +592,8 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec);
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu);
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
-void svm_vcpu_blocking(struct kvm_vcpu *vcpu);
-void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
+void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
+void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
/* sev.c */
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 953b0fcb21ee..92e6f6702f00 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1356,6 +1356,30 @@ TRACE_EVENT(kvm_apicv_update_request,
__entry->bit)
);
+TRACE_EVENT(kvm_apicv_accept_irq,
+ TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec),
+ TP_ARGS(apicid, dm, tm, vec),
+
+ TP_STRUCT__entry(
+ __field( __u32, apicid )
+ __field( __u16, dm )
+ __field( __u16, tm )
+ __field( __u8, vec )
+ ),
+
+ TP_fast_assign(
+ __entry->apicid = apicid;
+ __entry->dm = dm;
+ __entry->tm = tm;
+ __entry->vec = vec;
+ ),
+
+ TP_printk("apicid %x vec %u (%s|%s)",
+ __entry->apicid, __entry->vec,
+ __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
+ __entry->tm ? "level" : "edge")
+);
+
/*
* Tracepoint for AMD AVIC
*/
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 4705ad55abb5..959b59d13b5a 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -5,6 +5,7 @@
#include <asm/vmx.h>
#include "lapic.h"
+#include "x86.h"
extern bool __read_mostly enable_vpid;
extern bool __read_mostly flexpriority_enabled;
@@ -312,6 +313,15 @@ static inline bool cpu_has_vmx_ept_1g_page(void)
return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
}
+static inline int ept_caps_to_lpage_level(u32 ept_caps)
+{
+ if (ept_caps & VMX_EPT_1GB_PAGE_BIT)
+ return PG_LEVEL_1G;
+ if (ept_caps & VMX_EPT_2MB_PAGE_BIT)
+ return PG_LEVEL_2M;
+ return PG_LEVEL_4K;
+}
+
static inline bool cpu_has_vmx_ept_ad_bits(void)
{
return vmx_capability.ept & VMX_EPT_AD_BIT;
@@ -380,6 +390,9 @@ static inline u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = 0;
+ if (!enable_pmu)
+ return perf_cap;
+
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 9c941535f78c..f235f77cbc03 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -245,7 +245,8 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
src = &prev->host_state;
dest = &vmx->loaded_vmcs->host_state;
- vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
+ vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel,
+ src->fs_base, src->gs_base);
dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
dest->ds_sel = src->ds_sel;
@@ -269,7 +270,13 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
vmx_sync_vmcs_host_state(vmx, prev);
put_cpu();
- vmx_register_cache_reset(vcpu);
+ vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET;
+
+ /*
+ * All lazily updated registers will be reloaded from VMCS12 on both
+ * vmentry and vmexit.
+ */
+ vcpu->arch.regs_dirty = 0;
}
/*
@@ -391,9 +398,11 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
{
- kvm_init_shadow_ept_mmu(vcpu,
- to_vmx(vcpu)->nested.msrs.ept_caps &
- VMX_EPT_EXECUTE_ONLY_BIT,
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT;
+ int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps);
+
+ kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level,
nested_ept_ad_enabled(vcpu),
nested_ept_get_eptp(vcpu));
}
@@ -591,6 +600,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
int msr;
unsigned long *msr_bitmap_l1;
unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
+ struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
/* Nothing to do if the MSR bitmap is not in use. */
@@ -598,6 +608,19 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
return false;
+ /*
+ * MSR bitmap update can be skipped when:
+ * - MSR bitmap for L1 hasn't changed.
+ * - Nested hypervisor (L1) is attempting to launch the same L2 as
+ * before.
+ * - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature
+ * and tells KVM (L0) there were no changes in MSR bitmap for L2.
+ */
+ if (!vmx->nested.force_msr_bitmap_recalc && evmcs &&
+ evmcs->hv_enlightenments_control.msr_bitmap &&
+ evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)
+ return true;
+
if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
return false;
@@ -664,6 +687,8 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
+ vmx->nested.force_msr_bitmap_recalc = false;
+
return true;
}
@@ -1095,7 +1120,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
* must not be dereferenced.
*/
if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
- CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
+ CC(!load_pdptrs(vcpu, cr3))) {
*entry_failure_code = ENTRY_FAIL_PDPTE;
return -EINVAL;
}
@@ -1104,7 +1129,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
kvm_mmu_new_pgd(vcpu, cr3);
vcpu->arch.cr3 = cr3;
- kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
kvm_init_mmu(vcpu);
@@ -2021,10 +2046,13 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
* Clean fields data can't be used on VMLAUNCH and when we switch
* between different L2 guests as KVM keeps a single VMCS12 per L1.
*/
- if (from_launch || evmcs_gpa_changed)
+ if (from_launch || evmcs_gpa_changed) {
vmx->nested.hv_evmcs->hv_clean_fields &=
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ vmx->nested.force_msr_bitmap_recalc = true;
+ }
+
return EVMPTRLD_SUCCEEDED;
}
@@ -3027,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr3, cr4;
+ unsigned long cr4;
bool vm_fail;
if (!nested_early_check)
@@ -3050,12 +3078,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
*/
vmcs_writel(GUEST_RFLAGS, 0);
- cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
- vmcs_writel(HOST_CR3, cr3);
- vmx->loaded_vmcs->host_state.cr3 = cr3;
- }
-
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
@@ -3145,7 +3167,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
* the guest CR3 might be restored prior to setting the nested
* state which can lead to a load of wrong PDPTRs.
*/
- if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+ if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
return false;
}
@@ -3504,10 +3526,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (evmptrld_status == EVMPTRLD_ERROR) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
- } else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
- return nested_vmx_failInvalid(vcpu);
}
+ kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+ if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
+ return nested_vmx_failInvalid(vcpu);
+
if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
vmx->nested.current_vmptr == INVALID_GPA))
return nested_vmx_failInvalid(vcpu);
@@ -3603,7 +3628,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
!(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
(vmcs12->guest_rflags & X86_EFLAGS_IF))) {
vmx->nested.nested_run_pending = 0;
- return kvm_vcpu_halt(vcpu);
+ return kvm_emulate_halt_noskip(vcpu);
}
break;
case GUEST_ACTIVITY_WAIT_SIPI:
@@ -5258,6 +5283,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
vmx->nested.need_vmcs12_to_shadow_sync = true;
}
vmx->nested.dirty_vmcs12 = true;
+ vmx->nested.force_msr_bitmap_recalc = true;
}
/* Emulate the VMPTRLD instruction */
@@ -6393,6 +6419,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
goto error_guest_mode;
vmx->nested.dirty_vmcs12 = true;
+ vmx->nested.force_msr_bitmap_recalc = true;
ret = nested_vmx_enter_non_root_mode(vcpu, false);
if (ret)
goto error_guest_mode;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 1b7456b2177b..466d18fc0c5d 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -21,7 +21,6 @@
#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
- /* Index must match CPUID 0x0A.EBX bit vector */
[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
@@ -29,6 +28,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = {
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+ /* The above index must match CPUID 0x0A.EBX bit vector */
[7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};
@@ -68,34 +68,29 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
reprogram_counter(pmu, bit);
}
-static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
- u8 event_select,
- u8 unit_mask)
+static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
{
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+ u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+ u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
int i;
- for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
- if (intel_arch_events[i].eventsel == event_select
- && intel_arch_events[i].unit_mask == unit_mask
- && (pmu->available_event_types & (1 << i)))
- break;
-
- if (i == ARRAY_SIZE(intel_arch_events))
- return PERF_COUNT_HW_MAX;
+ for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
+ if (intel_arch_events[i].eventsel != event_select ||
+ intel_arch_events[i].unit_mask != unit_mask)
+ continue;
- return intel_arch_events[i].event_type;
-}
+ /* disable event that reported as not present by cpuid */
+ if ((i < 7) && !(pmu->available_event_types & (1 << i)))
+ return PERF_COUNT_HW_MAX + 1;
-static unsigned intel_find_fixed_event(int idx)
-{
- u32 event;
- size_t size = ARRAY_SIZE(fixed_pmc_events);
+ break;
+ }
- if (idx >= size)
+ if (i == ARRAY_SIZE(intel_arch_events))
return PERF_COUNT_HW_MAX;
- event = fixed_pmc_events[array_index_nospec(idx, size)];
- return intel_arch_events[event].event_type;
+ return intel_arch_events[i].event_type;
}
/* check if a PMC is enabled by comparing it with globl_ctrl bits. */
@@ -459,6 +454,21 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
}
+static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
+{
+ size_t size = ARRAY_SIZE(fixed_pmc_events);
+ struct kvm_pmc *pmc;
+ u32 event;
+ int i;
+
+ for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
+ pmc = &pmu->fixed_counters[i];
+ event = fixed_pmc_events[array_index_nospec(i, size)];
+ pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
+ intel_arch_events[event].eventsel;
+ }
+}
+
static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -477,7 +487,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->reserved_bits = 0xffffffff00200000ull;
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
- if (!entry)
+ if (!entry || !enable_pmu)
return;
eax.full = entry->eax;
edx.full = entry->edx;
@@ -500,12 +510,14 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->nr_arch_fixed_counters = 0;
} else {
pmu->nr_arch_fixed_counters =
- min_t(int, edx.split.num_counters_fixed,
- x86_pmu.num_counters_fixed);
+ min3(ARRAY_SIZE(fixed_pmc_events),
+ (size_t) edx.split.num_counters_fixed,
+ (size_t) x86_pmu.num_counters_fixed);
edx.split.bit_width_fixed = min_t(int,
edx.split.bit_width_fixed, x86_pmu.bit_width_fixed);
pmu->counter_bitmask[KVM_PMC_FIXED] =
((u64)1 << edx.split.bit_width_fixed) - 1;
+ setup_fixed_pmc_eventsel(pmu);
}
pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
@@ -703,8 +715,7 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
}
struct kvm_pmu_ops intel_pmu_ops = {
- .find_arch_event = intel_find_arch_event,
- .find_fixed_event = intel_find_fixed_event,
+ .pmc_perf_hw_id = intel_pmc_perf_hw_id,
.pmc_is_enabled = intel_pmc_is_enabled,
.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 1c94783b5a54..aa1fe9085d77 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -11,58 +11,109 @@
#include "vmx.h"
/*
- * We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we
- * can find which vCPU should be waken up.
+ * Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler()
+ * when a WAKEUP_VECTOR interrupted is posted. vCPUs are added to the list when
+ * the vCPU is scheduled out and is blocking (e.g. in HLT) with IRQs enabled.
+ * The vCPUs posted interrupt descriptor is updated at the same time to set its
+ * notification vector to WAKEUP_VECTOR, so that posted interrupt from devices
+ * wake the target vCPUs. vCPUs are removed from the list and the notification
+ * vector is reset when the vCPU is scheduled in.
*/
-static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
-static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
+/*
+ * Protect the per-CPU list with a per-CPU spinlock to handle task migration.
+ * When a blocking vCPU is awakened _and_ migrated to a different pCPU, the
+ * ->sched_in() path will need to take the vCPU off the list of the _previous_
+ * CPU. IRQs must be disabled when taking this lock, otherwise deadlock will
+ * occur if a wakeup IRQ arrives and attempts to acquire the lock.
+ */
+static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
return &(to_vmx(vcpu)->pi_desc);
}
+static int pi_try_set_control(struct pi_desc *pi_desc, u64 old, u64 new)
+{
+ /*
+ * PID.ON can be set at any time by a different vCPU or by hardware,
+ * e.g. a device. PID.control must be written atomically, and the
+ * update must be retried with a fresh snapshot an ON change causes
+ * the cmpxchg to fail.
+ */
+ if (cmpxchg64(&pi_desc->control, old, new) != old)
+ return -EBUSY;
+
+ return 0;
+}
+
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
{
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
struct pi_desc old, new;
+ unsigned long flags;
unsigned int dest;
/*
- * In case of hot-plug or hot-unplug, we may have to undo
- * vmx_vcpu_pi_put even if there is no assigned device. And we
- * always keep PI.NDST up to date for simplicity: it makes the
- * code easier, and CPU migration is not a fast path.
+ * To simplify hot-plug and dynamic toggling of APICv, keep PI.NDST and
+ * PI.SN up-to-date even if there is no assigned device or if APICv is
+ * deactivated due to a dynamic inhibit bit, e.g. for Hyper-V's SyncIC.
*/
- if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+ if (!enable_apicv || !lapic_in_kernel(vcpu))
return;
/*
- * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
- * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
- * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
- * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
- * correctly.
+ * If the vCPU wasn't on the wakeup list and wasn't migrated, then the
+ * full update can be skipped as neither the vector nor the destination
+ * needs to be changed.
*/
- if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
- pi_clear_sn(pi_desc);
- goto after_clear_sn;
+ if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR && vcpu->cpu == cpu) {
+ /*
+ * Clear SN if it was set due to being preempted. Again, do
+ * this even if there is no assigned device for simplicity.
+ */
+ if (pi_test_and_clear_sn(pi_desc))
+ goto after_clear_sn;
+ return;
}
- /* The full case. */
- do {
- old.control = new.control = pi_desc->control;
+ local_irq_save(flags);
- dest = cpu_physical_id(cpu);
+ /*
+ * If the vCPU was waiting for wakeup, remove the vCPU from the wakeup
+ * list of the _previous_ pCPU, which will not be the same as the
+ * current pCPU if the task was migrated.
+ */
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
+ raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ list_del(&vmx->pi_wakeup_list);
+ raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ }
- if (x2apic_mode)
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
+ dest = cpu_physical_id(cpu);
+ if (!x2apic_mode)
+ dest = (dest << 8) & 0xFF00;
+
+ do {
+ old.control = new.control = READ_ONCE(pi_desc->control);
+ /*
+ * Clear SN (as above) and refresh the destination APIC ID to
+ * handle task migration (@cpu != vcpu->cpu).
+ */
+ new.ndst = dest;
new.sn = 0;
- } while (cmpxchg64(&pi_desc->control, old.control,
- new.control) != old.control);
+
+ /*
+ * Restore the notification vector; in the blocking case, the
+ * descriptor was modified on "put" to use the wakeup vector.
+ */
+ new.nv = POSTED_INTR_VECTOR;
+ } while (pi_try_set_control(pi_desc, old.control, new.control));
+
+ local_irq_restore(flags);
after_clear_sn:
@@ -85,126 +136,64 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
irq_remapping_cap(IRQ_POSTING_CAP);
}
-void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
- if (!vmx_can_use_vtd_pi(vcpu->kvm))
- return;
-
- /* Set SN when the vCPU is preempted */
- if (vcpu->preempted)
- pi_set_sn(pi_desc);
-}
-
-static void __pi_post_block(struct kvm_vcpu *vcpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
-
- do {
- old.control = new.control = pi_desc->control;
- WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
- "Wakeup handler not enabled while the VCPU is blocked\n");
-
- dest = cpu_physical_id(vcpu->cpu);
-
- if (x2apic_mode)
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- } while (cmpxchg64(&pi_desc->control, old.control,
- new.control) != old.control);
-
- if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
- vcpu->pre_pcpu = -1;
- }
-}
-
/*
- * This routine does the following things for vCPU which is going
- * to be blocked if VT-d PI is enabled.
- * - Store the vCPU to the wakeup list, so when interrupts happen
- * we can find the right vCPU to wake up.
- * - Change the Posted-interrupt descriptor as below:
- * 'NDST' <-- vcpu->pre_pcpu
- * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
- * - If 'ON' is set during this process, which means at least one
- * interrupt is posted for this vCPU, we cannot block it, in
- * this case, return 1, otherwise, return 0.
- *
+ * Put the vCPU on this pCPU's list of vCPUs that needs to be awakened and set
+ * WAKEUP as the notification vector in the PI descriptor.
*/
-int pi_pre_block(struct kvm_vcpu *vcpu)
+static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
{
- unsigned int dest;
- struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct pi_desc old, new;
+ unsigned long flags;
- if (!vmx_can_use_vtd_pi(vcpu->kvm))
- return 0;
-
- WARN_ON(irqs_disabled());
- local_irq_disable();
- if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
- vcpu->pre_pcpu = vcpu->cpu;
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
- list_add_tail(&vcpu->blocked_vcpu_list,
- &per_cpu(blocked_vcpu_on_cpu,
- vcpu->pre_pcpu));
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
- }
-
- do {
- old.control = new.control = pi_desc->control;
+ local_irq_save(flags);
- WARN((pi_desc->sn == 1),
- "Warning: SN field of posted-interrupts "
- "is set before blocking\n");
+ raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ list_add_tail(&vmx->pi_wakeup_list,
+ &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
+ raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
- /*
- * Since vCPU can be preempted during this process,
- * vcpu->cpu could be different with pre_pcpu, we
- * need to set pre_pcpu as the destination of wakeup
- * notification event, then we can find the right vCPU
- * to wakeup in wakeup handler if interrupts happen
- * when the vCPU is in blocked state.
- */
- dest = cpu_physical_id(vcpu->pre_pcpu);
+ WARN(pi_desc->sn, "PI descriptor SN field set before blocking");
- if (x2apic_mode)
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
+ do {
+ old.control = new.control = READ_ONCE(pi_desc->control);
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
- } while (cmpxchg64(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (pi_try_set_control(pi_desc, old.control, new.control));
- /* We should not block the vCPU if an interrupt is posted for it. */
- if (pi_test_on(pi_desc) == 1)
- __pi_post_block(vcpu);
+ /*
+ * Send a wakeup IPI to this CPU if an interrupt may have been posted
+ * before the notification vector was updated, in which case the IRQ
+ * will arrive on the non-wakeup vector. An IPI is needed as calling
+ * try_to_wake_up() from ->sched_out() isn't allowed (IRQs are not
+ * enabled until it is safe to call try_to_wake_up() on the task being
+ * scheduled out).
+ */
+ if (pi_test_on(&new))
+ apic->send_IPI_self(POSTED_INTR_WAKEUP_VECTOR);
- local_irq_enable();
- return (vcpu->pre_pcpu == -1);
+ local_irq_restore(flags);
}
-void pi_post_block(struct kvm_vcpu *vcpu)
+void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
{
- if (vcpu->pre_pcpu == -1)
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ if (!vmx_can_use_vtd_pi(vcpu->kvm))
return;
- WARN_ON(irqs_disabled());
- local_irq_disable();
- __pi_post_block(vcpu);
- local_irq_enable();
+ if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
+ pi_enable_wakeup_handler(vcpu);
+
+ /*
+ * Set SN when the vCPU is preempted. Note, the vCPU can both be seen
+ * as blocking and preempted, e.g. if it's preempted between setting
+ * its wait state and manually scheduling out.
+ */
+ if (vcpu->preempted)
+ pi_set_sn(pi_desc);
}
/*
@@ -212,24 +201,23 @@ void pi_post_block(struct kvm_vcpu *vcpu)
*/
void pi_wakeup_handler(void)
{
- struct kvm_vcpu *vcpu;
int cpu = smp_processor_id();
+ struct vcpu_vmx *vmx;
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
- list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
- blocked_vcpu_list) {
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
+ list_for_each_entry(vmx, &per_cpu(wakeup_vcpus_on_cpu, cpu),
+ pi_wakeup_list) {
- if (pi_test_on(pi_desc) == 1)
- kvm_vcpu_kick(vcpu);
+ if (pi_test_on(&vmx->pi_desc))
+ kvm_vcpu_wake_up(&vmx->vcpu);
}
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
}
void __init pi_init_cpu(int cpu)
{
- INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
- spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ INIT_LIST_HEAD(&per_cpu(wakeup_vcpus_on_cpu, cpu));
+ raw_spin_lock_init(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
}
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
@@ -245,7 +233,7 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
* Bail out of the block loop if the VM has an assigned
* device, but the blocking vCPU didn't reconfigure the
* PI.NV to the wakeup vector, i.e. the assigned device
- * came along after the initial check in pi_pre_block().
+ * came along after the initial check in vmx_vcpu_pi_put().
*/
void vmx_pi_start_assignment(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 7f7b2326caf5..eb14e76b84ef 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -40,7 +40,13 @@ static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
-static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
{
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
}
@@ -74,13 +80,13 @@ static inline void pi_clear_sn(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
-static inline int pi_test_on(struct pi_desc *pi_desc)
+static inline bool pi_test_on(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_ON,
(unsigned long *)&pi_desc->control);
}
-static inline int pi_test_sn(struct pi_desc *pi_desc)
+static inline bool pi_test_sn(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_SN,
(unsigned long *)&pi_desc->control);
@@ -88,8 +94,6 @@ static inline int pi_test_sn(struct pi_desc *pi_desc)
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
-int pi_pre_block(struct kvm_vcpu *vcpu);
-void pi_post_block(struct kvm_vcpu *vcpu);
void pi_wakeup_handler(void);
void __init pi_init_cpu(int cpu);
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 6e5de2e2b0da..e325c290a816 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -129,6 +129,11 @@ static inline bool is_machine_check(u32 intr_info)
return is_exception_n(intr_info, MC_VECTOR);
}
+static inline bool is_nm_fault(u32 intr_info)
+{
+ return is_exception_n(intr_info, NM_VECTOR);
+}
+
/* Undocumented: icebp/int1 */
static inline bool is_icebp(u32 intr_info)
{
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1187cd1e38aa..4ac676066d60 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -36,6 +36,7 @@
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/fpu/api.h>
+#include <asm/fpu/xstate.h>
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
@@ -161,6 +162,8 @@ static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_FS_BASE,
MSR_GS_BASE,
MSR_KERNEL_GS_BASE,
+ MSR_IA32_XFD,
+ MSR_IA32_XFD_ERR,
#endif
MSR_IA32_SYSENTER_CS,
MSR_IA32_SYSENTER_ESP,
@@ -602,15 +605,13 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
unsigned int slot = msr - vmx->guest_uret_msrs;
int ret = 0;
- u64 old_msr_data = msr->data;
- msr->data = data;
if (msr->load_into_hardware) {
preempt_disable();
- ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
+ ret = kvm_set_user_return_msr(slot, data, msr->mask);
preempt_enable();
- if (ret)
- msr->data = old_msr_data;
}
+ if (!ret)
+ msr->data = data;
return ret;
}
@@ -763,6 +764,14 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, match);
}
+ /*
+ * Disabling xfd interception indicates that dynamic xfeatures
+ * might be used in the guest. Always trap #NM in this case
+ * to save guest xfd_err timely.
+ */
+ if (vcpu->arch.xfd_no_write_intercept)
+ eb |= (1u << NM_VECTOR);
+
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -1071,9 +1080,14 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}
-void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
- unsigned long fs_base, unsigned long gs_base)
+void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
+ u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base)
{
+ if (unlikely(cr3 != host->cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ host->cr3 = cr3;
+ }
if (unlikely(fs_sel != host->fs_sel)) {
if (!(fs_sel & 7))
vmcs_write16(HOST_FS_SELECTOR, fs_sel);
@@ -1168,7 +1182,9 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
gs_base = segment_base(gs_sel);
#endif
- vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
+ vmx_set_vmcs_host_state(host_state, __get_current_cr3_fast(),
+ fs_sel, gs_sel, fs_base, gs_base);
+
vmx->guest_state_loaded = true;
}
@@ -1271,7 +1287,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
if (!already_loaded) {
void *gdt = get_current_gdt_ro();
- unsigned long sysenter_esp;
/*
* Flush all EPTP/VPID contexts, the new pCPU may have stale
@@ -1287,8 +1302,11 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
(unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
- rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
- vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+ if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) {
+ /* 22.2.3 */
+ vmcs_writel(HOST_IA32_SYSENTER_ESP,
+ (unsigned long)(cpu_entry_stack(cpu) + 1));
+ }
vmx->loaded_vmcs->cpu = cpu;
}
@@ -1753,7 +1771,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
}
/*
- * Reads an msr value (of 'msr_index') into 'pdata'.
+ * Reads an msr value (of 'msr_info->index') into 'msr_info->data'.
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
@@ -1960,6 +1978,24 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_KERNEL_GS_BASE:
vmx_write_guest_kernel_gs_base(vmx, data);
break;
+ case MSR_IA32_XFD:
+ ret = kvm_set_msr_common(vcpu, msr_info);
+ /*
+ * Always intercepting WRMSR could incur non-negligible
+ * overhead given xfd might be changed frequently in
+ * guest context switch. Disable write interception
+ * upon the first write with a non-zero value (indicating
+ * potential usage on dynamic xfeatures). Also update
+ * exception bitmap to trap #NM for proper virtualization
+ * of guest xfd_err.
+ */
+ if (!ret && data) {
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD,
+ MSR_TYPE_RW);
+ vcpu->arch.xfd_no_write_intercept = true;
+ vmx_update_exception_bitmap(vcpu);
+ }
+ break;
#endif
case MSR_IA32_SYSENTER_CS:
if (is_guest_mode(vcpu))
@@ -2100,9 +2136,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
ret = kvm_set_msr_common(vcpu, msr_info);
break;
- case MSR_IA32_TSC_ADJUST:
- ret = kvm_set_msr_common(vcpu, msr_info);
- break;
case MSR_IA32_MCG_EXT_CTL:
if ((!msr_info->host_initiated &&
!(to_vmx(vcpu)->msr_ia32_feature_control &
@@ -2985,7 +3018,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
- kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_PDPTR);
}
#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
@@ -3067,6 +3100,13 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
/* Note, vmx_set_cr4() consumes the new vcpu->arch.cr0. */
if ((old_cr0_pg ^ cr0) & X86_CR0_PG)
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
+
+ /*
+ * When !CR0_PG -> CR0_PG, vcpu->arch.cr3 becomes active, but
+ * GUEST_CR3 is still vmx->ept_identity_map_addr if EPT + !URG.
+ */
+ if (!(old_cr0_pg & X86_CR0_PG) && (cr0 & X86_CR0_PG))
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
}
/* depends on vcpu->arch.cr0 to be set to a new value */
@@ -3110,9 +3150,9 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
if (!enable_unrestricted_guest && !is_paging(vcpu))
guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
- else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+ else if (kvm_register_is_dirty(vcpu, VCPU_EXREG_CR3))
guest_cr3 = vcpu->arch.cr3;
- else /* vmcs01.GUEST_CR3 is already up-to-date. */
+ else /* vmcs.GUEST_CR3 is already up-to-date. */
update_guest_cr3 = false;
vmx_ept_load_pdptrs(vcpu);
} else {
@@ -3123,6 +3163,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
vmcs_writel(GUEST_CR3, guest_cr3);
}
+
static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
/*
@@ -3687,6 +3728,19 @@ void free_vpid(int vpid)
spin_unlock(&vmx_vpid_lock);
}
+static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
+{
+ /*
+ * When KVM is a nested hypervisor on top of Hyper-V and uses
+ * 'Enlightened MSR Bitmap' feature L0 needs to know that MSR
+ * bitmap has changed.
+ */
+ if (static_branch_unlikely(&enable_evmcs))
+ evmcs_touch_msr_bitmap();
+
+ vmx->nested.force_msr_bitmap_recalc = true;
+}
+
void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3695,8 +3749,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
if (!cpu_has_vmx_msr_bitmap())
return;
- if (static_branch_unlikely(&enable_evmcs))
- evmcs_touch_msr_bitmap();
+ vmx_msr_bitmap_l01_changed(vmx);
/*
* Mark the desired intercept state in shadow bitmap, this is needed
@@ -3740,8 +3793,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
if (!cpu_has_vmx_msr_bitmap())
return;
- if (static_branch_unlikely(&enable_evmcs))
- evmcs_touch_msr_bitmap();
+ vmx_msr_bitmap_l01_changed(vmx);
/*
* Mark the desired intercept state in shadow bitmap, this is needed
@@ -3879,12 +3931,10 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
pt_update_intercept_for_msr(vcpu);
}
-static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
- bool nested)
+static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
+ int pi_vec)
{
#ifdef CONFIG_SMP
- int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
-
if (vcpu->mode == IN_GUEST_MODE) {
/*
* The vector of interrupt to be delivered to vcpu had
@@ -3912,10 +3962,15 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
*/
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
- return true;
+ return;
}
#endif
- return false;
+ /*
+ * The vCPU isn't in the guest; wake the vCPU in case it is blocking,
+ * otherwise do nothing as KVM will grab the highest priority pending
+ * IRQ via ->sync_pir_to_irr() in vcpu_enter_guest().
+ */
+ kvm_vcpu_wake_up(vcpu);
}
static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
@@ -3931,9 +3986,21 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
*/
vmx->nested.pi_pending = true;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ /*
+ * This pairs with the smp_mb_*() after setting vcpu->mode in
+ * vcpu_enter_guest() to guarantee the vCPU sees the event
+ * request if triggering a posted interrupt "fails" because
+ * vcpu->mode != IN_GUEST_MODE. The extra barrier is needed as
+ * the smb_wmb() in kvm_make_request() only ensures everything
+ * done before making the request is visible when the request
+ * is visible, it doesn't ensure ordering between the store to
+ * vcpu->requests and the load from vcpu->mode.
+ */
+ smp_mb__after_atomic();
+
/* the PIR and ON have been set by L1. */
- if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
- kvm_vcpu_kick(vcpu);
+ kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_NESTED_VECTOR);
return 0;
}
return -1;
@@ -3964,9 +4031,13 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
if (pi_test_and_set_on(&vmx->pi_desc))
return 0;
- if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
- kvm_vcpu_kick(vcpu);
-
+ /*
+ * The implied barrier in pi_test_and_set_on() pairs with the smp_mb_*()
+ * after setting vcpu->mode in vcpu_enter_guest(), thus the vCPU is
+ * guaranteed to see PID.ON=1 and sync the PIR to IRR if triggering a
+ * posted interrupt "fails" because vcpu->mode != IN_GUEST_MODE.
+ */
+ kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_VECTOR);
return 0;
}
@@ -4021,6 +4092,12 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
+
+ /*
+ * If 32-bit syscall is enabled, vmx_vcpu_load_vcms rewrites
+ * HOST_IA32_SYSENTER_ESP.
+ */
+ vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
@@ -4039,8 +4116,10 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
vcpu->arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS &
~vcpu->arch.cr4_guest_rsvd_bits;
- if (!enable_ept)
- vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_PGE;
+ if (!enable_ept) {
+ vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_TLBFLUSH_BITS;
+ vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_PDPTR_BITS;
+ }
if (is_guest_mode(&vmx->vcpu))
vcpu->arch.cr4_guest_owned_bits &=
~get_vmcs12(vcpu)->cr4_guest_host_mask;
@@ -4692,7 +4771,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
if (kvm_emulate_instruction(vcpu, 0)) {
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
- return kvm_vcpu_halt(vcpu);
+ return kvm_emulate_halt_noskip(vcpu);
}
return 1;
}
@@ -4748,6 +4827,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
if (is_machine_check(intr_info) || is_nmi(intr_info))
return 1; /* handled by handle_exception_nmi_irqoff() */
+ /*
+ * Queue the exception here instead of in handle_nm_fault_irqoff().
+ * This ensures the nested_vmx check is not skipped so vmexit can
+ * be reflected to L1 (when it intercepts #NM) before reaching this
+ * point.
+ */
+ if (is_nm_fault(intr_info)) {
+ kvm_queue_exception(vcpu, NM_VECTOR);
+ return 1;
+ }
+
if (is_invalid_opcode(intr_info))
return handle_ud(vcpu);
@@ -5336,6 +5426,14 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
return 1;
}
+static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ return vmx->emulation_required && !vmx->rmode.vm86_active &&
+ vcpu->arch.exception.pending;
+}
+
static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5355,15 +5453,14 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (!kvm_emulate_instruction(vcpu, 0))
return 0;
- if (vmx->emulation_required && !vmx->rmode.vm86_active &&
- vcpu->arch.exception.pending) {
+ if (vmx_emulation_required_with_pending_exception(vcpu)) {
kvm_prepare_emulation_failure_exit(vcpu);
return 0;
}
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
- return kvm_vcpu_halt(vcpu);
+ return kvm_emulate_halt_noskip(vcpu);
}
/*
@@ -5378,6 +5475,16 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
return 1;
}
+static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+ if (vmx_emulation_required_with_pending_exception(vcpu)) {
+ kvm_prepare_emulation_failure_exit(vcpu);
+ return 0;
+ }
+
+ return 1;
+}
+
static void grow_ple_window(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6351,6 +6458,26 @@ static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
kvm_after_interrupt(vcpu);
}
+static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Save xfd_err to guest_fpu before interrupt is enabled, so the
+ * MSR value is not clobbered by the host activity before the guest
+ * has chance to consume it.
+ *
+ * Do not blindly read xfd_err here, since this exception might
+ * be caused by L1 interception on a platform which doesn't
+ * support xfd at all.
+ *
+ * Do it conditionally upon guest_fpu::xfd. xfd_err matters
+ * only when xfd contains a non-zero value.
+ *
+ * Queuing exception is done in vmx_handle_exit. See comment there.
+ */
+ if (vcpu->arch.guest_fpu.fpstate->xfd)
+ rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
+}
+
static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
@@ -6359,6 +6486,9 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
/* if exit due to PF check for async PF */
if (is_page_fault(intr_info))
vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
+ /* if exit due to NM, handle before interrupts are enabled */
+ else if (is_nm_fault(intr_info))
+ handle_nm_fault_irqoff(&vmx->vcpu);
/* Handle machine checks before interrupts are enabled */
else if (is_machine_check(intr_info))
kvm_machine_check();
@@ -6617,7 +6747,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr3, cr4;
+ unsigned long cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -6658,12 +6788,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
-
- cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
- vmcs_writel(HOST_CR3, cr3);
- vmx->loaded_vmcs->host_state.cr3 = cr3;
- }
+ vcpu->arch.regs_dirty = 0;
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
@@ -6752,7 +6877,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
loadsegment(es, __USER_DS);
#endif
- vmx_register_cache_reset(vcpu);
+ vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
pt_guest_exit(vmx);
@@ -6820,6 +6945,8 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0);
vmx = to_vmx(vcpu);
+ INIT_LIST_HEAD(&vmx->pi_wakeup_list);
+
err = -ENOMEM;
vmx->vpid = allocate_vpid();
@@ -6971,7 +7098,6 @@ static int __init vmx_check_processor_compat(void)
static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
u8 cache;
- u64 ipat = 0;
/* We wanted to honor guest CD/MTRR/PAT, but doing so could result in
* memory aliases with conflicting memory types and sometimes MCEs.
@@ -6991,30 +7117,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
* EPT memory type is used to emulate guest CD/MTRR.
*/
- if (is_mmio) {
- cache = MTRR_TYPE_UNCACHABLE;
- goto exit;
- }
+ if (is_mmio)
+ return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
- if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
- ipat = VMX_EPT_IPAT_BIT;
- cache = MTRR_TYPE_WRBACK;
- goto exit;
- }
+ if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
+ return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
- ipat = VMX_EPT_IPAT_BIT;
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
cache = MTRR_TYPE_WRBACK;
else
cache = MTRR_TYPE_UNCACHABLE;
- goto exit;
- }
- cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+ return (cache << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
+ }
-exit:
- return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
+ return kvm_mtrr_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT;
}
static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
@@ -7206,6 +7324,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
}
}
+ if (kvm_cpu_cap_has(X86_FEATURE_XFD))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
+ !guest_cpuid_has(vcpu, X86_FEATURE_XFD));
+
+
set_cr4_guest_host_mask(vmx);
vmx_write_encls_bitmap(vcpu, NULL);
@@ -7445,25 +7568,6 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
}
-static int vmx_pre_block(struct kvm_vcpu *vcpu)
-{
- if (pi_pre_block(vcpu))
- return 1;
-
- if (kvm_lapic_hv_timer_in_use(vcpu))
- kvm_lapic_switch_to_sw_timer(vcpu);
-
- return 0;
-}
-
-static void vmx_post_block(struct kvm_vcpu *vcpu)
-{
- if (kvm_x86_ops.set_hv_timer)
- kvm_lapic_switch_to_hv_timer(vcpu);
-
- pi_post_block(vcpu);
-}
-
static void vmx_setup_mce(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.mcg_cap & MCG_LMCE_P)
@@ -7606,6 +7710,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.tlb_flush_gva = vmx_flush_tlb_gva,
.tlb_flush_guest = vmx_flush_tlb_guest,
+ .vcpu_pre_run = vmx_vcpu_pre_run,
.run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
.skip_emulated_instruction = vmx_skip_emulated_instruction,
@@ -7664,9 +7769,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.cpu_dirty_log_size = PML_ENTITY_NUM,
.update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
- .pre_block = vmx_pre_block,
- .post_block = vmx_post_block,
-
.pmu_ops = &intel_pmu_ops,
.nested_ops = &vmx_nested_ops,
@@ -7741,7 +7843,7 @@ static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
struct desc_ptr dt;
- int r, ept_lpage_level;
+ int r;
store_idt(&dt);
host_idt_base = dt.address;
@@ -7838,16 +7940,8 @@ static __init int hardware_setup(void)
kvm_mmu_set_ept_masks(enable_ept_ad_bits,
cpu_has_vmx_ept_execute_only());
- if (!enable_ept)
- ept_lpage_level = 0;
- else if (cpu_has_vmx_ept_1g_page())
- ept_lpage_level = PG_LEVEL_1G;
- else if (cpu_has_vmx_ept_2m_page())
- ept_lpage_level = PG_LEVEL_2M;
- else
- ept_lpage_level = PG_LEVEL_4K;
kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
- ept_lpage_level);
+ ept_caps_to_lpage_level(vmx_capability.ept));
/*
* Only enable PML when hardware supports PML feature, and both EPT
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 4df2ac24ffc1..7f2c82e7f38f 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -159,6 +159,15 @@ struct nested_vmx {
bool dirty_vmcs12;
/*
+ * Indicates whether MSR bitmap for L2 needs to be rebuilt due to
+ * changes in MSR bitmap for L1 or switching to a different L2. Note,
+ * this flag can only be used reliably in conjunction with a paravirt L1
+ * which informs L0 whether any changes to MSR bitmap for L2 were done
+ * on its side.
+ */
+ bool force_msr_bitmap_recalc;
+
+ /*
* Indicates lazily loaded guest state has not yet been decached from
* vmcs02.
*/
@@ -308,6 +317,9 @@ struct vcpu_vmx {
/* Posted interrupt descriptor */
struct pi_desc pi_desc;
+ /* Used if this vCPU is waiting for PI notification wakeup. */
+ struct list_head pi_wakeup_list;
+
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;
@@ -340,7 +352,7 @@ struct vcpu_vmx {
struct lbr_desc lbr_desc;
/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 13
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS 15
struct {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
@@ -362,8 +374,9 @@ int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
-void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
- unsigned long fs_base, unsigned long gs_base);
+void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
+ u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
@@ -473,19 +486,21 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
-static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
- | (1 << VCPU_EXREG_RFLAGS)
- | (1 << VCPU_EXREG_PDPTR)
- | (1 << VCPU_EXREG_SEGMENTS)
- | (1 << VCPU_EXREG_CR0)
- | (1 << VCPU_EXREG_CR3)
- | (1 << VCPU_EXREG_CR4)
- | (1 << VCPU_EXREG_EXIT_INFO_1)
- | (1 << VCPU_EXREG_EXIT_INFO_2));
- vcpu->arch.regs_dirty = 0;
-}
+/*
+ * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
+ * cache on demand. Other registers not listed here are synced to
+ * the cache immediately after VM-Exit.
+ */
+#define VMX_REGS_LAZY_LOAD_SET ((1 << VCPU_REGS_RIP) | \
+ (1 << VCPU_REGS_RSP) | \
+ (1 << VCPU_EXREG_RFLAGS) | \
+ (1 << VCPU_EXREG_PDPTR) | \
+ (1 << VCPU_EXREG_SEGMENTS) | \
+ (1 << VCPU_EXREG_CR0) | \
+ (1 << VCPU_EXREG_CR3) | \
+ (1 << VCPU_EXREG_CR4) | \
+ (1 << VCPU_EXREG_EXIT_INFO_1) | \
+ (1 << VCPU_EXREG_EXIT_INFO_2))
static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 35d9324c2f2a..5e7f41225780 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -71,6 +71,31 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
{
unsigned long value;
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+ asm_volatile_goto("1: vmread %[field], %[output]\n\t"
+ "jna %l[do_fail]\n\t"
+
+ _ASM_EXTABLE(1b, %l[do_exception])
+
+ : [output] "=r" (value)
+ : [field] "r" (field)
+ : "cc"
+ : do_fail, do_exception);
+
+ return value;
+
+do_fail:
+ WARN_ONCE(1, "kvm: vmread failed: field=%lx\n", field);
+ pr_warn_ratelimited("kvm: vmread failed: field=%lx\n", field);
+ return 0;
+
+do_exception:
+ kvm_spurious_fault();
+ return 0;
+
+#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
asm volatile("1: vmread %2, %1\n\t"
".byte 0x3e\n\t" /* branch taken hint */
"ja 3f\n\t"
@@ -99,6 +124,8 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
: ASM_CALL_CONSTRAINT, "=&r"(value) : "r"(field) : "cc");
return value;
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
}
static __always_inline u16 vmcs_read16(unsigned long field)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 829d03fcb481..9e43d756312f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -118,6 +118,7 @@ static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
static int sync_regs(struct kvm_vcpu *vcpu);
+static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu);
static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
@@ -186,6 +187,11 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
int __read_mostly pi_inject_timer = -1;
module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+/* Enable/disable PMU virtualization */
+bool __read_mostly enable_pmu = true;
+EXPORT_SYMBOL_GPL(enable_pmu);
+module_param(enable_pmu, bool, 0444);
+
/*
* Restoring the host value for MSRs that are only consumed when running in
* usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@ -210,7 +216,7 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
- | XFEATURE_MASK_PKRU)
+ | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);
@@ -710,6 +716,17 @@ int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
+static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err)
+{
+ if (err) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE | EMULTYPE_SKIP |
+ EMULTYPE_COMPLETE_USER_EXIT);
+}
+
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
++vcpu->stat.pf_guest;
@@ -798,8 +815,9 @@ static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
/*
* Load the pae pdptrs. Return 1 if they are all valid, 0 otherwise.
*/
-int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
gpa_t real_gpa;
int i;
@@ -810,8 +828,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
* If the MMU is nested, CR3 holds an L2 GPA and needs to be translated
* to an L1 GPA.
*/
- real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(pdpt_gfn),
- PFERR_USER_MASK | PFERR_WRITE_MASK, NULL);
+ real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(pdpt_gfn),
+ PFERR_USER_MASK | PFERR_WRITE_MASK, NULL);
if (real_gpa == UNMAPPED_GVA)
return 0;
@@ -828,8 +846,16 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
}
}
+ /*
+ * Marking VCPU_EXREG_PDPTR dirty doesn't work for !tdp_enabled.
+ * Shadow page roots need to be reconstructed instead.
+ */
+ if (!tdp_enabled && memcmp(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)))
+ kvm_mmu_free_roots(vcpu, mmu, KVM_MMU_ROOT_CURRENT);
+
memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+ kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
vcpu->arch.pdptrs_from_userspace = false;
return 1;
@@ -856,7 +882,6 @@ EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
unsigned long old_cr0 = kvm_read_cr0(vcpu);
- unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
cr0 |= X86_CR0_ET;
@@ -886,8 +911,8 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
#endif
if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
- is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
- !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
+ is_pae(vcpu) && ((cr0 ^ old_cr0) & X86_CR0_PDPTR_BITS) &&
+ !load_pdptrs(vcpu, kvm_read_cr3(vcpu)))
return 1;
if (!(cr0 & X86_CR0_PG) &&
@@ -990,6 +1015,11 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
return 1;
}
+
+ if ((xcr0 & XFEATURE_MASK_XTILE) &&
+ ((xcr0 & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE))
+ return 1;
+
vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
@@ -1051,8 +1081,6 @@ EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long old_cr4 = kvm_read_cr4(vcpu);
- unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
- X86_CR4_SMEP;
if (!kvm_is_valid_cr4(vcpu, cr4))
return 1;
@@ -1063,9 +1091,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if ((cr4 ^ old_cr4) & X86_CR4_LA57)
return 1;
} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
- && ((cr4 ^ old_cr4) & pdptr_bits)
- && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
- kvm_read_cr3(vcpu)))
+ && ((cr4 ^ old_cr4) & X86_CR4_PDPTR_BITS)
+ && !load_pdptrs(vcpu, kvm_read_cr3(vcpu)))
return 1;
if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
@@ -1154,14 +1181,15 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
return 1;
- if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+ if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, cr3))
return 1;
if (cr3 != kvm_read_cr3(vcpu))
kvm_mmu_new_pgd(vcpu, cr3);
vcpu->arch.cr3 = cr3;
- kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+ /* Do not call post_set_cr3, we do not get here for confidential guests. */
handle_tlb_flush:
/*
@@ -1359,6 +1387,7 @@ static const u32 msrs_to_save_all[] = {
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
+ MSR_IA32_XFD, MSR_IA32_XFD_ERR,
};
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
@@ -1815,22 +1844,36 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
}
EXPORT_SYMBOL_GPL(kvm_set_msr);
-static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
+static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu)
{
- int err = vcpu->run->msr.error;
- if (!err) {
+ if (!vcpu->run->msr.error) {
kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
}
+}
- return static_call(kvm_x86_complete_emulated_msr)(vcpu, err);
+static int complete_emulated_msr_access(struct kvm_vcpu *vcpu)
+{
+ return complete_emulated_insn_gp(vcpu, vcpu->run->msr.error);
+}
+
+static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
+{
+ complete_userspace_rdmsr(vcpu);
+ return complete_emulated_msr_access(vcpu);
}
-static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
+static int complete_fast_msr_access(struct kvm_vcpu *vcpu)
{
return static_call(kvm_x86_complete_emulated_msr)(vcpu, vcpu->run->msr.error);
}
+static int complete_fast_rdmsr(struct kvm_vcpu *vcpu)
+{
+ complete_userspace_rdmsr(vcpu);
+ return complete_fast_msr_access(vcpu);
+}
+
static u64 kvm_msr_reason(int r)
{
switch (r) {
@@ -1865,18 +1908,6 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
return 1;
}
-static int kvm_get_msr_user_space(struct kvm_vcpu *vcpu, u32 index, int r)
-{
- return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_RDMSR, 0,
- complete_emulated_rdmsr, r);
-}
-
-static int kvm_set_msr_user_space(struct kvm_vcpu *vcpu, u32 index, u64 data, int r)
-{
- return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_WRMSR, data,
- complete_emulated_wrmsr, r);
-}
-
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
{
u32 ecx = kvm_rcx_read(vcpu);
@@ -1885,18 +1916,16 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
r = kvm_get_msr(vcpu, ecx, &data);
- /* MSR read failed? See if we should ask user space */
- if (r && kvm_get_msr_user_space(vcpu, ecx, r)) {
- /* Bounce to user space */
- return 0;
- }
-
if (!r) {
trace_kvm_msr_read(ecx, data);
kvm_rax_write(vcpu, data & -1u);
kvm_rdx_write(vcpu, (data >> 32) & -1u);
} else {
+ /* MSR read failed? See if we should ask user space */
+ if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0,
+ complete_fast_rdmsr, r))
+ return 0;
trace_kvm_msr_read_ex(ecx);
}
@@ -1912,19 +1941,18 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
r = kvm_set_msr(vcpu, ecx, data);
- /* MSR write failed? See if we should ask user space */
- if (r && kvm_set_msr_user_space(vcpu, ecx, data, r))
- /* Bounce to user space */
- return 0;
-
- /* Signal all other negative errors to userspace */
- if (r < 0)
- return r;
-
- if (!r)
+ if (!r) {
trace_kvm_msr_write(ecx, data);
- else
+ } else {
+ /* MSR write failed? See if we should ask user space */
+ if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data,
+ complete_fast_msr_access, r))
+ return 0;
+ /* Signal all other negative errors to userspace */
+ if (r < 0)
+ return r;
trace_kvm_msr_write_ex(ecx, data);
+ }
return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
}
@@ -2119,7 +2147,7 @@ static s64 get_kvmclock_base_ns(void)
}
#endif
-void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
+static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
{
int version;
int r;
@@ -2817,7 +2845,7 @@ static void kvm_end_pvclock_update(struct kvm *kvm)
{
struct kvm_arch *ka = &kvm->arch;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
write_seqcount_end(&ka->pvclock_sc);
raw_spin_unlock_irq(&ka->tsc_write_lock);
@@ -3066,7 +3094,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
static void kvmclock_update_fn(struct work_struct *work)
{
- int i;
+ unsigned long i;
struct delayed_work *dwork = to_delayed_work(work);
struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
kvmclock_update_work);
@@ -3669,6 +3697,30 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.msr_misc_features_enables = data;
break;
+#ifdef CONFIG_X86_64
+ case MSR_IA32_XFD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ return 1;
+
+ if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
+ vcpu->arch.guest_supported_xcr0))
+ return 1;
+
+ fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data);
+ break;
+ case MSR_IA32_XFD_ERR:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ return 1;
+
+ if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
+ vcpu->arch.guest_supported_xcr0))
+ return 1;
+
+ vcpu->arch.guest_fpu.xfd_err = data;
+ break;
+#endif
default:
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
@@ -3989,6 +4041,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_K7_HWCR:
msr_info->data = vcpu->arch.msr_hwcr;
break;
+#ifdef CONFIG_X86_64
+ case MSR_IA32_XFD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ return 1;
+
+ msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd;
+ break;
+ case MSR_IA32_XFD_ERR:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ return 1;
+
+ msr_info->data = vcpu->arch.guest_fpu.xfd_err;
+ break;
+#endif
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
@@ -4172,7 +4240,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_XEN_HVM:
r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
- KVM_XEN_HVM_CONFIG_SHARED_INFO;
+ KVM_XEN_HVM_CONFIG_SHARED_INFO |
+ KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL;
if (sched_info_on())
r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
break;
@@ -4250,6 +4319,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
else
r = 0;
break;
+ case KVM_CAP_XSAVE2: {
+ u64 guest_perm = xstate_get_guest_group_perm();
+
+ r = xstate_required_size(supported_xcr0 & guest_perm, false);
+ if (r < sizeof(struct kvm_xsave))
+ r = sizeof(struct kvm_xsave);
+ break;
+ }
default:
break;
}
@@ -4853,6 +4930,16 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
vcpu->arch.pkru);
}
+static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+ u8 *state, unsigned int size)
+{
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+ return;
+
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+ state, size, vcpu->arch.pkru);
+}
+
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
@@ -5148,17 +5235,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
- /*
- * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
- * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
- * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
- * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with
- * the core vCPU model on the fly, so fail.
- */
- r = -EINVAL;
- if (vcpu->arch.last_vmentry_cpu != -1)
- goto out;
-
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@@ -5169,14 +5245,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
- /*
- * KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in
- * KVM_SET_CPUID case above.
- */
- r = -EINVAL;
- if (vcpu->arch.last_vmentry_cpu != -1)
- goto out;
-
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@@ -5306,6 +5374,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_XSAVE: {
+ r = -EINVAL;
+ if (vcpu->arch.guest_fpu.uabi_size > sizeof(struct kvm_xsave))
+ break;
+
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
if (!u.xsave)
@@ -5320,7 +5392,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SET_XSAVE: {
- u.xsave = memdup_user(argp, sizeof(*u.xsave));
+ int size = vcpu->arch.guest_fpu.uabi_size;
+
+ u.xsave = memdup_user(argp, size);
if (IS_ERR(u.xsave)) {
r = PTR_ERR(u.xsave);
goto out_nofree;
@@ -5329,6 +5403,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
break;
}
+
+ case KVM_GET_XSAVE2: {
+ int size = vcpu->arch.guest_fpu.uabi_size;
+
+ u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT);
+ r = -ENOMEM;
+ if (!u.xsave)
+ break;
+
+ kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
+
+ r = -EFAULT;
+ if (copy_to_user(argp, u.xsave, size))
+ break;
+
+ r = 0;
+ break;
+ }
+
case KVM_GET_XCRS: {
u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
@@ -5693,7 +5786,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
* VM-Exit.
*/
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vcpu_kick(vcpu);
@@ -5962,7 +6055,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
static int kvm_arch_suspend_notifier(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int i, ret = 0;
+ unsigned long i;
+ int ret = 0;
mutex_lock(&kvm->lock);
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -6422,6 +6516,11 @@ static void kvm_init_msr_list(void)
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
continue;
break;
+ case MSR_IA32_XFD:
+ case MSR_IA32_XFD_ERR:
+ if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
+ continue;
+ break;
default:
break;
}
@@ -6505,13 +6604,14 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception)
{
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
gpa_t t_gpa;
BUG_ON(!mmu_is_nested(vcpu));
/* NPT walks are always user-walks */
access |= PFERR_USER_MASK;
- t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
+ t_gpa = mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
return t_gpa;
}
@@ -6519,25 +6619,31 @@ gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
- return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
+ return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
}
EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_FETCH_MASK;
- return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
+ return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
}
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_WRITE_MASK;
- return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
+ return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
}
EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
@@ -6545,19 +6651,21 @@ EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
- return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
+ return mmu->gva_to_gpa(vcpu, mmu, gva, 0, exception);
}
static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu, u32 access,
struct x86_exception *exception)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
void *data = val;
int r = X86EMUL_CONTINUE;
while (bytes) {
- gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
- exception);
+ gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access, exception);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
@@ -6585,13 +6693,14 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
struct x86_exception *exception)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
unsigned offset;
int ret;
/* Inline kvm_read_guest_virt_helper for speed. */
- gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
- exception);
+ gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access|PFERR_FETCH_MASK,
+ exception);
if (unlikely(gpa == UNMAPPED_GVA))
return X86EMUL_PROPAGATE_FAULT;
@@ -6650,13 +6759,12 @@ static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes
struct kvm_vcpu *vcpu, u32 access,
struct x86_exception *exception)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
void *data = val;
int r = X86EMUL_CONTINUE;
while (bytes) {
- gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
- access,
- exception);
+ gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access, exception);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
@@ -6743,6 +6851,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
gpa_t *gpa, struct x86_exception *exception,
bool write)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
u32 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0)
| (write ? PFERR_WRITE_MASK : 0);
@@ -6760,7 +6869,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
return 1;
}
- *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
+ *gpa = mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
if (*gpa == UNMAPPED_GVA)
return -1;
@@ -7394,7 +7503,8 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
r = kvm_get_msr(vcpu, msr_index, pdata);
- if (r && kvm_get_msr_user_space(vcpu, msr_index, r)) {
+ if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0,
+ complete_emulated_rdmsr, r)) {
/* Bounce to user space */
return X86EMUL_IO_NEEDED;
}
@@ -7410,7 +7520,8 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
r = kvm_set_msr(vcpu, msr_index, data);
- if (r && kvm_set_msr_user_space(vcpu, msr_index, data, r)) {
+ if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data,
+ complete_emulated_msr_access, r)) {
/* Bounce to user space */
return X86EMUL_IO_NEEDED;
}
@@ -7961,6 +8072,8 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
if (unlikely(!r))
return 0;
+ kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
+
/*
* rflags is the old, "raw" value of the flags. The new value has
* not been saved yet.
@@ -8128,12 +8241,23 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
}
/*
- * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
- * for kvm_skip_emulated_instruction(). The caller is responsible for
- * updating interruptibility state and injecting single-step #DBs.
+ * EMULTYPE_SKIP without EMULTYPE_COMPLETE_USER_EXIT is intended for
+ * use *only* by vendor callbacks for kvm_skip_emulated_instruction().
+ * The caller is responsible for updating interruptibility state and
+ * injecting single-step #DBs.
*/
if (emulation_type & EMULTYPE_SKIP) {
- kvm_rip_write(vcpu, ctxt->_eip);
+ if (ctxt->mode != X86EMUL_MODE_PROT64)
+ ctxt->eip = (u32)ctxt->_eip;
+ else
+ ctxt->eip = ctxt->_eip;
+
+ if (emulation_type & EMULTYPE_COMPLETE_USER_EXIT) {
+ r = 1;
+ goto writeback;
+ }
+
+ kvm_rip_write(vcpu, ctxt->eip);
if (ctxt->eflags & X86_EFLAGS_RF)
kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
return 1;
@@ -8197,17 +8321,24 @@ restart:
writeback = false;
r = 0;
vcpu->arch.complete_userspace_io = complete_emulated_mmio;
+ } else if (vcpu->arch.complete_userspace_io) {
+ writeback = false;
+ r = 0;
} else if (r == EMULATION_RESTART)
goto restart;
else
r = 1;
+writeback:
if (writeback) {
unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
+ kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
+ if (ctxt->is_branch)
+ kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
kvm_rip_write(vcpu, ctxt->eip);
if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
r = kvm_vcpu_do_singlestep(vcpu);
@@ -8394,7 +8525,8 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
- int i, send_ipi = 0;
+ int send_ipi = 0;
+ unsigned long i;
/*
* We allow guests to temporarily run on slowing clocks,
@@ -8523,9 +8655,8 @@ static void kvm_timer_init(void)
static void pvclock_gtod_update_fn(struct work_struct *work)
{
struct kvm *kvm;
-
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
@@ -8683,8 +8814,15 @@ void kvm_arch_exit(void)
#endif
}
-static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
+static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
{
+ /*
+ * The vCPU has halted, e.g. executed HLT. Update the run state if the
+ * local APIC is in-kernel, the run loop will detect the non-runnable
+ * state and halt the vCPU. Exit to userspace if the local APIC is
+ * managed by userspace, in which case userspace is responsible for
+ * handling wake events.
+ */
++vcpu->stat.halt_exits;
if (lapic_in_kernel(vcpu)) {
vcpu->arch.mp_state = state;
@@ -8695,11 +8833,11 @@ static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
}
}
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
{
- return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
+ return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
@@ -8708,7 +8846,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
* TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
* KVM_EXIT_DEBUG here.
*/
- return kvm_vcpu_halt(vcpu) && ret;
+ return kvm_emulate_halt_noskip(vcpu) && ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
@@ -8716,7 +8854,8 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
{
int ret = kvm_skip_emulated_instruction(vcpu);
- return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret;
+ return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
+ KVM_EXIT_AP_RESET_HOLD) && ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
@@ -9792,10 +9931,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
smp_mb__after_srcu_read_unlock();
/*
- * This handles the case where a posted interrupt was
- * notified with kvm_vcpu_kick. Assigned devices can
- * use the POSTED_INTR_VECTOR even if APICv is disabled,
- * so do it even if APICv is disabled on this vCPU.
+ * Process pending posted interrupts to handle the case where the
+ * notification IRQ arrived in the host, or was never sent (because the
+ * target vCPU wasn't running). Do this regardless of the vCPU's APICv
+ * status, KVM doesn't update assigned devices when APICv is inhibited,
+ * i.e. they can post interrupts even if APICv is temporarily disabled.
*/
if (kvm_lapic_enabled(vcpu))
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
@@ -9819,6 +9959,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (test_thread_flag(TIF_NEED_FPU_LOAD))
switch_fpu_return();
+ if (vcpu->arch.guest_fpu.xfd_err)
+ wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
+
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
@@ -9880,8 +10023,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
+ /*
+ * Sync xfd before calling handle_exit_irqoff() which may
+ * rely on the fact that guest_fpu::xfd is up-to-date (e.g.
+ * in #NM irqoff handler).
+ */
+ if (vcpu->arch.xfd_no_write_intercept)
+ fpu_sync_guest_vmexit_xfd_state();
+
static_call(kvm_x86_handle_exit_irqoff)(vcpu);
+ if (vcpu->arch.guest_fpu.xfd_err)
+ wrmsrl(MSR_IA32_XFD_ERR, 0);
+
/*
* Consume any pending interrupts, including the possible source of
* VM-Exit on SVM and any ticks that occur between VM-Exit and now.
@@ -9946,14 +10100,29 @@ out:
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
- if (!kvm_arch_vcpu_runnable(vcpu) &&
- (!kvm_x86_ops.pre_block || static_call(kvm_x86_pre_block)(vcpu) == 0)) {
+ bool hv_timer;
+
+ if (!kvm_arch_vcpu_runnable(vcpu)) {
+ /*
+ * Switch to the software timer before halt-polling/blocking as
+ * the guest's timer may be a break event for the vCPU, and the
+ * hypervisor timer runs only when the CPU is in guest mode.
+ * Switch before halt-polling so that KVM recognizes an expired
+ * timer before blocking.
+ */
+ hv_timer = kvm_lapic_hv_timer_in_use(vcpu);
+ if (hv_timer)
+ kvm_lapic_switch_to_sw_timer(vcpu);
+
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- kvm_vcpu_block(vcpu);
+ if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+ kvm_vcpu_halt(vcpu);
+ else
+ kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- if (kvm_x86_ops.post_block)
- static_call(kvm_x86_post_block)(vcpu);
+ if (hv_timer)
+ kvm_lapic_switch_to_hv_timer(vcpu);
if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
return 1;
@@ -10146,6 +10315,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
r = -EINTR;
goto out;
}
+ /*
+ * It should be impossible for the hypervisor timer to be in
+ * use before KVM has ever run the vCPU.
+ */
+ WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
kvm_vcpu_block(vcpu);
if (kvm_apic_accept_events(vcpu) < 0) {
r = 0;
@@ -10190,10 +10364,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
} else
WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
- if (kvm_run->immediate_exit)
+ if (kvm_run->immediate_exit) {
r = -EINTR;
- else
- r = vcpu_run(vcpu);
+ goto out;
+ }
+
+ r = static_call(kvm_x86_vcpu_pre_run)(vcpu);
+ if (r <= 0)
+ goto out;
+
+ r = vcpu_run(vcpu);
out:
kvm_put_guest_fpu(vcpu);
@@ -10509,7 +10689,8 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
vcpu->arch.cr2 = sregs->cr2;
*mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
- kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+ static_call_cond(kvm_x86_post_set_cr3)(vcpu, sregs->cr3);
kvm_set_cr8(vcpu, sregs->cr8);
@@ -10526,7 +10707,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
if (update_pdptrs) {
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (is_pae_paging(vcpu)) {
- load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
+ load_pdptrs(vcpu, kvm_read_cr3(vcpu));
*mmu_reset_needed = 1;
}
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -10624,7 +10805,7 @@ static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
{
bool inhibit = false;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
down_write(&kvm->arch.apicv_update_lock);
@@ -11112,7 +11293,7 @@ int kvm_arch_hardware_enable(void)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
int ret;
u64 local_tsc;
u64 max_tsc = 0;
@@ -11369,7 +11550,7 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
static void kvm_free_vcpus(struct kvm *kvm)
{
- unsigned int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
/*
@@ -11379,15 +11560,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
kvm_clear_async_pf_completion_queue(vcpu);
kvm_unload_vcpu_mmu(vcpu);
}
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vcpu_destroy(vcpu);
- mutex_lock(&kvm->lock);
- for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
- kvm->vcpus[i] = NULL;
-
- atomic_set(&kvm->online_vcpus, 0);
- mutex_unlock(&kvm->lock);
+ kvm_destroy_vcpus(kvm);
}
void kvm_arch_sync_events(struct kvm *kvm)
@@ -11555,9 +11729,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
}
static int kvm_alloc_memslot_metadata(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- unsigned long npages)
+ struct kvm_memory_slot *slot)
{
+ unsigned long npages = slot->npages;
int i, r;
/*
@@ -11622,7 +11796,7 @@ out_free:
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
/*
* memslots->generation has been incremented.
@@ -11636,13 +11810,18 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
- enum kvm_mr_change change)
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
- return kvm_alloc_memslot_metadata(kvm, memslot,
- mem->memory_size >> PAGE_SHIFT);
+ return kvm_alloc_memslot_metadata(kvm, new);
+
+ if (change == KVM_MR_FLAGS_ONLY)
+ memcpy(&new->arch, &old->arch, sizeof(old->arch));
+ else if (WARN_ON_ONCE(change != KVM_MR_DELETE))
+ return -EIO;
+
return 0;
}
@@ -11666,13 +11845,15 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
+ u32 old_flags = old ? old->flags : 0;
+ u32 new_flags = new ? new->flags : 0;
+ bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES;
/*
* Update CPU dirty logging if dirty logging is being toggled. This
* applies to all operations.
*/
- if ((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)
+ if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)
kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
/*
@@ -11690,7 +11871,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
* MOVE/DELETE: The old mappings will already have been cleaned up by
* kvm_arch_flush_shadow_memslot().
*/
- if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
+ if ((change != KVM_MR_FLAGS_ONLY) || (new_flags & KVM_MEM_READONLY))
return;
/*
@@ -11698,7 +11879,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
* other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty
* logging isn't being toggled on or off.
*/
- if (WARN_ON_ONCE(!((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)))
+ if (WARN_ON_ONCE(!((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)))
return;
if (!log_dirty_pages) {
@@ -11734,14 +11915,18 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- if (!kvm->arch.n_requested_mmu_pages)
- kvm_mmu_change_mmu_pages(kvm,
- kvm_mmu_calculate_default_mmu_pages(kvm));
+ if (!kvm->arch.n_requested_mmu_pages &&
+ (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
+ unsigned long nr_mmu_pages;
+
+ nr_mmu_pages = kvm->nr_memslot_pages / KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO;
+ nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
+ kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+ }
kvm_mmu_slot_apply_flags(kvm, old, new, change);
@@ -12256,12 +12441,13 @@ EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
struct x86_exception fault;
u32 access = error_code &
(PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
if (!(error_code & PFERR_PRESENT_MASK) ||
- vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) {
+ mmu->gva_to_gpa(vcpu, mmu, gva, access, &fault) != UNMAPPED_GVA) {
/*
* If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
* tables probably do not match the TLB. Just proceed
@@ -12598,6 +12784,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_accept_irq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 6aeca8f1da91..635b75f9e145 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -301,7 +301,6 @@ static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
return is_smm(vcpu) || static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
}
-void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs);
void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
u64 get_kvmclock_ns(struct kvm *kvm);
@@ -337,6 +336,7 @@ extern u64 host_xcr0;
extern u64 supported_xcr0;
extern u64 host_xss;
extern u64 supported_xss;
+extern bool enable_pmu;
static inline bool kvm_mpx_supported(void)
{
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index dff2bdf9507a..0e3f7d6e9fd7 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -16,6 +16,7 @@
#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
+#include <xen/interface/event_channel.h>
#include "trace.h"
@@ -23,38 +24,77 @@ DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
{
+ struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
+ struct pvclock_wall_clock *wc;
gpa_t gpa = gfn_to_gpa(gfn);
- int wc_ofs, sec_hi_ofs;
+ u32 *wc_sec_hi;
+ u32 wc_version;
+ u64 wall_nsec;
int ret = 0;
int idx = srcu_read_lock(&kvm->srcu);
- if (kvm_is_error_hva(gfn_to_hva(kvm, gfn))) {
- ret = -EFAULT;
+ if (gfn == GPA_INVALID) {
+ kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
goto out;
}
- kvm->arch.xen.shinfo_gfn = gfn;
+
+ do {
+ ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, false, true,
+ gpa, PAGE_SIZE, false);
+ if (ret)
+ goto out;
+
+ /*
+ * This code mirrors kvm_write_wall_clock() except that it writes
+ * directly through the pfn cache and doesn't mark the page dirty.
+ */
+ wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
+
+ /* It could be invalid again already, so we need to check */
+ read_lock_irq(&gpc->lock);
+
+ if (gpc->valid)
+ break;
+
+ read_unlock_irq(&gpc->lock);
+ } while (1);
/* Paranoia checks on the 32-bit struct layout */
BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
- /* 32-bit location by default */
- wc_ofs = offsetof(struct compat_shared_info, wc);
- sec_hi_ofs = offsetof(struct compat_shared_info, arch.wc_sec_hi);
-
#ifdef CONFIG_X86_64
/* Paranoia checks on the 64-bit struct layout */
BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);
- if (kvm->arch.xen.long_mode) {
- wc_ofs = offsetof(struct shared_info, wc);
- sec_hi_ofs = offsetof(struct shared_info, wc_sec_hi);
- }
+ if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+ struct shared_info *shinfo = gpc->khva;
+
+ wc_sec_hi = &shinfo->wc_sec_hi;
+ wc = &shinfo->wc;
+ } else
#endif
+ {
+ struct compat_shared_info *shinfo = gpc->khva;
+
+ wc_sec_hi = &shinfo->arch.wc_sec_hi;
+ wc = &shinfo->wc;
+ }
+
+ /* Increment and ensure an odd value */
+ wc_version = wc->version = (wc->version + 1) | 1;
+ smp_wmb();
+
+ wc->nsec = do_div(wall_nsec, 1000000000);
+ wc->sec = (u32)wall_nsec;
+ *wc_sec_hi = wall_nsec >> 32;
+ smp_wmb();
+
+ wc->version = wc_version + 1;
+ read_unlock_irq(&gpc->lock);
- kvm_write_wall_clock(kvm, gpa + wc_ofs, sec_hi_ofs - wc_ofs);
kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);
out:
@@ -190,6 +230,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
+ unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
+ bool atomic = in_atomic() || !task_is_running(current);
int err;
u8 rc = 0;
@@ -199,6 +241,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
*/
struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
struct kvm_memslots *slots = kvm_memslots(v->kvm);
+ bool ghc_valid = slots->generation == ghc->generation &&
+ !kvm_is_error_hva(ghc->hva) && ghc->memslot;
+
unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
/* No need for compat handling here */
@@ -214,8 +259,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
* cache in kvm_read_guest_offset_cached(), but just uses
* __get_user() instead. And falls back to the slow path.
*/
- if (likely(slots->generation == ghc->generation &&
- !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
+ if (!evtchn_pending_sel && ghc_valid) {
/* Fast path */
pagefault_disable();
err = __get_user(rc, (u8 __user *)ghc->hva + offset);
@@ -234,11 +278,82 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
* and we'll end up getting called again from a context where we *can*
* fault in the page and wait for it.
*/
- if (in_atomic() || !task_is_running(current))
+ if (atomic)
return 1;
- kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
- sizeof(rc));
+ if (!ghc_valid) {
+ err = kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len);
+ if (err || !ghc->memslot) {
+ /*
+ * If this failed, userspace has screwed up the
+ * vcpu_info mapping. No interrupts for you.
+ */
+ return 0;
+ }
+ }
+
+ /*
+ * Now we have a valid (protected by srcu) userspace HVA in
+ * ghc->hva which points to the struct vcpu_info. If there
+ * are any bits in the in-kernel evtchn_pending_sel then
+ * we need to write those to the guest vcpu_info and set
+ * its evtchn_upcall_pending flag. If there aren't any bits
+ * to add, we only want to *check* evtchn_upcall_pending.
+ */
+ if (evtchn_pending_sel) {
+ bool long_mode = v->kvm->arch.xen.long_mode;
+
+ if (!user_access_begin((void __user *)ghc->hva, sizeof(struct vcpu_info)))
+ return 0;
+
+ if (IS_ENABLED(CONFIG_64BIT) && long_mode) {
+ struct vcpu_info __user *vi = (void __user *)ghc->hva;
+
+ /* Attempt to set the evtchn_pending_sel bits in the
+ * guest, and if that succeeds then clear the same
+ * bits in the in-kernel version. */
+ asm volatile("1:\t" LOCK_PREFIX "orq %0, %1\n"
+ "\tnotq %0\n"
+ "\t" LOCK_PREFIX "andq %0, %2\n"
+ "2:\n"
+ "\t.section .fixup,\"ax\"\n"
+ "3:\tjmp\t2b\n"
+ "\t.previous\n"
+ _ASM_EXTABLE_UA(1b, 3b)
+ : "=r" (evtchn_pending_sel),
+ "+m" (vi->evtchn_pending_sel),
+ "+m" (v->arch.xen.evtchn_pending_sel)
+ : "0" (evtchn_pending_sel));
+ } else {
+ struct compat_vcpu_info __user *vi = (void __user *)ghc->hva;
+ u32 evtchn_pending_sel32 = evtchn_pending_sel;
+
+ /* Attempt to set the evtchn_pending_sel bits in the
+ * guest, and if that succeeds then clear the same
+ * bits in the in-kernel version. */
+ asm volatile("1:\t" LOCK_PREFIX "orl %0, %1\n"
+ "\tnotl %0\n"
+ "\t" LOCK_PREFIX "andl %0, %2\n"
+ "2:\n"
+ "\t.section .fixup,\"ax\"\n"
+ "3:\tjmp\t2b\n"
+ "\t.previous\n"
+ _ASM_EXTABLE_UA(1b, 3b)
+ : "=r" (evtchn_pending_sel32),
+ "+m" (vi->evtchn_pending_sel),
+ "+m" (v->arch.xen.evtchn_pending_sel)
+ : "0" (evtchn_pending_sel32));
+ }
+ rc = 1;
+ unsafe_put_user(rc, (u8 __user *)ghc->hva + offset, err);
+
+ err:
+ user_access_end();
+
+ mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+ } else {
+ __get_user(rc, (u8 __user *)ghc->hva + offset);
+ }
return rc;
}
@@ -260,15 +375,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
break;
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
- if (data->u.shared_info.gfn == GPA_INVALID) {
- kvm->arch.xen.shinfo_gfn = GPA_INVALID;
- r = 0;
- break;
- }
r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
break;
-
case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
if (data->u.vector && data->u.vector < 0x10)
r = -EINVAL;
@@ -299,7 +408,10 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
break;
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
- data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
+ if (kvm->arch.xen.shinfo_cache.active)
+ data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
+ else
+ data->u.shared_info.gfn = GPA_INVALID;
r = 0;
break;
@@ -661,11 +773,12 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
void kvm_xen_init_vm(struct kvm *kvm)
{
- kvm->arch.xen.shinfo_gfn = GPA_INVALID;
}
void kvm_xen_destroy_vm(struct kvm *kvm)
{
+ kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
+
if (kvm->arch.xen_hvm_config.msr)
static_branch_slow_dec_deferred(&kvm_xen_enabled);
}
@@ -737,3 +850,179 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
return 0;
}
+
+static inline int max_evtchn_port(struct kvm *kvm)
+{
+ if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
+ return EVTCHN_2L_NR_CHANNELS;
+ else
+ return COMPAT_EVTCHN_2L_NR_CHANNELS;
+}
+
+/*
+ * This follows the kvm_set_irq() API, so it returns:
+ * < 0 Interrupt was ignored (masked or not delivered for other reasons)
+ * = 0 Interrupt was coalesced (previous irq is still pending)
+ * > 0 Number of CPUs interrupt was delivered to
+ */
+int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm)
+{
+ struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
+ struct kvm_vcpu *vcpu;
+ unsigned long *pending_bits, *mask_bits;
+ unsigned long flags;
+ int port_word_bit;
+ bool kick_vcpu = false;
+ int idx;
+ int rc;
+
+ vcpu = kvm_get_vcpu_by_id(kvm, e->xen_evtchn.vcpu);
+ if (!vcpu)
+ return -1;
+
+ if (!vcpu->arch.xen.vcpu_info_set)
+ return -1;
+
+ if (e->xen_evtchn.port >= max_evtchn_port(kvm))
+ return -1;
+
+ rc = -EWOULDBLOCK;
+ read_lock_irqsave(&gpc->lock, flags);
+
+ idx = srcu_read_lock(&kvm->srcu);
+ if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
+ goto out_rcu;
+
+ if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+ struct shared_info *shinfo = gpc->khva;
+ pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+ mask_bits = (unsigned long *)&shinfo->evtchn_mask;
+ port_word_bit = e->xen_evtchn.port / 64;
+ } else {
+ struct compat_shared_info *shinfo = gpc->khva;
+ pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+ mask_bits = (unsigned long *)&shinfo->evtchn_mask;
+ port_word_bit = e->xen_evtchn.port / 32;
+ }
+
+ /*
+ * If this port wasn't already set, and if it isn't masked, then
+ * we try to set the corresponding bit in the in-kernel shadow of
+ * evtchn_pending_sel for the target vCPU. And if *that* wasn't
+ * already set, then we kick the vCPU in question to write to the
+ * *real* evtchn_pending_sel in its own guest vcpu_info struct.
+ */
+ if (test_and_set_bit(e->xen_evtchn.port, pending_bits)) {
+ rc = 0; /* It was already raised */
+ } else if (test_bit(e->xen_evtchn.port, mask_bits)) {
+ rc = -1; /* Masked */
+ } else {
+ rc = 1; /* Delivered. But was the vCPU waking already? */
+ if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
+ kick_vcpu = true;
+ }
+
+ out_rcu:
+ srcu_read_unlock(&kvm->srcu, idx);
+ read_unlock_irqrestore(&gpc->lock, flags);
+
+ if (kick_vcpu) {
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ return rc;
+}
+
+/* This is the version called from kvm_set_irq() as the .set function */
+static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+ int irq_source_id, int level, bool line_status)
+{
+ bool mm_borrowed = false;
+ int rc;
+
+ if (!level)
+ return -1;
+
+ rc = kvm_xen_set_evtchn_fast(e, kvm);
+ if (rc != -EWOULDBLOCK)
+ return rc;
+
+ if (current->mm != kvm->mm) {
+ /*
+ * If not on a thread which already belongs to this KVM,
+ * we'd better be in the irqfd workqueue.
+ */
+ if (WARN_ON_ONCE(current->mm))
+ return -EINVAL;
+
+ kthread_use_mm(kvm->mm);
+ mm_borrowed = true;
+ }
+
+ /*
+ * For the irqfd workqueue, using the main kvm->lock mutex is
+ * fine since this function is invoked from kvm_set_irq() with
+ * no other lock held, no srcu. In future if it will be called
+ * directly from a vCPU thread (e.g. on hypercall for an IPI)
+ * then it may need to switch to using a leaf-node mutex for
+ * serializing the shared_info mapping.
+ */
+ mutex_lock(&kvm->lock);
+
+ /*
+ * It is theoretically possible for the page to be unmapped
+ * and the MMU notifier to invalidate the shared_info before
+ * we even get to use it. In that case, this looks like an
+ * infinite loop. It was tempting to do it via the userspace
+ * HVA instead... but that just *hides* the fact that it's
+ * an infinite loop, because if a fault occurs and it waits
+ * for the page to come back, it can *still* immediately
+ * fault and have to wait again, repeatedly.
+ *
+ * Conversely, the page could also have been reinstated by
+ * another thread before we even obtain the mutex above, so
+ * check again *first* before remapping it.
+ */
+ do {
+ struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
+ int idx;
+
+ rc = kvm_xen_set_evtchn_fast(e, kvm);
+ if (rc != -EWOULDBLOCK)
+ break;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa,
+ PAGE_SIZE, false);
+ srcu_read_unlock(&kvm->srcu, idx);
+ } while(!rc);
+
+ mutex_unlock(&kvm->lock);
+
+ if (mm_borrowed)
+ kthread_unuse_mm(kvm->mm);
+
+ return rc;
+}
+
+int kvm_xen_setup_evtchn(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+
+{
+ if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
+ return -EINVAL;
+
+ /* We only support 2 level event channels for now */
+ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
+ return -EINVAL;
+
+ e->xen_evtchn.port = ue->u.xen_evtchn.port;
+ e->xen_evtchn.vcpu = ue->u.xen_evtchn.vcpu;
+ e->xen_evtchn.priority = ue->u.xen_evtchn.priority;
+ e->set = evtchn_set_fn;
+
+ return 0;
+}
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index cc0cf5f37450..adbcc9ed59db 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -24,6 +24,12 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
void kvm_xen_init_vm(struct kvm *kvm);
void kvm_xen_destroy_vm(struct kvm *kvm);
+int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm);
+int kvm_xen_setup_evtchn(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue);
+
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -134,6 +140,9 @@ struct compat_shared_info {
struct compat_arch_shared_info arch;
};
+#define COMPAT_EVTCHN_2L_NR_CHANNELS (8 * \
+ sizeof_field(struct compat_shared_info, \
+ evtchn_pending))
struct compat_vcpu_runstate_info {
int state;
uint64_t state_entry_time;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4bfed53e210e..d0074c6ed31a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1413,8 +1413,7 @@ good_area:
* and if there is a fatal signal pending there is no guarantee
* that we made any progress. Handle this case first.
*/
- if (unlikely((fault & VM_FAULT_RETRY) &&
- (flags & FAULT_FLAG_ALLOW_RETRY))) {
+ if (unlikely(fault & VM_FAULT_RETRY)) {
flags |= FAULT_FLAG_TRIED;
goto retry;
}
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 948656069cdd..052f1d78a562 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -20,7 +20,7 @@ struct pci_root_info {
};
static bool pci_use_crs = true;
-static bool pci_ignore_seg = false;
+static bool pci_ignore_seg;
static int __init set_use_crs(const struct dmi_system_id *id)
{
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 95d26a69088b..40d6a06e41c8 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -8,7 +8,6 @@ endmenu
config UML_X86
def_bool y
- select GENERIC_FIND_FIRST_BIT
config 64BIT
bool "64-bit kernel" if "$(SUBARCH)" = "x86"
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index 9778216d6e09..ee2769519eaf 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -12,7 +12,7 @@
# Core configuration.
# (Use VAR=<xtensa_config> to use another default compiler.)
-variant-y := $(patsubst "%",%,$(CONFIG_XTENSA_VARIANT_NAME))
+variant-y := $(CONFIG_XTENSA_VARIANT_NAME)
VARIANT = $(variant-y)
diff --git a/arch/xtensa/boot/dts/Makefile b/arch/xtensa/boot/dts/Makefile
index 0b8d00cdae7c..720628c0d8b9 100644
--- a/arch/xtensa/boot/dts/Makefile
+++ b/arch/xtensa/boot/dts/Makefile
@@ -7,10 +7,7 @@
#
#
-BUILTIN_DTB_SOURCE := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o
-ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"")
-obj-$(CONFIG_OF) += $(BUILTIN_DTB_SOURCE)
-endif
+obj-$(CONFIG_OF) += $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_SOURCE))
# for CONFIG_OF_ALL_DTBS test
dtstree := $(srctree)/$(src)
diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h
index 3f71d364ba90..cd225896c40f 100644
--- a/arch/xtensa/include/asm/bitops.h
+++ b/arch/xtensa/include/asm/bitops.h
@@ -205,7 +205,6 @@ BIT_OPS(change, "xor", )
#undef BIT_OP
#undef TEST_AND_BIT_OP
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 99ab3c1a3387..a1029a5b6a1d 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1433,7 +1433,7 @@ ENTRY(fast_syscall_spill_registers)
rsync
movi abi_arg0, SIGSEGV
- abi_call do_exit
+ abi_call make_task_dead
/* shouldn't return, so panic */
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 3e3e1a506bed..52c94ab5c205 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -420,3 +420,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 4b4dbeb2d612..9345007d474d 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -552,5 +552,5 @@ void __noreturn die(const char * str, struct pt_regs * regs, long err)
if (panic_on_oops)
panic("Fatal exception");
- do_exit(err);
+ make_task_dead(err);
}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index fd6a70635962..06d0973a0d74 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -127,17 +127,16 @@ good_area:
goto do_sigbus;
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
- /* No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ /* No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+
+ goto retry;
}
mmap_read_unlock(mm);
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 07b642c1916a..8eb6ad1a3a1d 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -208,7 +208,7 @@ static int simdisk_detach(struct simdisk *dev)
static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
- struct simdisk *dev = PDE_DATA(file_inode(file));
+ struct simdisk *dev = pde_data(file_inode(file));
const char *s = dev->filename;
if (s) {
ssize_t n = simple_read_from_buffer(buf, size, ppos,
@@ -225,7 +225,7 @@ static ssize_t proc_write_simdisk(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
char *tmp = memdup_user_nul(buf, count);
- struct simdisk *dev = PDE_DATA(file_inode(file));
+ struct simdisk *dev = pde_data(file_inode(file));
int err;
if (IS_ERR(tmp))
diff --git a/block/bdev.c b/block/bdev.c
index 8bf93a19041b..102837a37051 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -24,7 +24,6 @@
#include <linux/pseudo_fs.h>
#include <linux/uio.h>
#include <linux/namei.h>
-#include <linux/cleancache.h>
#include <linux/part_stat.h>
#include <linux/uaccess.h>
#include "../fs/internal.h"
@@ -88,10 +87,6 @@ void invalidate_bdev(struct block_device *bdev)
lru_add_drain_all(); /* make sure all lru add caches are flushed */
invalidate_mapping_pages(mapping, 0, -1);
}
- /* 99% of the time, we don't need to flush the cleancache on the bdev.
- * But, for the strange corners, lets be cautious
- */
- cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);
diff --git a/block/bio.c b/block/bio.c
index 0d400ba2dbd1..4312a8085396 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -569,7 +569,8 @@ static void bio_truncate(struct bio *bio, unsigned new_size)
offset = new_size - done;
else
offset = 0;
- zero_user(bv.bv_page, offset, bv.bv_len - offset);
+ zero_user(bv.bv_page, bv.bv_offset + offset,
+ bv.bv_len - offset);
truncated = true;
}
done += bv.bv_len;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e55a6834c9a6..845f74e8dd7b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -17,6 +17,21 @@
#include "blk-mq-tag.h"
/*
+ * Recalculate wakeup batch when tag is shared by hctx.
+ */
+static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
+ unsigned int users)
+{
+ if (!users)
+ return;
+
+ sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
+ users);
+ sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
+ users);
+}
+
+/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
* to get tag when first time, the other shared-tag users could reserve
@@ -24,18 +39,26 @@
*/
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
+ unsigned int users;
+
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
- if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
- !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+ test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
+ return true;
+ }
} else {
- if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
- !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+ test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
+ return true;
+ }
}
+ users = atomic_inc_return(&hctx->tags->active_queues);
+
+ blk_mq_update_wake_batch(hctx->tags, users);
+
return true;
}
@@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_tags *tags = hctx->tags;
+ unsigned int users;
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
@@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
return;
}
- atomic_dec(&tags->active_queues);
+ users = atomic_dec_return(&tags->active_queues);
+
+ blk_mq_update_wake_batch(tags, users);
blk_mq_tag_wakeup_all(tags, false);
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a6d4780580fc..f3bf3358a3bb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2976,6 +2976,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
bio = bio_clone_fast(bio_src, gfp_mask, bs);
if (!bio)
goto free_and_out;
+ bio->bi_bdev = rq->q->disk->part0;
if (bio_ctr && bio_ctr(bio, bio_src, data))
goto free_and_out;
@@ -3284,7 +3285,7 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu,
struct blk_mq_hw_ctx *hctx)
{
- if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu)
+ if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu)
return false;
if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids)
return false;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e20eadfcf5c8..9f32882ceb2f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -811,6 +811,9 @@ static void blk_release_queue(struct kobject *kobj)
bioset_exit(&q->bio_split);
+ if (blk_queue_has_srcu(q))
+ cleanup_srcu_struct(q->srcu);
+
ida_simple_remove(&blk_queue_ida, q->id);
call_rcu(&q->rcu_head, blk_free_queue_rcu);
}
@@ -887,7 +890,6 @@ int blk_register_queue(struct gendisk *disk)
kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
mutex_unlock(&q->sysfs_lock);
- ret = 0;
unlock:
mutex_unlock(&q->sysfs_dir_lock);
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 85d919bf60c7..3ed5eaf3446a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -865,7 +865,7 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
-SHOW_INT(deadline_async_depth_show, dd->front_merges);
+SHOW_INT(deadline_async_depth_show, dd->async_depth);
SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
#undef SHOW_INT
#undef SHOW_JIFFIES
@@ -895,7 +895,7 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA
STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
-STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);
+STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
#undef STORE_FUNCTION
#undef STORE_INT
diff --git a/certs/.gitignore b/certs/.gitignore
index 8c3763f80be3..9e42fe3e02f5 100644
--- a/certs/.gitignore
+++ b/certs/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
+/extract-cert
/x509_certificate_list
/x509_revocation_list
diff --git a/certs/Kconfig b/certs/Kconfig
index ae7f2e876a31..73d1350c223a 100644
--- a/certs/Kconfig
+++ b/certs/Kconfig
@@ -17,21 +17,19 @@ config MODULE_SIG_KEY
choice
prompt "Type of module signing key to be generated"
- default MODULE_SIG_KEY_TYPE_RSA
+ depends on MODULE_SIG || (IMA_APPRAISE_MODSIG && MODULES)
help
The type of module signing key type to generate. This option
does not apply if a #PKCS11 URI is used.
config MODULE_SIG_KEY_TYPE_RSA
bool "RSA"
- depends on MODULE_SIG || (IMA_APPRAISE_MODSIG && MODULES)
help
Use an RSA key for module signing.
config MODULE_SIG_KEY_TYPE_ECDSA
bool "ECDSA"
select CRYPTO_ECDSA
- depends on MODULE_SIG || (IMA_APPRAISE_MODSIG && MODULES)
help
Use an elliptic curve key (NIST P384) for module signing. Consider
using a strong hash like sha256 or sha384 for hashing modules.
diff --git a/certs/Makefile b/certs/Makefile
index 279433783b10..3ea7fe60823f 100644
--- a/certs/Makefile
+++ b/certs/Makefile
@@ -6,31 +6,21 @@
obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o
obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o
-ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"")
+ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),)
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o
else
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_nohashes.o
endif
-ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y)
+quiet_cmd_extract_certs = CERT $@
+ cmd_extract_certs = $(obj)/extract-cert $(2) $@
-$(eval $(call config_filename,SYSTEM_TRUSTED_KEYS))
-
-# GCC doesn't include .incbin files in -MD generated dependencies (PR#66871)
$(obj)/system_certificates.o: $(obj)/x509_certificate_list
-# Cope with signing_key.x509 existing in $(srctree) not $(objtree)
-AFLAGS_system_certificates.o := -I$(srctree)
-
-quiet_cmd_extract_certs = EXTRACT_CERTS $(patsubst "%",%,$(2))
- cmd_extract_certs = scripts/extract-cert $(2) $@
+$(obj)/x509_certificate_list: $(CONFIG_SYSTEM_TRUSTED_KEYS) $(obj)/extract-cert FORCE
+ $(call if_changed,extract_certs,$(if $(CONFIG_SYSTEM_TRUSTED_KEYS),$<,""))
targets += x509_certificate_list
-$(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(SYSTEM_TRUSTED_KEYS_FILENAME) FORCE
- $(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS))
-endif # CONFIG_SYSTEM_TRUSTED_KEYRING
-
-clean-files := x509_certificate_list .x509.list x509_revocation_list
ifeq ($(CONFIG_MODULE_SIG),y)
SIGN_KEY = y
@@ -50,103 +40,54 @@ ifdef SIGN_KEY
# fail and that the kernel may be used afterwards.
#
###############################################################################
-ifndef CONFIG_MODULE_SIG_HASH
-$(error Could not determine digest type to use from kernel config)
-endif
-
-redirect_openssl = 2>&1
-quiet_redirect_openssl = 2>&1
-silent_redirect_openssl = 2>/dev/null
-openssl_available = $(shell openssl help 2>/dev/null && echo yes)
# We do it this way rather than having a boolean option for enabling an
# external private key, because 'make randconfig' might enable such a
# boolean option and we unfortunately can't make it depend on !RANDCONFIG.
-ifeq ($(CONFIG_MODULE_SIG_KEY),"certs/signing_key.pem")
+ifeq ($(CONFIG_MODULE_SIG_KEY),certs/signing_key.pem)
-ifeq ($(openssl_available),yes)
-X509TEXT=$(shell openssl x509 -in "certs/signing_key.pem" -text 2>/dev/null)
-endif
+keytype-$(CONFIG_MODULE_SIG_KEY_TYPE_ECDSA) := -newkey ec -pkeyopt ec_paramgen_curve:secp384r1
-# Support user changing key type
-ifdef CONFIG_MODULE_SIG_KEY_TYPE_ECDSA
-keytype_openssl = -newkey ec -pkeyopt ec_paramgen_curve:secp384r1
-ifeq ($(openssl_available),yes)
-$(if $(findstring id-ecPublicKey,$(X509TEXT)),,$(shell rm -f "certs/signing_key.pem"))
-endif
-endif # CONFIG_MODULE_SIG_KEY_TYPE_ECDSA
+quiet_cmd_gen_key = GENKEY $@
+ cmd_gen_key = openssl req -new -nodes -utf8 -$(CONFIG_MODULE_SIG_HASH) -days 36500 \
+ -batch -x509 -config $< \
+ -outform PEM -out $@ -keyout $@ $(keytype-y) 2>&1
-ifdef CONFIG_MODULE_SIG_KEY_TYPE_RSA
-ifeq ($(openssl_available),yes)
-$(if $(findstring rsaEncryption,$(X509TEXT)),,$(shell rm -f "certs/signing_key.pem"))
-endif
-endif # CONFIG_MODULE_SIG_KEY_TYPE_RSA
-
-$(obj)/signing_key.pem: $(obj)/x509.genkey
- @$(kecho) "###"
- @$(kecho) "### Now generating an X.509 key pair to be used for signing modules."
- @$(kecho) "###"
- @$(kecho) "### If this takes a long time, you might wish to run rngd in the"
- @$(kecho) "### background to keep the supply of entropy topped up. It"
- @$(kecho) "### needs to be run as root, and uses a hardware random"
- @$(kecho) "### number generator if one is available."
- @$(kecho) "###"
- $(Q)openssl req -new -nodes -utf8 -$(CONFIG_MODULE_SIG_HASH) -days 36500 \
- -batch -x509 -config $(obj)/x509.genkey \
- -outform PEM -out $(obj)/signing_key.pem \
- -keyout $(obj)/signing_key.pem \
- $(keytype_openssl) \
- $($(quiet)redirect_openssl)
- @$(kecho) "###"
- @$(kecho) "### Key pair generated."
- @$(kecho) "###"
+$(obj)/signing_key.pem: $(obj)/x509.genkey FORCE
+ $(call if_changed,gen_key)
+
+targets += signing_key.pem
+quiet_cmd_copy_x509_config = COPY $@
+ cmd_copy_x509_config = cat $(srctree)/$(src)/default_x509.genkey > $@
+
+# You can provide your own config file. If not present, copy the default one.
$(obj)/x509.genkey:
- @$(kecho) Generating X.509 key generation config
- @echo >$@ "[ req ]"
- @echo >>$@ "default_bits = 4096"
- @echo >>$@ "distinguished_name = req_distinguished_name"
- @echo >>$@ "prompt = no"
- @echo >>$@ "string_mask = utf8only"
- @echo >>$@ "x509_extensions = myexts"
- @echo >>$@
- @echo >>$@ "[ req_distinguished_name ]"
- @echo >>$@ "#O = Unspecified company"
- @echo >>$@ "CN = Build time autogenerated kernel key"
- @echo >>$@ "#emailAddress = unspecified.user@unspecified.company"
- @echo >>$@
- @echo >>$@ "[ myexts ]"
- @echo >>$@ "basicConstraints=critical,CA:FALSE"
- @echo >>$@ "keyUsage=digitalSignature"
- @echo >>$@ "subjectKeyIdentifier=hash"
- @echo >>$@ "authorityKeyIdentifier=keyid"
-endif # CONFIG_MODULE_SIG_KEY
+ $(call cmd,copy_x509_config)
-$(eval $(call config_filename,MODULE_SIG_KEY))
+endif # CONFIG_MODULE_SIG_KEY
# If CONFIG_MODULE_SIG_KEY isn't a PKCS#11 URI, depend on it
-ifeq ($(patsubst pkcs11:%,%,$(firstword $(MODULE_SIG_KEY_FILENAME))),$(firstword $(MODULE_SIG_KEY_FILENAME)))
-X509_DEP := $(MODULE_SIG_KEY_SRCPREFIX)$(MODULE_SIG_KEY_FILENAME)
+ifneq ($(filter-out pkcs11:%, $(CONFIG_MODULE_SIG_KEY)),)
+X509_DEP := $(CONFIG_MODULE_SIG_KEY)
endif
-# GCC PR#66871 again.
$(obj)/system_certificates.o: $(obj)/signing_key.x509
-targets += signing_key.x509
-$(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE
- $(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY))
+$(obj)/signing_key.x509: $(X509_DEP) $(obj)/extract-cert FORCE
+ $(call if_changed,extract_certs,$(if $(CONFIG_MODULE_SIG_KEY),$(if $(X509_DEP),$<,$(CONFIG_MODULE_SIG_KEY)),""))
endif # CONFIG_MODULE_SIG
-ifeq ($(CONFIG_SYSTEM_REVOCATION_LIST),y)
-
-$(eval $(call config_filename,SYSTEM_REVOCATION_KEYS))
+targets += signing_key.x509
$(obj)/revocation_certificates.o: $(obj)/x509_revocation_list
-quiet_cmd_extract_certs = EXTRACT_CERTS $(patsubst "%",%,$(2))
- cmd_extract_certs = scripts/extract-cert $(2) $@
+$(obj)/x509_revocation_list: $(CONFIG_SYSTEM_REVOCATION_KEYS) $(obj)/extract-cert FORCE
+ $(call if_changed,extract_certs,$(if $(CONFIG_SYSTEM_REVOCATION_KEYS),$<,""))
targets += x509_revocation_list
-$(obj)/x509_revocation_list: scripts/extract-cert $(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(SYSTEM_REVOCATION_KEYS_FILENAME) FORCE
- $(call if_changed,extract_certs,$(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_REVOCATION_KEYS))
-endif
+
+hostprogs := extract-cert
+
+HOSTCFLAGS_extract-cert.o = $(shell pkg-config --cflags libcrypto 2> /dev/null)
+HOSTLDLIBS_extract-cert = $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto)
diff --git a/certs/default_x509.genkey b/certs/default_x509.genkey
new file mode 100644
index 000000000000..d4c6628cb8e5
--- /dev/null
+++ b/certs/default_x509.genkey
@@ -0,0 +1,17 @@
+[ req ]
+default_bits = 4096
+distinguished_name = req_distinguished_name
+prompt = no
+string_mask = utf8only
+x509_extensions = myexts
+
+[ req_distinguished_name ]
+#O = Unspecified company
+CN = Build time autogenerated kernel key
+#emailAddress = unspecified.user@unspecified.company
+
+[ myexts ]
+basicConstraints=critical,CA:FALSE
+keyUsage=digitalSignature
+subjectKeyIdentifier=hash
+authorityKeyIdentifier=keyid
diff --git a/scripts/extract-cert.c b/certs/extract-cert.c
index 3bc48c726c41..f7ef7862f207 100644
--- a/scripts/extract-cert.c
+++ b/certs/extract-cert.c
@@ -29,7 +29,7 @@ static __attribute__((noreturn))
void format(void)
{
fprintf(stderr,
- "Usage: scripts/extract-cert <source> <dest>\n");
+ "Usage: extract-cert <source> <dest>\n");
exit(2);
}
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 94bfa32cc6a1..442765219c37 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1928,5 +1928,3 @@ source "crypto/asymmetric_keys/Kconfig"
source "certs/Kconfig"
endif # if CRYPTO
-
-source "lib/crypto/Kconfig"
diff --git a/crypto/algboss.c b/crypto/algboss.c
index 1814d2c5188a..eb5fe84efb83 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -67,7 +67,7 @@ out:
complete_all(&param->larval->completion);
crypto_alg_put(&param->larval->alg);
kfree(param);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
}
static int cryptomgr_schedule_probe(struct crypto_larval *larval)
@@ -190,7 +190,7 @@ skiptest:
crypto_alg_tested(param->driver, err);
kfree(param);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
}
static int cryptomgr_schedule_test(struct crypto_alg *alg)
diff --git a/drivers/accessibility/speakup/speakup_acntpc.c b/drivers/accessibility/speakup/speakup_acntpc.c
index c1ec087dca13..023172ca22ef 100644
--- a/drivers/accessibility/speakup/speakup_acntpc.c
+++ b/drivers/accessibility/speakup/speakup_acntpc.c
@@ -247,7 +247,7 @@ static void synth_flush(struct spk_synth *synth)
static int synth_probe(struct spk_synth *synth)
{
unsigned int port_val = 0;
- int i = 0;
+ int i;
pr_info("Probing for %s.\n", synth->long_name);
if (port_forced) {
diff --git a/drivers/accessibility/speakup/speakup_dtlk.c b/drivers/accessibility/speakup/speakup_dtlk.c
index 92838d3ae9eb..a9dd5c45d237 100644
--- a/drivers/accessibility/speakup/speakup_dtlk.c
+++ b/drivers/accessibility/speakup/speakup_dtlk.c
@@ -316,7 +316,7 @@ static struct synth_settings *synth_interrogate(struct spk_synth *synth)
static int synth_probe(struct spk_synth *synth)
{
unsigned int port_val = 0;
- int i = 0;
+ int i;
struct synth_settings *sp;
pr_info("Probing for DoubleTalk.\n");
diff --git a/drivers/accessibility/speakup/speakup_keypc.c b/drivers/accessibility/speakup/speakup_keypc.c
index 311f4aa0be22..1618be87bff1 100644
--- a/drivers/accessibility/speakup/speakup_keypc.c
+++ b/drivers/accessibility/speakup/speakup_keypc.c
@@ -254,7 +254,7 @@ static void synth_flush(struct spk_synth *synth)
static int synth_probe(struct spk_synth *synth)
{
unsigned int port_val = 0;
- int i = 0;
+ int i;
pr_info("Probing for %s.\n", synth->long_name);
if (port_forced) {
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 705331e3d87a..ba45541b1f1f 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -520,6 +520,28 @@ config ACPI_CONFIGFS
userspace. The configurable ACPI groups will be visible under
/config/acpi, assuming configfs is mounted under /config.
+config ACPI_PFRUT
+ tristate "ACPI Platform Firmware Runtime Update and Telemetry"
+ depends on 64BIT
+ help
+ This mechanism allows certain pieces of the platform firmware
+ to be updated on the fly while the system is running (runtime)
+ without the need to restart it, which is key in the cases when
+ the system needs to be available 100% of the time and it cannot
+ afford the downtime related to restarting it, or when the work
+ carried out by the system is particularly important, so it cannot
+ be interrupted, and it is not practical to wait until it is complete.
+
+ The existing firmware code can be modified (driver update) or
+ extended by adding new code to the firmware (code injection).
+
+ Besides, the telemetry driver allows user space to fetch telemetry
+ data from the firmware with the help of the Platform Firmware Runtime
+ Telemetry interface.
+
+ To compile the drivers as modules, choose M here:
+ the modules will be called pfr_update and pfr_telemetry.
+
if ARM64
source "drivers/acpi/arm64/Kconfig"
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 08c2d985c57c..bb757148e7ba 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -9,7 +9,7 @@ ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT
# ACPI Boot-Time Table Parsing
#
ifeq ($(CONFIG_ACPI_CUSTOM_DSDT),y)
-tables.o: $(src)/../../include/$(subst $\",,$(CONFIG_ACPI_CUSTOM_DSDT_FILE)) ;
+tables.o: $(src)/../../include/$(CONFIG_ACPI_CUSTOM_DSDT_FILE) ;
endif
@@ -103,6 +103,7 @@ obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_acpi.o
obj-$(CONFIG_ACPI_SPCR_TABLE) += spcr.o
obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o
obj-$(CONFIG_ACPI_PPTT) += pptt.o
+obj-$(CONFIG_ACPI_PFRUT) += pfr_update.o pfr_telemetry.o
# processor has its own "processor." module_param namespace
processor-y := processor_driver.o
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index 2b958b426b03..e7934ba79b02 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -102,6 +102,8 @@ static int fch_misc_setup(struct apd_private_data *pdata)
resource_size(rentry->res));
break;
}
+ if (!clk_data->base)
+ return -ENOMEM;
acpi_dev_free_resource_list(&resource_list);
diff --git a/drivers/acpi/acpi_pcc.c b/drivers/acpi/acpi_pcc.c
index 41e3ebd204ff..a12b55d81209 100644
--- a/drivers/acpi/acpi_pcc.c
+++ b/drivers/acpi/acpi_pcc.c
@@ -31,7 +31,7 @@ struct pcc_data {
struct acpi_pcc_info ctx;
};
-struct acpi_pcc_info pcc_ctx;
+static struct acpi_pcc_info pcc_ctx;
static void pcc_rx_callback(struct mbox_client *cl, void *m)
{
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index a9d2de403c0c..866560cbb082 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -915,30 +915,31 @@ int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
{
- int ret_val = 0;
void __iomem *vaddr = NULL;
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
struct cpc_reg *reg = &reg_res->cpc_entry.reg;
if (reg_res->type == ACPI_TYPE_INTEGER) {
*val = reg_res->cpc_entry.int_value;
- return ret_val;
+ return 0;
}
*val = 0;
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
u32 width = 8 << (reg->access_width - 1);
+ u32 val_u32;
acpi_status status;
status = acpi_os_read_port((acpi_io_address)reg->address,
- (u32 *)val, width);
+ &val_u32, width);
if (ACPI_FAILURE(status)) {
pr_debug("Error: Failed to read SystemIO port %llx\n",
reg->address);
return -EFAULT;
}
+ *val = val_u32;
return 0;
} else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
@@ -966,10 +967,10 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
default:
pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n",
reg->bit_width, pcc_ss_id);
- ret_val = -EFAULT;
+ return -EFAULT;
}
- return ret_val;
+ return 0;
}
static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c
index e7ab0fc90db9..c0da24c9f8c3 100644
--- a/drivers/acpi/dptf/dptf_pch_fivr.c
+++ b/drivers/acpi/dptf/dptf_pch_fivr.c
@@ -151,6 +151,7 @@ static int pch_fivr_remove(struct platform_device *pdev)
static const struct acpi_device_id pch_fivr_device_ids[] = {
{"INTC1045", 0},
{"INTC1049", 0},
+ {"INTC10A3", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, pch_fivr_device_ids);
diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c
index a24d5d7aa117..dc1f52a5b3f4 100644
--- a/drivers/acpi/dptf/dptf_power.c
+++ b/drivers/acpi/dptf/dptf_power.c
@@ -231,6 +231,8 @@ static const struct acpi_device_id int3407_device_ids[] = {
{"INTC1050", 0},
{"INTC1060", 0},
{"INTC1061", 0},
+ {"INTC10A4", 0},
+ {"INTC10A5", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, int3407_device_ids);
diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c
index da5d5f0be2f2..42a556346548 100644
--- a/drivers/acpi/dptf/int340x_thermal.c
+++ b/drivers/acpi/dptf/int340x_thermal.c
@@ -37,6 +37,12 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = {
{"INTC1050"},
{"INTC1060"},
{"INTC1061"},
+ {"INTC10A0"},
+ {"INTC10A1"},
+ {"INTC10A2"},
+ {"INTC10A3"},
+ {"INTC10A4"},
+ {"INTC10A5"},
{""},
};
diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h
index dc9a6efa514b..dd9bb8ca2244 100644
--- a/drivers/acpi/fan.h
+++ b/drivers/acpi/fan.h
@@ -10,4 +10,5 @@
{"INT3404", }, /* Fan */ \
{"INTC1044", }, /* Fan for Tiger Lake generation */ \
{"INTC1048", }, /* Fan for Alder Lake generation */ \
+ {"INTC10A2", }, /* Fan for Raptor Lake generation */ \
{"PNP0C0B", } /* Generic ACPI fan */
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 1db3a2f81763..457e11d851b8 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -14,7 +14,7 @@
int early_acpi_osi_init(void);
int acpi_osi_init(void);
acpi_status acpi_os_initialize1(void);
-int acpi_scan_init(void);
+void acpi_scan_init(void);
#ifdef CONFIG_PCI
void acpi_pci_root_init(void);
void acpi_pci_link_init(void);
diff --git a/drivers/acpi/pfr_telemetry.c b/drivers/acpi/pfr_telemetry.c
new file mode 100644
index 000000000000..9abf350bd7a5
--- /dev/null
+++ b/drivers/acpi/pfr_telemetry.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACPI Platform Firmware Runtime Telemetry driver
+ *
+ * Copyright (C) 2021 Intel Corporation
+ * Author: Chen Yu <yu.c.chen@intel.com>
+ *
+ * This driver allows user space to fetch telemetry data from the
+ * firmware with the help of the Platform Firmware Runtime Telemetry
+ * interface.
+ */
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+#include <linux/uuid.h>
+
+#include <uapi/linux/pfrut.h>
+
+#define PFRT_LOG_EXEC_IDX 0
+#define PFRT_LOG_HISTORY_IDX 1
+
+#define PFRT_LOG_ERR 0
+#define PFRT_LOG_WARN 1
+#define PFRT_LOG_INFO 2
+#define PFRT_LOG_VERB 4
+
+#define PFRT_FUNC_SET_LEV 1
+#define PFRT_FUNC_GET_LEV 2
+#define PFRT_FUNC_GET_DATA 3
+
+#define PFRT_REVID_1 1
+#define PFRT_REVID_2 2
+#define PFRT_DEFAULT_REV_ID PFRT_REVID_1
+
+enum log_index {
+ LOG_STATUS_IDX = 0,
+ LOG_EXT_STATUS_IDX = 1,
+ LOG_MAX_SZ_IDX = 2,
+ LOG_CHUNK1_LO_IDX = 3,
+ LOG_CHUNK1_HI_IDX = 4,
+ LOG_CHUNK1_SZ_IDX = 5,
+ LOG_CHUNK2_LO_IDX = 6,
+ LOG_CHUNK2_HI_IDX = 7,
+ LOG_CHUNK2_SZ_IDX = 8,
+ LOG_ROLLOVER_CNT_IDX = 9,
+ LOG_RESET_CNT_IDX = 10,
+ LOG_NR_IDX
+};
+
+struct pfrt_log_device {
+ int index;
+ struct pfrt_log_info info;
+ struct device *parent_dev;
+ struct miscdevice miscdev;
+};
+
+/* pfrt_guid is the parameter for _DSM method */
+static const guid_t pfrt_log_guid =
+ GUID_INIT(0x75191659, 0x8178, 0x4D9D, 0xB8, 0x8F, 0xAC, 0x5E,
+ 0x5E, 0x93, 0xE8, 0xBF);
+
+static DEFINE_IDA(pfrt_log_ida);
+
+static inline struct pfrt_log_device *to_pfrt_log_dev(struct file *file)
+{
+ return container_of(file->private_data, struct pfrt_log_device, miscdev);
+}
+
+static int get_pfrt_log_data_info(struct pfrt_log_data_info *data_info,
+ struct pfrt_log_device *pfrt_log_dev)
+{
+ acpi_handle handle = ACPI_HANDLE(pfrt_log_dev->parent_dev);
+ union acpi_object *out_obj, in_obj, in_buf;
+ int ret = -EBUSY;
+
+ memset(data_info, 0, sizeof(*data_info));
+ memset(&in_obj, 0, sizeof(in_obj));
+ memset(&in_buf, 0, sizeof(in_buf));
+ in_obj.type = ACPI_TYPE_PACKAGE;
+ in_obj.package.count = 1;
+ in_obj.package.elements = &in_buf;
+ in_buf.type = ACPI_TYPE_INTEGER;
+ in_buf.integer.value = pfrt_log_dev->info.log_type;
+
+ out_obj = acpi_evaluate_dsm_typed(handle, &pfrt_log_guid,
+ pfrt_log_dev->info.log_revid, PFRT_FUNC_GET_DATA,
+ &in_obj, ACPI_TYPE_PACKAGE);
+ if (!out_obj)
+ return -EINVAL;
+
+ if (out_obj->package.count < LOG_NR_IDX ||
+ out_obj->package.elements[LOG_STATUS_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_EXT_STATUS_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_MAX_SZ_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_CHUNK1_LO_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_CHUNK1_HI_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_CHUNK1_SZ_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_CHUNK2_LO_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_CHUNK2_HI_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_CHUNK2_SZ_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_ROLLOVER_CNT_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[LOG_RESET_CNT_IDX].type != ACPI_TYPE_INTEGER)
+ goto free_acpi_buffer;
+
+ data_info->status = out_obj->package.elements[LOG_STATUS_IDX].integer.value;
+ data_info->ext_status =
+ out_obj->package.elements[LOG_EXT_STATUS_IDX].integer.value;
+ if (data_info->status != DSM_SUCCEED) {
+ dev_dbg(pfrt_log_dev->parent_dev, "Error Status:%d\n", data_info->status);
+ dev_dbg(pfrt_log_dev->parent_dev, "Error Extend Status:%d\n",
+ data_info->ext_status);
+ goto free_acpi_buffer;
+ }
+
+ data_info->max_data_size =
+ out_obj->package.elements[LOG_MAX_SZ_IDX].integer.value;
+ data_info->chunk1_addr_lo =
+ out_obj->package.elements[LOG_CHUNK1_LO_IDX].integer.value;
+ data_info->chunk1_addr_hi =
+ out_obj->package.elements[LOG_CHUNK1_HI_IDX].integer.value;
+ data_info->chunk1_size =
+ out_obj->package.elements[LOG_CHUNK1_SZ_IDX].integer.value;
+ data_info->chunk2_addr_lo =
+ out_obj->package.elements[LOG_CHUNK2_LO_IDX].integer.value;
+ data_info->chunk2_addr_hi =
+ out_obj->package.elements[LOG_CHUNK2_HI_IDX].integer.value;
+ data_info->chunk2_size =
+ out_obj->package.elements[LOG_CHUNK2_SZ_IDX].integer.value;
+ data_info->rollover_cnt =
+ out_obj->package.elements[LOG_ROLLOVER_CNT_IDX].integer.value;
+ data_info->reset_cnt =
+ out_obj->package.elements[LOG_RESET_CNT_IDX].integer.value;
+
+ ret = 0;
+
+free_acpi_buffer:
+ kfree(out_obj);
+
+ return ret;
+}
+
+static int set_pfrt_log_level(int level, struct pfrt_log_device *pfrt_log_dev)
+{
+ acpi_handle handle = ACPI_HANDLE(pfrt_log_dev->parent_dev);
+ union acpi_object *out_obj, *obj, in_obj, in_buf;
+ enum pfru_dsm_status status, ext_status;
+ int ret = 0;
+
+ memset(&in_obj, 0, sizeof(in_obj));
+ memset(&in_buf, 0, sizeof(in_buf));
+ in_obj.type = ACPI_TYPE_PACKAGE;
+ in_obj.package.count = 1;
+ in_obj.package.elements = &in_buf;
+ in_buf.type = ACPI_TYPE_INTEGER;
+ in_buf.integer.value = level;
+
+ out_obj = acpi_evaluate_dsm_typed(handle, &pfrt_log_guid,
+ pfrt_log_dev->info.log_revid, PFRT_FUNC_SET_LEV,
+ &in_obj, ACPI_TYPE_PACKAGE);
+ if (!out_obj)
+ return -EINVAL;
+
+ obj = &out_obj->package.elements[0];
+ status = obj->integer.value;
+ if (status != DSM_SUCCEED) {
+ obj = &out_obj->package.elements[1];
+ ext_status = obj->integer.value;
+ dev_dbg(pfrt_log_dev->parent_dev, "Error Status:%d\n", status);
+ dev_dbg(pfrt_log_dev->parent_dev, "Error Extend Status:%d\n", ext_status);
+ ret = -EBUSY;
+ }
+
+ kfree(out_obj);
+
+ return ret;
+}
+
+static int get_pfrt_log_level(struct pfrt_log_device *pfrt_log_dev)
+{
+ acpi_handle handle = ACPI_HANDLE(pfrt_log_dev->parent_dev);
+ union acpi_object *out_obj, *obj;
+ enum pfru_dsm_status status, ext_status;
+ int ret = -EBUSY;
+
+ out_obj = acpi_evaluate_dsm_typed(handle, &pfrt_log_guid,
+ pfrt_log_dev->info.log_revid, PFRT_FUNC_GET_LEV,
+ NULL, ACPI_TYPE_PACKAGE);
+ if (!out_obj)
+ return -EINVAL;
+
+ obj = &out_obj->package.elements[0];
+ if (obj->type != ACPI_TYPE_INTEGER)
+ goto free_acpi_buffer;
+
+ status = obj->integer.value;
+ if (status != DSM_SUCCEED) {
+ obj = &out_obj->package.elements[1];
+ ext_status = obj->integer.value;
+ dev_dbg(pfrt_log_dev->parent_dev, "Error Status:%d\n", status);
+ dev_dbg(pfrt_log_dev->parent_dev, "Error Extend Status:%d\n", ext_status);
+ goto free_acpi_buffer;
+ }
+
+ obj = &out_obj->package.elements[2];
+ if (obj->type != ACPI_TYPE_INTEGER)
+ goto free_acpi_buffer;
+
+ ret = obj->integer.value;
+
+free_acpi_buffer:
+ kfree(out_obj);
+
+ return ret;
+}
+
+static int valid_log_level(u32 level)
+{
+ return level == PFRT_LOG_ERR || level == PFRT_LOG_WARN ||
+ level == PFRT_LOG_INFO || level == PFRT_LOG_VERB;
+}
+
+static int valid_log_type(u32 type)
+{
+ return type == PFRT_LOG_EXEC_IDX || type == PFRT_LOG_HISTORY_IDX;
+}
+
+static inline int valid_log_revid(u32 id)
+{
+ return id == PFRT_REVID_1 || id == PFRT_REVID_2;
+}
+
+static long pfrt_log_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct pfrt_log_device *pfrt_log_dev = to_pfrt_log_dev(file);
+ struct pfrt_log_data_info data_info;
+ struct pfrt_log_info info;
+ void __user *p;
+ int ret = 0;
+
+ p = (void __user *)arg;
+
+ switch (cmd) {
+ case PFRT_LOG_IOC_SET_INFO:
+ if (copy_from_user(&info, p, sizeof(info)))
+ return -EFAULT;
+
+ if (valid_log_revid(info.log_revid))
+ pfrt_log_dev->info.log_revid = info.log_revid;
+
+ if (valid_log_level(info.log_level)) {
+ ret = set_pfrt_log_level(info.log_level, pfrt_log_dev);
+ if (ret < 0)
+ return ret;
+
+ pfrt_log_dev->info.log_level = info.log_level;
+ }
+
+ if (valid_log_type(info.log_type))
+ pfrt_log_dev->info.log_type = info.log_type;
+
+ return 0;
+
+ case PFRT_LOG_IOC_GET_INFO:
+ info.log_level = get_pfrt_log_level(pfrt_log_dev);
+ if (ret < 0)
+ return ret;
+
+ info.log_type = pfrt_log_dev->info.log_type;
+ info.log_revid = pfrt_log_dev->info.log_revid;
+ if (copy_to_user(p, &info, sizeof(info)))
+ return -EFAULT;
+
+ return 0;
+
+ case PFRT_LOG_IOC_GET_DATA_INFO:
+ ret = get_pfrt_log_data_info(&data_info, pfrt_log_dev);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(p, &data_info, sizeof(struct pfrt_log_data_info)))
+ return -EFAULT;
+
+ return 0;
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+static int
+pfrt_log_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct pfrt_log_device *pfrt_log_dev;
+ struct pfrt_log_data_info info;
+ unsigned long psize, vsize;
+ phys_addr_t base_addr;
+ int ret;
+
+ if (vma->vm_flags & VM_WRITE)
+ return -EROFS;
+
+ /* changing from read to write with mprotect is not allowed */
+ vma->vm_flags &= ~VM_MAYWRITE;
+
+ pfrt_log_dev = to_pfrt_log_dev(file);
+
+ ret = get_pfrt_log_data_info(&info, pfrt_log_dev);
+ if (ret)
+ return ret;
+
+ base_addr = (phys_addr_t)((info.chunk2_addr_hi << 32) | info.chunk2_addr_lo);
+ /* pfrt update has not been launched yet */
+ if (!base_addr)
+ return -ENODEV;
+
+ psize = info.max_data_size;
+ /* base address and total buffer size must be page aligned */
+ if (!PAGE_ALIGNED(base_addr) || !PAGE_ALIGNED(psize))
+ return -ENODEV;
+
+ vsize = vma->vm_end - vma->vm_start;
+ if (vsize > psize)
+ return -EINVAL;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (io_remap_pfn_range(vma, vma->vm_start, PFN_DOWN(base_addr),
+ vsize, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+static const struct file_operations acpi_pfrt_log_fops = {
+ .owner = THIS_MODULE,
+ .mmap = pfrt_log_mmap,
+ .unlocked_ioctl = pfrt_log_ioctl,
+ .llseek = noop_llseek,
+};
+
+static int acpi_pfrt_log_remove(struct platform_device *pdev)
+{
+ struct pfrt_log_device *pfrt_log_dev = platform_get_drvdata(pdev);
+
+ misc_deregister(&pfrt_log_dev->miscdev);
+
+ return 0;
+}
+
+static void pfrt_log_put_idx(void *data)
+{
+ struct pfrt_log_device *pfrt_log_dev = data;
+
+ ida_free(&pfrt_log_ida, pfrt_log_dev->index);
+}
+
+static int acpi_pfrt_log_probe(struct platform_device *pdev)
+{
+ acpi_handle handle = ACPI_HANDLE(&pdev->dev);
+ struct pfrt_log_device *pfrt_log_dev;
+ int ret;
+
+ if (!acpi_has_method(handle, "_DSM")) {
+ dev_dbg(&pdev->dev, "Missing _DSM\n");
+ return -ENODEV;
+ }
+
+ pfrt_log_dev = devm_kzalloc(&pdev->dev, sizeof(*pfrt_log_dev), GFP_KERNEL);
+ if (!pfrt_log_dev)
+ return -ENOMEM;
+
+ ret = ida_alloc(&pfrt_log_ida, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ pfrt_log_dev->index = ret;
+ ret = devm_add_action_or_reset(&pdev->dev, pfrt_log_put_idx, pfrt_log_dev);
+ if (ret)
+ return ret;
+
+ pfrt_log_dev->info.log_revid = PFRT_DEFAULT_REV_ID;
+ pfrt_log_dev->parent_dev = &pdev->dev;
+
+ pfrt_log_dev->miscdev.minor = MISC_DYNAMIC_MINOR;
+ pfrt_log_dev->miscdev.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "pfrt%d",
+ pfrt_log_dev->index);
+ if (!pfrt_log_dev->miscdev.name)
+ return -ENOMEM;
+
+ pfrt_log_dev->miscdev.nodename = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "acpi_pfr_telemetry%d",
+ pfrt_log_dev->index);
+ if (!pfrt_log_dev->miscdev.nodename)
+ return -ENOMEM;
+
+ pfrt_log_dev->miscdev.fops = &acpi_pfrt_log_fops;
+ pfrt_log_dev->miscdev.parent = &pdev->dev;
+
+ ret = misc_register(&pfrt_log_dev->miscdev);
+ if (ret)
+ return ret;
+
+ platform_set_drvdata(pdev, pfrt_log_dev);
+
+ return 0;
+}
+
+static const struct acpi_device_id acpi_pfrt_log_ids[] = {
+ {"INTC1081"},
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, acpi_pfrt_log_ids);
+
+static struct platform_driver acpi_pfrt_log_driver = {
+ .driver = {
+ .name = "pfr_telemetry",
+ .acpi_match_table = acpi_pfrt_log_ids,
+ },
+ .probe = acpi_pfrt_log_probe,
+ .remove = acpi_pfrt_log_remove,
+};
+module_platform_driver(acpi_pfrt_log_driver);
+
+MODULE_DESCRIPTION("Platform Firmware Runtime Update Telemetry driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/acpi/pfr_update.c b/drivers/acpi/pfr_update.c
new file mode 100644
index 000000000000..6bb0b778b5da
--- /dev/null
+++ b/drivers/acpi/pfr_update.c
@@ -0,0 +1,575 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACPI Platform Firmware Runtime Update Device driver
+ *
+ * Copyright (C) 2021 Intel Corporation
+ * Author: Chen Yu <yu.c.chen@intel.com>
+ *
+ * pfr_update driver is used for Platform Firmware Runtime
+ * Update, which includes the code injection and driver update.
+ */
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/efi.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+#include <linux/uuid.h>
+
+#include <uapi/linux/pfrut.h>
+
+#define PFRU_FUNC_STANDARD_QUERY 0
+#define PFRU_FUNC_QUERY_UPDATE_CAP 1
+#define PFRU_FUNC_QUERY_BUF 2
+#define PFRU_FUNC_START 3
+
+#define PFRU_CODE_INJECT_TYPE 1
+#define PFRU_DRIVER_UPDATE_TYPE 2
+
+#define PFRU_REVID_1 1
+#define PFRU_REVID_2 2
+#define PFRU_DEFAULT_REV_ID PFRU_REVID_1
+
+enum cap_index {
+ CAP_STATUS_IDX = 0,
+ CAP_UPDATE_IDX = 1,
+ CAP_CODE_TYPE_IDX = 2,
+ CAP_FW_VER_IDX = 3,
+ CAP_CODE_RT_VER_IDX = 4,
+ CAP_DRV_TYPE_IDX = 5,
+ CAP_DRV_RT_VER_IDX = 6,
+ CAP_DRV_SVN_IDX = 7,
+ CAP_PLAT_ID_IDX = 8,
+ CAP_OEM_ID_IDX = 9,
+ CAP_OEM_INFO_IDX = 10,
+ CAP_NR_IDX
+};
+
+enum buf_index {
+ BUF_STATUS_IDX = 0,
+ BUF_EXT_STATUS_IDX = 1,
+ BUF_ADDR_LOW_IDX = 2,
+ BUF_ADDR_HI_IDX = 3,
+ BUF_SIZE_IDX = 4,
+ BUF_NR_IDX
+};
+
+enum update_index {
+ UPDATE_STATUS_IDX = 0,
+ UPDATE_EXT_STATUS_IDX = 1,
+ UPDATE_AUTH_TIME_LOW_IDX = 2,
+ UPDATE_AUTH_TIME_HI_IDX = 3,
+ UPDATE_EXEC_TIME_LOW_IDX = 4,
+ UPDATE_EXEC_TIME_HI_IDX = 5,
+ UPDATE_NR_IDX
+};
+
+enum pfru_start_action {
+ START_STAGE = 0,
+ START_ACTIVATE = 1,
+ START_STAGE_ACTIVATE = 2,
+};
+
+struct pfru_device {
+ u32 rev_id, index;
+ struct device *parent_dev;
+ struct miscdevice miscdev;
+};
+
+static DEFINE_IDA(pfru_ida);
+
+/*
+ * Manual reference:
+ * https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf
+ *
+ * pfru_guid is the parameter for _DSM method
+ */
+static const guid_t pfru_guid =
+ GUID_INIT(0xECF9533B, 0x4A3C, 0x4E89, 0x93, 0x9E, 0xC7, 0x71,
+ 0x12, 0x60, 0x1C, 0x6D);
+
+/* pfru_code_inj_guid is the UUID to identify code injection EFI capsule file */
+static const guid_t pfru_code_inj_guid =
+ GUID_INIT(0xB2F84B79, 0x7B6E, 0x4E45, 0x88, 0x5F, 0x3F, 0xB9,
+ 0xBB, 0x18, 0x54, 0x02);
+
+/* pfru_drv_update_guid is the UUID to identify driver update EFI capsule file */
+static const guid_t pfru_drv_update_guid =
+ GUID_INIT(0x4569DD8C, 0x75F1, 0x429A, 0xA3, 0xD6, 0x24, 0xDE,
+ 0x80, 0x97, 0xA0, 0xDF);
+
+static inline int pfru_valid_revid(u32 id)
+{
+ return id == PFRU_REVID_1 || id == PFRU_REVID_2;
+}
+
+static inline struct pfru_device *to_pfru_dev(struct file *file)
+{
+ return container_of(file->private_data, struct pfru_device, miscdev);
+}
+
+static int query_capability(struct pfru_update_cap_info *cap_hdr,
+ struct pfru_device *pfru_dev)
+{
+ acpi_handle handle = ACPI_HANDLE(pfru_dev->parent_dev);
+ union acpi_object *out_obj;
+ int ret = -EINVAL;
+
+ out_obj = acpi_evaluate_dsm_typed(handle, &pfru_guid,
+ pfru_dev->rev_id,
+ PFRU_FUNC_QUERY_UPDATE_CAP,
+ NULL, ACPI_TYPE_PACKAGE);
+ if (!out_obj)
+ return ret;
+
+ if (out_obj->package.count < CAP_NR_IDX ||
+ out_obj->package.elements[CAP_STATUS_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[CAP_UPDATE_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[CAP_CODE_TYPE_IDX].type != ACPI_TYPE_BUFFER ||
+ out_obj->package.elements[CAP_FW_VER_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[CAP_CODE_RT_VER_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[CAP_DRV_TYPE_IDX].type != ACPI_TYPE_BUFFER ||
+ out_obj->package.elements[CAP_DRV_RT_VER_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[CAP_DRV_SVN_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[CAP_PLAT_ID_IDX].type != ACPI_TYPE_BUFFER ||
+ out_obj->package.elements[CAP_OEM_ID_IDX].type != ACPI_TYPE_BUFFER ||
+ out_obj->package.elements[CAP_OEM_INFO_IDX].type != ACPI_TYPE_BUFFER)
+ goto free_acpi_buffer;
+
+ cap_hdr->status = out_obj->package.elements[CAP_STATUS_IDX].integer.value;
+ if (cap_hdr->status != DSM_SUCCEED) {
+ ret = -EBUSY;
+ dev_dbg(pfru_dev->parent_dev, "Error Status:%d\n", cap_hdr->status);
+ goto free_acpi_buffer;
+ }
+
+ cap_hdr->update_cap = out_obj->package.elements[CAP_UPDATE_IDX].integer.value;
+ memcpy(&cap_hdr->code_type,
+ out_obj->package.elements[CAP_CODE_TYPE_IDX].buffer.pointer,
+ out_obj->package.elements[CAP_CODE_TYPE_IDX].buffer.length);
+ cap_hdr->fw_version =
+ out_obj->package.elements[CAP_FW_VER_IDX].integer.value;
+ cap_hdr->code_rt_version =
+ out_obj->package.elements[CAP_CODE_RT_VER_IDX].integer.value;
+ memcpy(&cap_hdr->drv_type,
+ out_obj->package.elements[CAP_DRV_TYPE_IDX].buffer.pointer,
+ out_obj->package.elements[CAP_DRV_TYPE_IDX].buffer.length);
+ cap_hdr->drv_rt_version =
+ out_obj->package.elements[CAP_DRV_RT_VER_IDX].integer.value;
+ cap_hdr->drv_svn =
+ out_obj->package.elements[CAP_DRV_SVN_IDX].integer.value;
+ memcpy(&cap_hdr->platform_id,
+ out_obj->package.elements[CAP_PLAT_ID_IDX].buffer.pointer,
+ out_obj->package.elements[CAP_PLAT_ID_IDX].buffer.length);
+ memcpy(&cap_hdr->oem_id,
+ out_obj->package.elements[CAP_OEM_ID_IDX].buffer.pointer,
+ out_obj->package.elements[CAP_OEM_ID_IDX].buffer.length);
+ cap_hdr->oem_info_len =
+ out_obj->package.elements[CAP_OEM_INFO_IDX].buffer.length;
+
+ ret = 0;
+
+free_acpi_buffer:
+ kfree(out_obj);
+
+ return ret;
+}
+
+static int query_buffer(struct pfru_com_buf_info *info,
+ struct pfru_device *pfru_dev)
+{
+ acpi_handle handle = ACPI_HANDLE(pfru_dev->parent_dev);
+ union acpi_object *out_obj;
+ int ret = -EINVAL;
+
+ out_obj = acpi_evaluate_dsm_typed(handle, &pfru_guid,
+ pfru_dev->rev_id, PFRU_FUNC_QUERY_BUF,
+ NULL, ACPI_TYPE_PACKAGE);
+ if (!out_obj)
+ return ret;
+
+ if (out_obj->package.count < BUF_NR_IDX ||
+ out_obj->package.elements[BUF_STATUS_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[BUF_EXT_STATUS_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[BUF_ADDR_LOW_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[BUF_ADDR_HI_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[BUF_SIZE_IDX].type != ACPI_TYPE_INTEGER)
+ goto free_acpi_buffer;
+
+ info->status = out_obj->package.elements[BUF_STATUS_IDX].integer.value;
+ info->ext_status =
+ out_obj->package.elements[BUF_EXT_STATUS_IDX].integer.value;
+ if (info->status != DSM_SUCCEED) {
+ ret = -EBUSY;
+ dev_dbg(pfru_dev->parent_dev, "Error Status:%d\n", info->status);
+ dev_dbg(pfru_dev->parent_dev, "Error Extended Status:%d\n", info->ext_status);
+
+ goto free_acpi_buffer;
+ }
+
+ info->addr_lo =
+ out_obj->package.elements[BUF_ADDR_LOW_IDX].integer.value;
+ info->addr_hi =
+ out_obj->package.elements[BUF_ADDR_HI_IDX].integer.value;
+ info->buf_size = out_obj->package.elements[BUF_SIZE_IDX].integer.value;
+
+ ret = 0;
+
+free_acpi_buffer:
+ kfree(out_obj);
+
+ return ret;
+}
+
+static int get_image_type(const struct efi_manage_capsule_image_header *img_hdr,
+ struct pfru_device *pfru_dev)
+{
+ const efi_guid_t *image_type_id = &img_hdr->image_type_id;
+
+ /* check whether this is a code injection or driver update */
+ if (guid_equal(image_type_id, &pfru_code_inj_guid))
+ return PFRU_CODE_INJECT_TYPE;
+
+ if (guid_equal(image_type_id, &pfru_drv_update_guid))
+ return PFRU_DRIVER_UPDATE_TYPE;
+
+ return -EINVAL;
+}
+
+static int adjust_efi_size(const struct efi_manage_capsule_image_header *img_hdr,
+ int size)
+{
+ /*
+ * The (u64 hw_ins) was introduced in UEFI spec version 2,
+ * and (u64 capsule_support) was introduced in version 3.
+ * The size needs to be adjusted accordingly. That is to
+ * say, version 1 should subtract the size of hw_ins+capsule_support,
+ * and version 2 should sbstract the size of capsule_support.
+ */
+ size += sizeof(struct efi_manage_capsule_image_header);
+ switch (img_hdr->ver) {
+ case 1:
+ return size - 2 * sizeof(u64);
+
+ case 2:
+ return size - sizeof(u64);
+
+ default:
+ /* only support version 1 and 2 */
+ return -EINVAL;
+ }
+}
+
+static bool applicable_image(const void *data, struct pfru_update_cap_info *cap,
+ struct pfru_device *pfru_dev)
+{
+ struct pfru_payload_hdr *payload_hdr;
+ const efi_capsule_header_t *cap_hdr = data;
+ const struct efi_manage_capsule_header *m_hdr;
+ const struct efi_manage_capsule_image_header *m_img_hdr;
+ const struct efi_image_auth *auth;
+ int type, size;
+
+ /*
+ * If the code in the capsule is older than the current
+ * firmware code, the update will be rejected by the firmware,
+ * so check the version of it upfront without engaging the
+ * Management Mode update mechanism which may be costly.
+ */
+ size = cap_hdr->headersize;
+ m_hdr = data + size;
+ /*
+ * Current data structure size plus variable array indicated
+ * by number of (emb_drv_cnt + payload_cnt)
+ */
+ size += offsetof(struct efi_manage_capsule_header, offset_list) +
+ (m_hdr->emb_drv_cnt + m_hdr->payload_cnt) * sizeof(u64);
+ m_img_hdr = data + size;
+
+ type = get_image_type(m_img_hdr, pfru_dev);
+ if (type < 0)
+ return false;
+
+ size = adjust_efi_size(m_img_hdr, size);
+ if (size < 0)
+ return false;
+
+ auth = data + size;
+ size += sizeof(u64) + auth->auth_info.hdr.len;
+ payload_hdr = (struct pfru_payload_hdr *)(data + size);
+
+ /* finally compare the version */
+ if (type == PFRU_CODE_INJECT_TYPE)
+ return payload_hdr->rt_ver >= cap->code_rt_version;
+
+ return payload_hdr->rt_ver >= cap->drv_rt_version;
+}
+
+static void print_update_debug_info(struct pfru_updated_result *result,
+ struct pfru_device *pfru_dev)
+{
+ dev_dbg(pfru_dev->parent_dev, "Update result:\n");
+ dev_dbg(pfru_dev->parent_dev, "Authentication Time Low:%lld\n",
+ result->low_auth_time);
+ dev_dbg(pfru_dev->parent_dev, "Authentication Time High:%lld\n",
+ result->high_auth_time);
+ dev_dbg(pfru_dev->parent_dev, "Execution Time Low:%lld\n",
+ result->low_exec_time);
+ dev_dbg(pfru_dev->parent_dev, "Execution Time High:%lld\n",
+ result->high_exec_time);
+}
+
+static int start_update(int action, struct pfru_device *pfru_dev)
+{
+ union acpi_object *out_obj, in_obj, in_buf;
+ struct pfru_updated_result update_result;
+ acpi_handle handle;
+ int ret = -EINVAL;
+
+ memset(&in_obj, 0, sizeof(in_obj));
+ memset(&in_buf, 0, sizeof(in_buf));
+ in_obj.type = ACPI_TYPE_PACKAGE;
+ in_obj.package.count = 1;
+ in_obj.package.elements = &in_buf;
+ in_buf.type = ACPI_TYPE_INTEGER;
+ in_buf.integer.value = action;
+
+ handle = ACPI_HANDLE(pfru_dev->parent_dev);
+ out_obj = acpi_evaluate_dsm_typed(handle, &pfru_guid,
+ pfru_dev->rev_id, PFRU_FUNC_START,
+ &in_obj, ACPI_TYPE_PACKAGE);
+ if (!out_obj)
+ return ret;
+
+ if (out_obj->package.count < UPDATE_NR_IDX ||
+ out_obj->package.elements[UPDATE_STATUS_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[UPDATE_EXT_STATUS_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[UPDATE_AUTH_TIME_LOW_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[UPDATE_AUTH_TIME_HI_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[UPDATE_EXEC_TIME_LOW_IDX].type != ACPI_TYPE_INTEGER ||
+ out_obj->package.elements[UPDATE_EXEC_TIME_HI_IDX].type != ACPI_TYPE_INTEGER)
+ goto free_acpi_buffer;
+
+ update_result.status =
+ out_obj->package.elements[UPDATE_STATUS_IDX].integer.value;
+ update_result.ext_status =
+ out_obj->package.elements[UPDATE_EXT_STATUS_IDX].integer.value;
+
+ if (update_result.status != DSM_SUCCEED) {
+ ret = -EBUSY;
+ dev_dbg(pfru_dev->parent_dev, "Error Status:%d\n", update_result.status);
+ dev_dbg(pfru_dev->parent_dev, "Error Extended Status:%d\n",
+ update_result.ext_status);
+
+ goto free_acpi_buffer;
+ }
+
+ update_result.low_auth_time =
+ out_obj->package.elements[UPDATE_AUTH_TIME_LOW_IDX].integer.value;
+ update_result.high_auth_time =
+ out_obj->package.elements[UPDATE_AUTH_TIME_HI_IDX].integer.value;
+ update_result.low_exec_time =
+ out_obj->package.elements[UPDATE_EXEC_TIME_LOW_IDX].integer.value;
+ update_result.high_exec_time =
+ out_obj->package.elements[UPDATE_EXEC_TIME_HI_IDX].integer.value;
+
+ print_update_debug_info(&update_result, pfru_dev);
+ ret = 0;
+
+free_acpi_buffer:
+ kfree(out_obj);
+
+ return ret;
+}
+
+static long pfru_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct pfru_update_cap_info cap_hdr;
+ struct pfru_device *pfru_dev = to_pfru_dev(file);
+ void __user *p = (void __user *)arg;
+ u32 rev;
+ int ret;
+
+ switch (cmd) {
+ case PFRU_IOC_QUERY_CAP:
+ ret = query_capability(&cap_hdr, pfru_dev);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(p, &cap_hdr, sizeof(cap_hdr)))
+ return -EFAULT;
+
+ return 0;
+
+ case PFRU_IOC_SET_REV:
+ if (copy_from_user(&rev, p, sizeof(rev)))
+ return -EFAULT;
+
+ if (!pfru_valid_revid(rev))
+ return -EINVAL;
+
+ pfru_dev->rev_id = rev;
+
+ return 0;
+
+ case PFRU_IOC_STAGE:
+ return start_update(START_STAGE, pfru_dev);
+
+ case PFRU_IOC_ACTIVATE:
+ return start_update(START_ACTIVATE, pfru_dev);
+
+ case PFRU_IOC_STAGE_ACTIVATE:
+ return start_update(START_STAGE_ACTIVATE, pfru_dev);
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+static ssize_t pfru_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct pfru_device *pfru_dev = to_pfru_dev(file);
+ struct pfru_update_cap_info cap;
+ struct pfru_com_buf_info buf_info;
+ phys_addr_t phy_addr;
+ struct iov_iter iter;
+ struct iovec iov;
+ char *buf_ptr;
+ int ret;
+
+ ret = query_buffer(&buf_info, pfru_dev);
+ if (ret)
+ return ret;
+
+ if (len > buf_info.buf_size)
+ return -EINVAL;
+
+ iov.iov_base = (void __user *)buf;
+ iov.iov_len = len;
+ iov_iter_init(&iter, WRITE, &iov, 1, len);
+
+ /* map the communication buffer */
+ phy_addr = (phys_addr_t)((buf_info.addr_hi << 32) | buf_info.addr_lo);
+ buf_ptr = memremap(phy_addr, buf_info.buf_size, MEMREMAP_WB);
+ if (!buf_ptr)
+ return -ENOMEM;
+
+ if (!copy_from_iter_full(buf_ptr, len, &iter)) {
+ ret = -EINVAL;
+ goto unmap;
+ }
+
+ /* check if the capsule header has a valid version number */
+ ret = query_capability(&cap, pfru_dev);
+ if (ret)
+ goto unmap;
+
+ if (!applicable_image(buf_ptr, &cap, pfru_dev))
+ ret = -EINVAL;
+
+unmap:
+ memunmap(buf_ptr);
+
+ return ret ?: len;
+}
+
+static const struct file_operations acpi_pfru_fops = {
+ .owner = THIS_MODULE,
+ .write = pfru_write,
+ .unlocked_ioctl = pfru_ioctl,
+ .llseek = noop_llseek,
+};
+
+static int acpi_pfru_remove(struct platform_device *pdev)
+{
+ struct pfru_device *pfru_dev = platform_get_drvdata(pdev);
+
+ misc_deregister(&pfru_dev->miscdev);
+
+ return 0;
+}
+
+static void pfru_put_idx(void *data)
+{
+ struct pfru_device *pfru_dev = data;
+
+ ida_free(&pfru_ida, pfru_dev->index);
+}
+
+static int acpi_pfru_probe(struct platform_device *pdev)
+{
+ acpi_handle handle = ACPI_HANDLE(&pdev->dev);
+ struct pfru_device *pfru_dev;
+ int ret;
+
+ if (!acpi_has_method(handle, "_DSM")) {
+ dev_dbg(&pdev->dev, "Missing _DSM\n");
+ return -ENODEV;
+ }
+
+ pfru_dev = devm_kzalloc(&pdev->dev, sizeof(*pfru_dev), GFP_KERNEL);
+ if (!pfru_dev)
+ return -ENOMEM;
+
+ ret = ida_alloc(&pfru_ida, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ pfru_dev->index = ret;
+ ret = devm_add_action_or_reset(&pdev->dev, pfru_put_idx, pfru_dev);
+ if (ret)
+ return ret;
+
+ pfru_dev->rev_id = PFRU_DEFAULT_REV_ID;
+ pfru_dev->parent_dev = &pdev->dev;
+
+ pfru_dev->miscdev.minor = MISC_DYNAMIC_MINOR;
+ pfru_dev->miscdev.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "pfru%d", pfru_dev->index);
+ if (!pfru_dev->miscdev.name)
+ return -ENOMEM;
+
+ pfru_dev->miscdev.nodename = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "acpi_pfr_update%d", pfru_dev->index);
+ if (!pfru_dev->miscdev.nodename)
+ return -ENOMEM;
+
+ pfru_dev->miscdev.fops = &acpi_pfru_fops;
+ pfru_dev->miscdev.parent = &pdev->dev;
+
+ ret = misc_register(&pfru_dev->miscdev);
+ if (ret)
+ return ret;
+
+ platform_set_drvdata(pdev, pfru_dev);
+
+ return 0;
+}
+
+static const struct acpi_device_id acpi_pfru_ids[] = {
+ {"INTC1080"},
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, acpi_pfru_ids);
+
+static struct platform_driver acpi_pfru_driver = {
+ .driver = {
+ .name = "pfr_update",
+ .acpi_match_table = acpi_pfru_ids,
+ },
+ .probe = acpi_pfru_probe,
+ .remove = acpi_pfru_remove,
+};
+module_platform_driver(acpi_pfru_driver);
+
+MODULE_DESCRIPTION("Platform Firmware Runtime Update device driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c
index 0cca7991f186..4322f2da6d10 100644
--- a/drivers/acpi/proc.c
+++ b/drivers/acpi/proc.c
@@ -127,7 +127,7 @@ static int
acpi_system_wakeup_device_open_fs(struct inode *inode, struct file *file)
{
return single_open(file, acpi_system_wakeup_device_seq_show,
- PDE_DATA(inode));
+ pde_data(inode));
}
static const struct proc_ops acpi_system_wakeup_device_proc_ops = {
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index c215bc8723d0..1331756d4cfc 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2502,42 +2502,33 @@ int acpi_bus_register_early_device(int type)
}
EXPORT_SYMBOL_GPL(acpi_bus_register_early_device);
-static int acpi_bus_scan_fixed(void)
+static void acpi_bus_scan_fixed(void)
{
- int result = 0;
-
- /*
- * Enumerate all fixed-feature devices.
- */
if (!(acpi_gbl_FADT.flags & ACPI_FADT_POWER_BUTTON)) {
- struct acpi_device *device = NULL;
-
- result = acpi_add_single_object(&device, NULL,
- ACPI_BUS_TYPE_POWER_BUTTON, false);
- if (result)
- return result;
-
- device->flags.match_driver = true;
- result = device_attach(&device->dev);
- if (result < 0)
- return result;
-
- device_init_wakeup(&device->dev, true);
+ struct acpi_device *adev = NULL;
+
+ acpi_add_single_object(&adev, NULL, ACPI_BUS_TYPE_POWER_BUTTON,
+ false);
+ if (adev) {
+ adev->flags.match_driver = true;
+ if (device_attach(&adev->dev) >= 0)
+ device_init_wakeup(&adev->dev, true);
+ else
+ dev_dbg(&adev->dev, "No driver\n");
+ }
}
if (!(acpi_gbl_FADT.flags & ACPI_FADT_SLEEP_BUTTON)) {
- struct acpi_device *device = NULL;
-
- result = acpi_add_single_object(&device, NULL,
- ACPI_BUS_TYPE_SLEEP_BUTTON, false);
- if (result)
- return result;
-
- device->flags.match_driver = true;
- result = device_attach(&device->dev);
+ struct acpi_device *adev = NULL;
+
+ acpi_add_single_object(&adev, NULL, ACPI_BUS_TYPE_SLEEP_BUTTON,
+ false);
+ if (adev) {
+ adev->flags.match_driver = true;
+ if (device_attach(&adev->dev) < 0)
+ dev_dbg(&adev->dev, "No driver\n");
+ }
}
-
- return result < 0 ? result : 0;
}
static void __init acpi_get_spcr_uart_addr(void)
@@ -2558,9 +2549,8 @@ static void __init acpi_get_spcr_uart_addr(void)
static bool acpi_scan_initialized;
-int __init acpi_scan_init(void)
+void __init acpi_scan_init(void)
{
- int result;
acpi_status status;
struct acpi_table_stao *stao_ptr;
@@ -2610,33 +2600,23 @@ int __init acpi_scan_init(void)
/*
* Enumerate devices in the ACPI namespace.
*/
- result = acpi_bus_scan(ACPI_ROOT_OBJECT);
- if (result)
- goto out;
+ if (acpi_bus_scan(ACPI_ROOT_OBJECT))
+ goto unlock;
acpi_root = acpi_fetch_acpi_dev(ACPI_ROOT_OBJECT);
if (!acpi_root)
- goto out;
+ goto unlock;
/* Fixed feature devices do not exist on HW-reduced platform */
- if (!acpi_gbl_reduced_hardware) {
- result = acpi_bus_scan_fixed();
- if (result) {
- acpi_detach_data(acpi_root->handle,
- acpi_scan_drop_device);
- acpi_device_del(acpi_root);
- acpi_bus_put_acpi_device(acpi_root);
- goto out;
- }
- }
+ if (!acpi_gbl_reduced_hardware)
+ acpi_bus_scan_fixed();
acpi_turn_off_unused_power_resources();
acpi_scan_initialized = true;
- out:
+unlock:
mutex_unlock(&acpi_scan_lock);
- return result;
}
static struct acpi_probe_entry *ape;
diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 25c2d0be953e..d589543875b8 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -107,8 +107,13 @@ int __init acpi_parse_spcr(bool enable_earlycon, bool enable_console)
pr_info("SPCR table version %d\n", table->header.revision);
if (table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
- switch (ACPI_ACCESS_BIT_WIDTH((
- table->serial_port.access_width))) {
+ u32 bit_width = table->serial_port.access_width;
+
+ if (bit_width > ACPI_ACCESS_BIT_MAX) {
+ pr_err("Unacceptable wide SPCR Access Width. Defaulting to byte size\n");
+ bit_width = ACPI_ACCESS_BIT_DEFAULT;
+ }
+ switch (ACPI_ACCESS_BIT_WIDTH((bit_width))) {
default:
pr_err("Unexpected SPCR Access Width. Defaulting to byte size\n");
fallthrough;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c75fb600740c..8351c5638880 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -69,7 +69,7 @@
#include <uapi/linux/android/binder.h>
-#include <asm/cacheflush.h>
+#include <linux/cacheflush.h>
#include "binder_internal.h"
#include "binder_trace.h"
@@ -1608,15 +1608,21 @@ static void binder_cleanup_transaction(struct binder_transaction *t,
/**
* binder_get_object() - gets object and checks for valid metadata
* @proc: binder_proc owning the buffer
+ * @u: sender's user pointer to base of buffer
* @buffer: binder_buffer that we're parsing.
* @offset: offset in the @buffer at which to validate an object.
* @object: struct binder_object to read into
*
- * Return: If there's a valid metadata object at @offset in @buffer, the
+ * Copy the binder object at the given offset into @object. If @u is
+ * provided then the copy is from the sender's buffer. If not, then
+ * it is copied from the target's @buffer.
+ *
+ * Return: If there's a valid metadata object at @offset, the
* size of that object. Otherwise, it returns zero. The object
* is read into the struct binder_object pointed to by @object.
*/
static size_t binder_get_object(struct binder_proc *proc,
+ const void __user *u,
struct binder_buffer *buffer,
unsigned long offset,
struct binder_object *object)
@@ -1626,10 +1632,16 @@ static size_t binder_get_object(struct binder_proc *proc,
size_t object_size = 0;
read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
- if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
- binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
- offset, read_size))
+ if (offset > buffer->data_size || read_size < sizeof(*hdr))
return 0;
+ if (u) {
+ if (copy_from_user(object, u + offset, read_size))
+ return 0;
+ } else {
+ if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
+ offset, read_size))
+ return 0;
+ }
/* Ok, now see if we read a complete object. */
hdr = &object->hdr;
@@ -1702,7 +1714,7 @@ static struct binder_buffer_object *binder_validate_ptr(
b, buffer_offset,
sizeof(object_offset)))
return NULL;
- object_size = binder_get_object(proc, b, object_offset, object);
+ object_size = binder_get_object(proc, NULL, b, object_offset, object);
if (!object_size || object->hdr.type != BINDER_TYPE_PTR)
return NULL;
if (object_offsetp)
@@ -1767,7 +1779,8 @@ static bool binder_validate_fixup(struct binder_proc *proc,
unsigned long buffer_offset;
struct binder_object last_object;
struct binder_buffer_object *last_bbo;
- size_t object_size = binder_get_object(proc, b, last_obj_offset,
+ size_t object_size = binder_get_object(proc, NULL, b,
+ last_obj_offset,
&last_object);
if (object_size != sizeof(*last_bbo))
return false;
@@ -1882,7 +1895,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset,
buffer, buffer_offset,
sizeof(object_offset)))
- object_size = binder_get_object(proc, buffer,
+ object_size = binder_get_object(proc, NULL, buffer,
object_offset, &object);
if (object_size == 0) {
pr_err("transaction release %d bad object at offset %lld, size %zd\n",
@@ -1933,7 +1946,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
case BINDER_TYPE_FD: {
/*
* No need to close the file here since user-space
- * closes it for for successfully delivered
+ * closes it for successfully delivered
* transactions. For transactions that weren't
* delivered, the new fd was never allocated so
* there is no need to close and the fput on the
@@ -2220,16 +2233,258 @@ err_fd_not_accepted:
return ret;
}
-static int binder_translate_fd_array(struct binder_fd_array_object *fda,
+/**
+ * struct binder_ptr_fixup - data to be fixed-up in target buffer
+ * @offset offset in target buffer to fixup
+ * @skip_size bytes to skip in copy (fixup will be written later)
+ * @fixup_data data to write at fixup offset
+ * @node list node
+ *
+ * This is used for the pointer fixup list (pf) which is created and consumed
+ * during binder_transaction() and is only accessed locally. No
+ * locking is necessary.
+ *
+ * The list is ordered by @offset.
+ */
+struct binder_ptr_fixup {
+ binder_size_t offset;
+ size_t skip_size;
+ binder_uintptr_t fixup_data;
+ struct list_head node;
+};
+
+/**
+ * struct binder_sg_copy - scatter-gather data to be copied
+ * @offset offset in target buffer
+ * @sender_uaddr user address in source buffer
+ * @length bytes to copy
+ * @node list node
+ *
+ * This is used for the sg copy list (sgc) which is created and consumed
+ * during binder_transaction() and is only accessed locally. No
+ * locking is necessary.
+ *
+ * The list is ordered by @offset.
+ */
+struct binder_sg_copy {
+ binder_size_t offset;
+ const void __user *sender_uaddr;
+ size_t length;
+ struct list_head node;
+};
+
+/**
+ * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data
+ * @alloc: binder_alloc associated with @buffer
+ * @buffer: binder buffer in target process
+ * @sgc_head: list_head of scatter-gather copy list
+ * @pf_head: list_head of pointer fixup list
+ *
+ * Processes all elements of @sgc_head, applying fixups from @pf_head
+ * and copying the scatter-gather data from the source process' user
+ * buffer to the target's buffer. It is expected that the list creation
+ * and processing all occurs during binder_transaction() so these lists
+ * are only accessed in local context.
+ *
+ * Return: 0=success, else -errno
+ */
+static int binder_do_deferred_txn_copies(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ struct list_head *sgc_head,
+ struct list_head *pf_head)
+{
+ int ret = 0;
+ struct binder_sg_copy *sgc, *tmpsgc;
+ struct binder_ptr_fixup *pf =
+ list_first_entry_or_null(pf_head, struct binder_ptr_fixup,
+ node);
+
+ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) {
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < sgc->length) {
+ size_t copy_size;
+ size_t bytes_left = sgc->length - bytes_copied;
+ size_t offset = sgc->offset + bytes_copied;
+
+ /*
+ * We copy up to the fixup (pointed to by pf)
+ */
+ copy_size = pf ? min(bytes_left, (size_t)pf->offset - offset)
+ : bytes_left;
+ if (!ret && copy_size)
+ ret = binder_alloc_copy_user_to_buffer(
+ alloc, buffer,
+ offset,
+ sgc->sender_uaddr + bytes_copied,
+ copy_size);
+ bytes_copied += copy_size;
+ if (copy_size != bytes_left) {
+ BUG_ON(!pf);
+ /* we stopped at a fixup offset */
+ if (pf->skip_size) {
+ /*
+ * we are just skipping. This is for
+ * BINDER_TYPE_FDA where the translated
+ * fds will be fixed up when we get
+ * to target context.
+ */
+ bytes_copied += pf->skip_size;
+ } else {
+ /* apply the fixup indicated by pf */
+ if (!ret)
+ ret = binder_alloc_copy_to_buffer(
+ alloc, buffer,
+ pf->offset,
+ &pf->fixup_data,
+ sizeof(pf->fixup_data));
+ bytes_copied += sizeof(pf->fixup_data);
+ }
+ list_del(&pf->node);
+ kfree(pf);
+ pf = list_first_entry_or_null(pf_head,
+ struct binder_ptr_fixup, node);
+ }
+ }
+ list_del(&sgc->node);
+ kfree(sgc);
+ }
+ BUG_ON(!list_empty(pf_head));
+ BUG_ON(!list_empty(sgc_head));
+
+ return ret > 0 ? -EINVAL : ret;
+}
+
+/**
+ * binder_cleanup_deferred_txn_lists() - free specified lists
+ * @sgc_head: list_head of scatter-gather copy list
+ * @pf_head: list_head of pointer fixup list
+ *
+ * Called to clean up @sgc_head and @pf_head if there is an
+ * error.
+ */
+static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head,
+ struct list_head *pf_head)
+{
+ struct binder_sg_copy *sgc, *tmpsgc;
+ struct binder_ptr_fixup *pf, *tmppf;
+
+ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) {
+ list_del(&sgc->node);
+ kfree(sgc);
+ }
+ list_for_each_entry_safe(pf, tmppf, pf_head, node) {
+ list_del(&pf->node);
+ kfree(pf);
+ }
+}
+
+/**
+ * binder_defer_copy() - queue a scatter-gather buffer for copy
+ * @sgc_head: list_head of scatter-gather copy list
+ * @offset: binder buffer offset in target process
+ * @sender_uaddr: user address in source process
+ * @length: bytes to copy
+ *
+ * Specify a scatter-gather block to be copied. The actual copy must
+ * be deferred until all the needed fixups are identified and queued.
+ * Then the copy and fixups are done together so un-translated values
+ * from the source are never visible in the target buffer.
+ *
+ * We are guaranteed that repeated calls to this function will have
+ * monotonically increasing @offset values so the list will naturally
+ * be ordered.
+ *
+ * Return: 0=success, else -errno
+ */
+static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset,
+ const void __user *sender_uaddr, size_t length)
+{
+ struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL);
+
+ if (!bc)
+ return -ENOMEM;
+
+ bc->offset = offset;
+ bc->sender_uaddr = sender_uaddr;
+ bc->length = length;
+ INIT_LIST_HEAD(&bc->node);
+
+ /*
+ * We are guaranteed that the deferred copies are in-order
+ * so just add to the tail.
+ */
+ list_add_tail(&bc->node, sgc_head);
+
+ return 0;
+}
+
+/**
+ * binder_add_fixup() - queue a fixup to be applied to sg copy
+ * @pf_head: list_head of binder ptr fixup list
+ * @offset: binder buffer offset in target process
+ * @fixup: bytes to be copied for fixup
+ * @skip_size: bytes to skip when copying (fixup will be applied later)
+ *
+ * Add the specified fixup to a list ordered by @offset. When copying
+ * the scatter-gather buffers, the fixup will be copied instead of
+ * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup
+ * will be applied later (in target process context), so we just skip
+ * the bytes specified by @skip_size. If @skip_size is 0, we copy the
+ * value in @fixup.
+ *
+ * This function is called *mostly* in @offset order, but there are
+ * exceptions. Since out-of-order inserts are relatively uncommon,
+ * we insert the new element by searching backward from the tail of
+ * the list.
+ *
+ * Return: 0=success, else -errno
+ */
+static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset,
+ binder_uintptr_t fixup, size_t skip_size)
+{
+ struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL);
+ struct binder_ptr_fixup *tmppf;
+
+ if (!pf)
+ return -ENOMEM;
+
+ pf->offset = offset;
+ pf->fixup_data = fixup;
+ pf->skip_size = skip_size;
+ INIT_LIST_HEAD(&pf->node);
+
+ /* Fixups are *mostly* added in-order, but there are some
+ * exceptions. Look backwards through list for insertion point.
+ */
+ list_for_each_entry_reverse(tmppf, pf_head, node) {
+ if (tmppf->offset < pf->offset) {
+ list_add(&pf->node, &tmppf->node);
+ return 0;
+ }
+ }
+ /*
+ * if we get here, then the new offset is the lowest so
+ * insert at the head
+ */
+ list_add(&pf->node, pf_head);
+ return 0;
+}
+
+static int binder_translate_fd_array(struct list_head *pf_head,
+ struct binder_fd_array_object *fda,
+ const void __user *sender_ubuffer,
struct binder_buffer_object *parent,
+ struct binder_buffer_object *sender_uparent,
struct binder_transaction *t,
struct binder_thread *thread,
struct binder_transaction *in_reply_to)
{
binder_size_t fdi, fd_buf_size;
binder_size_t fda_offset;
+ const void __user *sender_ufda_base;
struct binder_proc *proc = thread->proc;
- struct binder_proc *target_proc = t->to_proc;
+ int ret;
fd_buf_size = sizeof(u32) * fda->num_fds;
if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
@@ -2253,29 +2508,36 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda,
*/
fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) +
fda->parent_offset;
- if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32))) {
+ sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer +
+ fda->parent_offset;
+
+ if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) ||
+ !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) {
binder_user_error("%d:%d parent offset not aligned correctly.\n",
proc->pid, thread->pid);
return -EINVAL;
}
+ ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32));
+ if (ret)
+ return ret;
+
for (fdi = 0; fdi < fda->num_fds; fdi++) {
u32 fd;
- int ret;
binder_size_t offset = fda_offset + fdi * sizeof(fd);
+ binder_size_t sender_uoffset = fdi * sizeof(fd);
- ret = binder_alloc_copy_from_buffer(&target_proc->alloc,
- &fd, t->buffer,
- offset, sizeof(fd));
+ ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd));
if (!ret)
ret = binder_translate_fd(fd, offset, t, thread,
in_reply_to);
- if (ret < 0)
- return ret;
+ if (ret)
+ return ret > 0 ? -EINVAL : ret;
}
return 0;
}
-static int binder_fixup_parent(struct binder_transaction *t,
+static int binder_fixup_parent(struct list_head *pf_head,
+ struct binder_transaction *t,
struct binder_thread *thread,
struct binder_buffer_object *bp,
binder_size_t off_start_offset,
@@ -2321,14 +2583,7 @@ static int binder_fixup_parent(struct binder_transaction *t,
}
buffer_offset = bp->parent_offset +
(uintptr_t)parent->buffer - (uintptr_t)b->user_data;
- if (binder_alloc_copy_to_buffer(&target_proc->alloc, b, buffer_offset,
- &bp->buffer, sizeof(bp->buffer))) {
- binder_user_error("%d:%d got transaction with invalid parent offset\n",
- proc->pid, thread->pid);
- return -EINVAL;
- }
-
- return 0;
+ return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0);
}
/**
@@ -2455,6 +2710,7 @@ static void binder_transaction(struct binder_proc *proc,
binder_size_t off_start_offset, off_end_offset;
binder_size_t off_min;
binder_size_t sg_buf_offset, sg_buf_end_offset;
+ binder_size_t user_offset = 0;
struct binder_proc *target_proc = NULL;
struct binder_thread *target_thread = NULL;
struct binder_node *target_node = NULL;
@@ -2469,6 +2725,12 @@ static void binder_transaction(struct binder_proc *proc,
int t_debug_id = atomic_inc_return(&binder_last_id);
char *secctx = NULL;
u32 secctx_sz = 0;
+ struct list_head sgc_head;
+ struct list_head pf_head;
+ const void __user *user_buffer = (const void __user *)
+ (uintptr_t)tr->data.ptr.buffer;
+ INIT_LIST_HEAD(&sgc_head);
+ INIT_LIST_HEAD(&pf_head);
e = binder_transaction_log_add(&binder_transaction_log);
e->debug_id = t_debug_id;
@@ -2782,19 +3044,6 @@ static void binder_transaction(struct binder_proc *proc,
if (binder_alloc_copy_user_to_buffer(
&target_proc->alloc,
- t->buffer, 0,
- (const void __user *)
- (uintptr_t)tr->data.ptr.buffer,
- tr->data_size)) {
- binder_user_error("%d:%d got transaction with invalid data ptr\n",
- proc->pid, thread->pid);
- return_error = BR_FAILED_REPLY;
- return_error_param = -EFAULT;
- return_error_line = __LINE__;
- goto err_copy_data_failed;
- }
- if (binder_alloc_copy_user_to_buffer(
- &target_proc->alloc,
t->buffer,
ALIGN(tr->data_size, sizeof(void *)),
(const void __user *)
@@ -2837,6 +3086,7 @@ static void binder_transaction(struct binder_proc *proc,
size_t object_size;
struct binder_object object;
binder_size_t object_offset;
+ binder_size_t copy_size;
if (binder_alloc_copy_from_buffer(&target_proc->alloc,
&object_offset,
@@ -2848,8 +3098,27 @@ static void binder_transaction(struct binder_proc *proc,
return_error_line = __LINE__;
goto err_bad_offset;
}
- object_size = binder_get_object(target_proc, t->buffer,
- object_offset, &object);
+
+ /*
+ * Copy the source user buffer up to the next object
+ * that will be processed.
+ */
+ copy_size = object_offset - user_offset;
+ if (copy_size && (user_offset > object_offset ||
+ binder_alloc_copy_user_to_buffer(
+ &target_proc->alloc,
+ t->buffer, user_offset,
+ user_buffer + user_offset,
+ copy_size))) {
+ binder_user_error("%d:%d got transaction with invalid data ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EFAULT;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
+ object_size = binder_get_object(target_proc, user_buffer,
+ t->buffer, object_offset, &object);
if (object_size == 0 || object_offset < off_min) {
binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n",
proc->pid, thread->pid,
@@ -2861,6 +3130,11 @@ static void binder_transaction(struct binder_proc *proc,
return_error_line = __LINE__;
goto err_bad_offset;
}
+ /*
+ * Set offset to the next buffer fragment to be
+ * copied
+ */
+ user_offset = object_offset + object_size;
hdr = &object.hdr;
off_min = object_offset + object_size;
@@ -2923,6 +3197,8 @@ static void binder_transaction(struct binder_proc *proc,
case BINDER_TYPE_FDA: {
struct binder_object ptr_object;
binder_size_t parent_offset;
+ struct binder_object user_object;
+ size_t user_parent_size;
struct binder_fd_array_object *fda =
to_binder_fd_array_object(hdr);
size_t num_valid = (buffer_offset - off_start_offset) /
@@ -2954,11 +3230,35 @@ static void binder_transaction(struct binder_proc *proc,
return_error_line = __LINE__;
goto err_bad_parent;
}
- ret = binder_translate_fd_array(fda, parent, t, thread,
- in_reply_to);
- if (ret < 0) {
+ /*
+ * We need to read the user version of the parent
+ * object to get the original user offset
+ */
+ user_parent_size =
+ binder_get_object(proc, user_buffer, t->buffer,
+ parent_offset, &user_object);
+ if (user_parent_size != sizeof(user_object.bbo)) {
+ binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n",
+ proc->pid, thread->pid,
+ user_parent_size,
+ sizeof(user_object.bbo));
return_error = BR_FAILED_REPLY;
- return_error_param = ret;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_parent;
+ }
+ ret = binder_translate_fd_array(&pf_head, fda,
+ user_buffer, parent,
+ &user_object.bbo, t,
+ thread, in_reply_to);
+ if (!ret)
+ ret = binder_alloc_copy_to_buffer(&target_proc->alloc,
+ t->buffer,
+ object_offset,
+ fda, sizeof(*fda));
+ if (ret) {
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret > 0 ? -EINVAL : ret;
return_error_line = __LINE__;
goto err_translate_failed;
}
@@ -2980,19 +3280,14 @@ static void binder_transaction(struct binder_proc *proc,
return_error_line = __LINE__;
goto err_bad_offset;
}
- if (binder_alloc_copy_user_to_buffer(
- &target_proc->alloc,
- t->buffer,
- sg_buf_offset,
- (const void __user *)
- (uintptr_t)bp->buffer,
- bp->length)) {
- binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
- proc->pid, thread->pid);
- return_error_param = -EFAULT;
+ ret = binder_defer_copy(&sgc_head, sg_buf_offset,
+ (const void __user *)(uintptr_t)bp->buffer,
+ bp->length);
+ if (ret) {
return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
return_error_line = __LINE__;
- goto err_copy_data_failed;
+ goto err_translate_failed;
}
/* Fixup buffer pointer to target proc address space */
bp->buffer = (uintptr_t)
@@ -3001,7 +3296,8 @@ static void binder_transaction(struct binder_proc *proc,
num_valid = (buffer_offset - off_start_offset) /
sizeof(binder_size_t);
- ret = binder_fixup_parent(t, thread, bp,
+ ret = binder_fixup_parent(&pf_head, t,
+ thread, bp,
off_start_offset,
num_valid,
last_fixup_obj_off,
@@ -3028,6 +3324,30 @@ static void binder_transaction(struct binder_proc *proc,
goto err_bad_object_type;
}
}
+ /* Done processing objects, copy the rest of the buffer */
+ if (binder_alloc_copy_user_to_buffer(
+ &target_proc->alloc,
+ t->buffer, user_offset,
+ user_buffer + user_offset,
+ tr->data_size - user_offset)) {
+ binder_user_error("%d:%d got transaction with invalid data ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EFAULT;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
+
+ ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer,
+ &sgc_head, &pf_head);
+ if (ret) {
+ binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
if (t->buffer->oneway_spam_suspect)
tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
else
@@ -3101,6 +3421,7 @@ err_bad_object_type:
err_bad_offset:
err_bad_parent:
err_copy_data_failed:
+ binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head);
binder_free_txn_fixups(t);
trace_binder_transaction_failed_buffer_release(t->buffer);
binder_transaction_buffer_release(target_proc, NULL, t->buffer,
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index a7da8ea7b3ed..cb54631fd950 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -146,7 +146,7 @@ config SATA_AHCI_PLATFORM
config AHCI_BRCM
tristate "Broadcom AHCI SATA support"
depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP || \
- ARCH_BCM_63XX
+ ARCH_BCM_63XX || COMPILE_TEST
select SATA_HOST
help
This option enables support for the AHCI SATA3 controller found on
@@ -156,7 +156,7 @@ config AHCI_BRCM
config AHCI_DA850
tristate "DaVinci DA850 AHCI SATA support"
- depends on ARCH_DAVINCI_DA850
+ depends on ARCH_DAVINCI_DA850 || COMPILE_TEST
select SATA_HOST
help
This option enables support for the DaVinci DA850 SoC's
@@ -166,7 +166,7 @@ config AHCI_DA850
config AHCI_DM816
tristate "DaVinci DM816 AHCI SATA support"
- depends on ARCH_OMAP2PLUS
+ depends on ARCH_OMAP2PLUS || COMPILE_TEST
select SATA_HOST
help
This option enables support for the DaVinci DM816 SoC's
@@ -206,7 +206,7 @@ config AHCI_CEVA
config AHCI_MTK
tristate "MediaTek AHCI SATA support"
- depends on ARCH_MEDIATEK
+ depends on ARCH_MEDIATEK || COMPILE_TEST
select MFD_SYSCON
select SATA_HOST
help
@@ -217,7 +217,7 @@ config AHCI_MTK
config AHCI_MVEBU
tristate "Marvell EBU AHCI SATA support"
- depends on ARCH_MVEBU
+ depends on ARCH_MVEBU || COMPILE_TEST
select SATA_HOST
help
This option enables support for the Marvebu EBU SoC's
@@ -236,7 +236,7 @@ config AHCI_OCTEON
config AHCI_SUNXI
tristate "Allwinner sunxi AHCI SATA support"
- depends on ARCH_SUNXI
+ depends on ARCH_SUNXI || COMPILE_TEST
select SATA_HOST
help
This option enables support for the Allwinner sunxi SoC's
@@ -246,7 +246,7 @@ config AHCI_SUNXI
config AHCI_TEGRA
tristate "NVIDIA Tegra AHCI SATA support"
- depends on ARCH_TEGRA
+ depends on ARCH_TEGRA || COMPILE_TEST
select SATA_HOST
help
This option enables support for the NVIDIA Tegra SoC's
@@ -256,7 +256,7 @@ config AHCI_TEGRA
config AHCI_XGENE
tristate "APM X-Gene 6.0Gbps AHCI SATA host controller support"
- depends on PHY_XGENE
+ depends on PHY_XGENE || COMPILE_TEST
select SATA_HOST
help
This option enables support for APM X-Gene SoC SATA host controller.
@@ -273,7 +273,7 @@ config AHCI_QORIQ
config SATA_FSL
tristate "Freescale 3.0Gbps SATA support"
- depends on FSL_SOC
+ depends on FSL_SOC || COMPILE_TEST
select SATA_HOST
help
This option enables support for Freescale 3.0Gbps SATA controller.
@@ -294,7 +294,7 @@ config SATA_GEMINI
config SATA_AHCI_SEATTLE
tristate "AMD Seattle 6.0Gbps AHCI SATA host controller support"
- depends on ARCH_SEATTLE
+ depends on ARCH_SEATTLE || COMPILE_TEST
select SATA_HOST
help
This option enables support for AMD Seattle SATA host controller.
@@ -432,18 +432,6 @@ config SATA_DWC_OLD_DMA
This option enables support for old device trees without the
"dmas" property.
-config SATA_DWC_DEBUG
- bool "Debugging driver version"
- depends on SATA_DWC
- help
- This option enables debugging output in the driver.
-
-config SATA_DWC_VDEBUG
- bool "Verbose debug output"
- depends on SATA_DWC_DEBUG
- help
- This option enables the taskfile dumping and NCQ debugging.
-
config SATA_HIGHBANK
tristate "Calxeda Highbank SATA support"
depends on ARCH_HIGHBANK || COMPILE_TEST
@@ -611,7 +599,7 @@ config PATA_ATP867X
config PATA_BK3710
tristate "Palmchip BK3710 PATA support"
- depends on ARCH_DAVINCI
+ depends on ARCH_DAVINCI || COMPILE_TEST
select PATA_TIMINGS
help
This option enables support for the integrated IDE controller on
@@ -649,7 +637,7 @@ config PATA_CS5530
config PATA_CS5535
tristate "CS5535 PATA support (Experimental)"
- depends on PCI && X86_32
+ depends on PCI && (X86_32 || (X86_64 && COMPILE_TEST))
help
This option enables support for the NatSemi/AMD CS5535
companion chip used with the Geode processor family.
@@ -697,7 +685,7 @@ config PATA_EP93XX
config PATA_FTIDE010
tristate "Faraday Technology FTIDE010 PATA support"
depends on OF
- depends on ARM
+ depends on ARM || COMPILE_TEST
depends on SATA_GEMINI
help
This option enables support for the Faraday FTIDE010
@@ -760,7 +748,7 @@ config PATA_ICSIDE
config PATA_IMX
tristate "PATA support for Freescale iMX"
- depends on ARCH_MXC
+ depends on ARCH_MXC || COMPILE_TEST
select PATA_TIMINGS
help
This option enables support for the PATA host available on Freescale
@@ -981,7 +969,7 @@ config PATA_VIA
config PATA_PXA
tristate "PXA DMA-capable PATA support"
- depends on ARCH_PXA
+ depends on ARCH_PXA || COMPILE_TEST
help
This option enables support for harddrive attached to PXA CPU's bus.
@@ -1157,7 +1145,7 @@ config PATA_RZ1000
config PATA_SAMSUNG_CF
tristate "Samsung SoC PATA support"
- depends on SAMSUNG_DEV_IDE
+ depends on SAMSUNG_DEV_IDE || COMPILE_TEST
select PATA_TIMINGS
help
This option enables basic support for Samsung's S3C/S5P board
diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c
index 2a04e8abd397..536d4cb8f08b 100644
--- a/drivers/ata/acard-ahci.c
+++ b/drivers/ata/acard-ahci.c
@@ -185,8 +185,6 @@ static unsigned int acard_ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl)
struct acard_sg *acard_sg = cmd_tbl + AHCI_CMD_TBL_HDR_SZ;
unsigned int si, last_si = 0;
- VPRINTK("ENTER\n");
-
/*
* Next, the S/G list.
*/
@@ -362,8 +360,6 @@ static int acard_ahci_init_one(struct pci_dev *pdev, const struct pci_device_id
struct ata_host *host;
int n_ports, i, rc;
- VPRINTK("ENTER\n");
-
WARN_ON((int)ATA_MAX_QUEUE > AHCI_MAX_CMDS);
ata_print_version_once(&pdev->dev, DRV_VERSION);
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 1e1167e725a4..ab5811ef5a53 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -51,6 +51,7 @@ enum board_ids {
board_ahci,
board_ahci_ign_iferr,
board_ahci_mobile,
+ board_ahci_no_debounce_delay,
board_ahci_nomsi,
board_ahci_noncq,
board_ahci_nosntf,
@@ -141,6 +142,13 @@ static const struct ata_port_info ahci_port_info[] = {
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_ops,
},
+ [board_ahci_no_debounce_delay] = {
+ .flags = AHCI_FLAG_COMMON,
+ .link_flags = ATA_LFLAG_NO_DEBOUNCE_DELAY,
+ .pio_mask = ATA_PIO4,
+ .udma_mask = ATA_UDMA6,
+ .port_ops = &ahci_ops,
+ },
[board_ahci_nomsi] = {
AHCI_HFLAGS (AHCI_HFLAG_NO_MSI),
.flags = AHCI_FLAG_COMMON,
@@ -437,6 +445,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
board_ahci_al },
/* AMD */
{ PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */
+ { PCI_VDEVICE(AMD, 0x7801), board_ahci_no_debounce_delay }, /* AMD Hudson-2 (AHCI mode) */
{ PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */
{ PCI_VDEVICE(AMD, 0x7901), board_ahci_mobile }, /* AMD Green Sardine */
/* AMD is using RAID class only for ahci controllers */
@@ -684,7 +693,7 @@ static void ahci_pci_init_controller(struct ata_host *host)
/* clear port IRQ */
tmp = readl(port_mmio + PORT_IRQ_STAT);
- VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp);
+ dev_dbg(&pdev->dev, "PORT_IRQ_STAT 0x%x\n", tmp);
if (tmp)
writel(tmp, port_mmio + PORT_IRQ_STAT);
}
@@ -700,8 +709,6 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
bool online;
int rc;
- DPRINTK("ENTER\n");
-
hpriv->stop_engine(ap);
rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context),
@@ -709,8 +716,6 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
hpriv->start_engine(ap);
- DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class);
-
/* vt8251 doesn't clear BSY on signature FIS reception,
* request follow-up softreset.
*/
@@ -790,8 +795,6 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
bool online;
int rc, i;
- DPRINTK("ENTER\n");
-
hpriv->stop_engine(ap);
for (i = 0; i < 2; i++) {
@@ -829,7 +832,6 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
if (online)
*class = ahci_dev_classify(ap);
- DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class);
return rc;
}
@@ -1476,7 +1478,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
u32 irq_stat, irq_masked;
unsigned int handled = 1;
- VPRINTK("ENTER\n");
hpriv = host->private_data;
mmio = hpriv->mmio;
irq_stat = readl(mmio + HOST_IRQ_STAT);
@@ -1493,7 +1494,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
irq_stat = readl(mmio + HOST_IRQ_STAT);
spin_unlock(&host->lock);
} while (irq_stat);
- VPRINTK("EXIT\n");
return IRQ_RETVAL(handled);
}
@@ -1657,7 +1657,7 @@ static ssize_t remapped_nvme_show(struct device *dev,
struct ata_host *host = dev_get_drvdata(dev);
struct ahci_host_priv *hpriv = host->private_data;
- return sprintf(buf, "%u\n", hpriv->remapped_nvme);
+ return sysfs_emit(buf, "%u\n", hpriv->remapped_nvme);
}
static DEVICE_ATTR_RO(remapped_nvme);
@@ -1673,8 +1673,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
int n_ports, i, rc;
int ahci_pci_bar = AHCI_PCI_BAR_STANDARD;
- VPRINTK("ENTER\n");
-
WARN_ON((int)ATA_MAX_QUEUE > AHCI_MAX_CMDS);
ata_print_version_once(&pdev->dev, DRV_VERSION);
diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c
index 6e9c5ade4c2e..64dd8aa397d5 100644
--- a/drivers/ata/ahci_brcm.c
+++ b/drivers/ata/ahci_brcm.c
@@ -246,7 +246,7 @@ static void brcm_sata_init(struct brcm_ahci_priv *priv)
}
static unsigned int brcm_ahci_read_id(struct ata_device *dev,
- struct ata_taskfile *tf, u16 *id)
+ struct ata_taskfile *tf, __le16 *id)
{
struct ata_port *ap = dev->link->ap;
struct ata_host *host = ap->host;
@@ -333,7 +333,7 @@ static struct ata_port_operations ahci_brcm_platform_ops = {
static const struct ata_port_info ahci_brcm_port_info = {
.flags = AHCI_FLAG_COMMON | ATA_FLAG_NO_DIPM,
- .link_flags = ATA_LFLAG_NO_DB_DELAY,
+ .link_flags = ATA_LFLAG_NO_DEBOUNCE_DELAY,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_brcm_platform_ops,
diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c
index e9c7c07fd84c..acf59f51b356 100644
--- a/drivers/ata/ahci_ceva.c
+++ b/drivers/ata/ahci_ceva.c
@@ -92,9 +92,8 @@ struct ceva_ahci_priv {
};
static unsigned int ceva_ahci_read_id(struct ata_device *dev,
- struct ata_taskfile *tf, u16 *id)
+ struct ata_taskfile *tf, __le16 *id)
{
- __le16 *__id = (__le16 *)id;
u32 err_mask;
err_mask = ata_do_dev_read_id(dev, tf, id);
@@ -104,7 +103,7 @@ static unsigned int ceva_ahci_read_id(struct ata_device *dev,
* Since CEVA controller does not support device sleep feature, we
* need to clear DEVSLP (bit 8) in word78 of the IDENTIFY DEVICE data.
*/
- __id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8));
+ id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8));
return 0;
}
diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
index 5b46fc9aeb4a..bf5b388bd4e0 100644
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -103,8 +103,6 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class,
int rc;
bool ls1021a_workaround = (qoriq_priv->type == AHCI_LS1021A);
- DPRINTK("ENTER\n");
-
hpriv->stop_engine(ap);
/*
@@ -146,8 +144,6 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class,
if (online)
*class = ahci_dev_classify(ap);
-
- DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class);
return rc;
}
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index dffc432b9d54..8e206379d699 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -193,7 +193,7 @@ static unsigned int xgene_ahci_qc_issue(struct ata_queued_cmd *qc)
struct xgene_ahci_context *ctx = hpriv->plat_data;
int rc = 0;
u32 port_fbs;
- void *port_mmio = ahci_port_base(ap);
+ void __iomem *port_mmio = ahci_port_base(ap);
/*
* Write the pmp value to PxFBS.DEV
@@ -237,7 +237,7 @@ static bool xgene_ahci_is_memram_inited(struct xgene_ahci_context *ctx)
* does not support DEVSLP.
*/
static unsigned int xgene_ahci_read_id(struct ata_device *dev,
- struct ata_taskfile *tf, u16 *id)
+ struct ata_taskfile *tf, __le16 *id)
{
u32 err_mask;
@@ -454,7 +454,7 @@ static int xgene_ahci_pmp_softreset(struct ata_link *link, unsigned int *class,
int pmp = sata_srst_pmp(link);
struct ata_port *ap = link->ap;
u32 rc;
- void *port_mmio = ahci_port_base(ap);
+ void __iomem *port_mmio = ahci_port_base(ap);
u32 port_fbs;
/*
@@ -499,7 +499,7 @@ static int xgene_ahci_softreset(struct ata_link *link, unsigned int *class,
struct ata_port *ap = link->ap;
struct ahci_host_priv *hpriv = ap->host->private_data;
struct xgene_ahci_context *ctx = hpriv->plat_data;
- void *port_mmio = ahci_port_base(ap);
+ void __iomem *port_mmio = ahci_port_base(ap);
u32 port_fbs;
u32 port_fbs_save;
u32 retry = 1;
@@ -588,8 +588,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance)
void __iomem *mmio;
u32 irq_stat, irq_masked;
- VPRINTK("ENTER\n");
-
hpriv = host->private_data;
mmio = hpriv->mmio;
@@ -612,8 +610,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance)
spin_unlock(&host->lock);
- VPRINTK("EXIT\n");
-
return IRQ_RETVAL(rc);
}
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 0b2fcf0d1d6c..27b0d903f91f 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -77,6 +77,7 @@
#include <scsi/scsi_host.h>
#include <linux/libata.h>
#include <linux/dmi.h>
+#include <trace/events/libata.h>
#define DRV_NAME "ata_piix"
#define DRV_VERSION "2.13"
@@ -816,10 +817,15 @@ static int piix_sidpr_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
static bool piix_irq_check(struct ata_port *ap)
{
+ unsigned char host_stat;
+
if (unlikely(!ap->ioaddr.bmdma_addr))
return false;
- return ap->ops->bmdma_status(ap) & ATA_DMA_INTR;
+ host_stat = ap->ops->bmdma_status(ap);
+ trace_ata_bmdma_status(ap, host_stat);
+
+ return host_stat & ATA_DMA_INTR;
}
#ifdef CONFIG_PM_SLEEP
@@ -1345,7 +1351,6 @@ static void piix_init_pcs(struct ata_host *host,
new_pcs = pcs | map_db->port_enable;
if (new_pcs != pcs) {
- DPRINTK("updating PCS from 0x%x to 0x%x\n", pcs, new_pcs);
pci_write_config_word(pdev, ICH5_PCS, new_pcs);
msleep(150);
}
@@ -1769,14 +1774,12 @@ static int __init piix_init(void)
{
int rc;
- DPRINTK("pci_register_driver\n");
rc = pci_register_driver(&piix_pci_driver);
if (rc)
return rc;
in_module_init = 0;
- DPRINTK("done\n");
return 0;
}
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index f76b8418e6fb..0ed484e04fd6 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1234,12 +1234,12 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap,
/* clear SError */
tmp = readl(port_mmio + PORT_SCR_ERR);
- VPRINTK("PORT_SCR_ERR 0x%x\n", tmp);
+ dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp);
writel(tmp, port_mmio + PORT_SCR_ERR);
/* clear port IRQ */
tmp = readl(port_mmio + PORT_IRQ_STAT);
- VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp);
+ dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp);
if (tmp)
writel(tmp, port_mmio + PORT_IRQ_STAT);
@@ -1270,10 +1270,10 @@ void ahci_init_controller(struct ata_host *host)
}
tmp = readl(mmio + HOST_CTL);
- VPRINTK("HOST_CTL 0x%x\n", tmp);
+ dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp);
writel(tmp | HOST_IRQ_EN, mmio + HOST_CTL);
tmp = readl(mmio + HOST_CTL);
- VPRINTK("HOST_CTL 0x%x\n", tmp);
+ dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp);
}
EXPORT_SYMBOL_GPL(ahci_init_controller);
@@ -1300,7 +1300,7 @@ unsigned int ahci_dev_classify(struct ata_port *ap)
tf.lbal = (tmp >> 8) & 0xff;
tf.nsect = (tmp) & 0xff;
- return ata_dev_classify(&tf);
+ return ata_port_classify(ap, &tf);
}
EXPORT_SYMBOL_GPL(ahci_dev_classify);
@@ -1415,8 +1415,6 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
bool fbs_disabled = false;
int rc;
- DPRINTK("ENTER\n");
-
/* prepare for SRST (AHCI-1.1 10.4.1) */
rc = ahci_kick_engine(ap);
if (rc && rc != -EOPNOTSUPP)
@@ -1476,7 +1474,6 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
if (fbs_disabled)
ahci_enable_fbs(ap);
- DPRINTK("EXIT, class=%u\n", *class);
return 0;
fail:
@@ -1498,8 +1495,6 @@ static int ahci_softreset(struct ata_link *link, unsigned int *class,
{
int pmp = sata_srst_pmp(link);
- DPRINTK("ENTER\n");
-
return ahci_do_softreset(link, class, pmp, deadline, ahci_check_ready);
}
EXPORT_SYMBOL_GPL(ahci_do_softreset);
@@ -1529,8 +1524,6 @@ static int ahci_pmp_retry_softreset(struct ata_link *link, unsigned int *class,
int rc;
u32 irq_sts;
- DPRINTK("ENTER\n");
-
rc = ahci_do_softreset(link, class, pmp, deadline,
ahci_bad_pmp_check_ready);
@@ -1564,8 +1557,6 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
struct ata_taskfile tf;
int rc;
- DPRINTK("ENTER\n");
-
hpriv->stop_engine(ap);
/* clear D2H reception area to properly wait for D2H FIS */
@@ -1581,7 +1572,6 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
if (*online)
*class = ahci_dev_classify(ap);
- DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class);
return rc;
}
EXPORT_SYMBOL_GPL(ahci_do_hardreset);
@@ -1620,8 +1610,6 @@ static unsigned int ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl)
struct ahci_sg *ahci_sg = cmd_tbl + AHCI_CMD_TBL_HDR_SZ;
unsigned int si;
- VPRINTK("ENTER\n");
-
/*
* Next, the S/G list.
*/
@@ -1695,7 +1683,6 @@ static void ahci_fbs_dec_intr(struct ata_port *ap)
u32 fbs = readl(port_mmio + PORT_FBS);
int retries = 3;
- DPRINTK("ENTER\n");
BUG_ON(!pp->fbs_enabled);
/* time to wait for DEC is not specified by AHCI spec,
@@ -1924,8 +1911,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance)
void __iomem *port_mmio = ahci_port_base(ap);
u32 status;
- VPRINTK("ENTER\n");
-
status = readl(port_mmio + PORT_IRQ_STAT);
writel(status, port_mmio + PORT_IRQ_STAT);
@@ -1933,8 +1918,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance)
ahci_handle_port_interrupt(ap, port_mmio, status);
spin_unlock(ap->lock);
- VPRINTK("EXIT\n");
-
return IRQ_HANDLED;
}
@@ -1951,9 +1934,7 @@ u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
ap = host->ports[i];
if (ap) {
ahci_port_intr(ap);
- VPRINTK("port %u\n", i);
} else {
- VPRINTK("port %u (no irq)\n", i);
if (ata_ratelimit())
dev_warn(host->dev,
"interrupt on disabled port %u\n", i);
@@ -1974,8 +1955,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance)
void __iomem *mmio;
u32 irq_stat, irq_masked;
- VPRINTK("ENTER\n");
-
hpriv = host->private_data;
mmio = hpriv->mmio;
@@ -2003,8 +1982,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance)
spin_unlock(&host->lock);
- VPRINTK("EXIT\n");
-
return IRQ_RETVAL(rc);
}
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 0910441321f7..18296443ccba 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -579,11 +579,8 @@ int ahci_platform_init_host(struct platform_device *pdev,
int i, irq, n_ports, rc;
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- if (irq != -EPROBE_DEFER)
- dev_err(dev, "no irq\n");
+ if (irq < 0)
return irq;
- }
if (!irq)
return -EINVAL;
@@ -642,13 +639,8 @@ int ahci_platform_init_host(struct platform_device *pdev,
if (hpriv->cap & HOST_CAP_64) {
rc = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (rc) {
- rc = dma_coerce_mask_and_coherent(dev,
- DMA_BIT_MASK(32));
- if (rc) {
- dev_err(dev, "Failed to enable 64-bit DMA.\n");
- return rc;
- }
- dev_warn(dev, "Enable 32-bit DMA instead of 64-bit.\n");
+ dev_err(dev, "Failed to enable 64-bit DMA.\n");
+ return rc;
}
}
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 7a7d6642edcc..8cfa8c96bb13 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -402,7 +402,6 @@ EXPORT_SYMBOL_GPL(ata_acpi_stm);
*/
static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf)
{
- struct ata_port *ap = dev->link->ap;
acpi_status status;
struct acpi_buffer output;
union acpi_object *out_obj;
@@ -418,10 +417,6 @@ static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf)
output.length = ACPI_ALLOCATE_BUFFER;
output.pointer = NULL; /* ACPI-CA sets this; save/free it later */
- if (ata_msg_probe(ap))
- ata_dev_dbg(dev, "%s: ENTER: port#: %d\n",
- __func__, ap->port_no);
-
/* _GTF has no input parameters */
status = acpi_evaluate_object(ata_dev_acpi_handle(dev), "_GTF", NULL,
&output);
@@ -437,11 +432,9 @@ static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf)
}
if (!output.length || !output.pointer) {
- if (ata_msg_probe(ap))
- ata_dev_dbg(dev, "%s: Run _GTF: length or ptr is NULL (0x%llx, 0x%p)\n",
- __func__,
- (unsigned long long)output.length,
- output.pointer);
+ ata_dev_dbg(dev, "Run _GTF: length or ptr is NULL (0x%llx, 0x%p)\n",
+ (unsigned long long)output.length,
+ output.pointer);
rc = -EINVAL;
goto out_free;
}
@@ -464,9 +457,8 @@ static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf)
rc = out_obj->buffer.length / REGS_PER_GTF;
if (gtf) {
*gtf = (void *)out_obj->buffer.pointer;
- if (ata_msg_probe(ap))
- ata_dev_dbg(dev, "%s: returning gtf=%p, gtf_count=%d\n",
- __func__, *gtf, rc);
+ ata_dev_dbg(dev, "returning gtf=%p, gtf_count=%d\n",
+ *gtf, rc);
}
return rc;
@@ -650,9 +642,7 @@ static int ata_acpi_run_tf(struct ata_device *dev,
struct ata_taskfile *pptf = NULL;
struct ata_taskfile tf, ptf, rtf;
unsigned int err_mask;
- const char *level;
const char *descr;
- char msg[60];
int rc;
if ((gtf->tf[0] == 0) && (gtf->tf[1] == 0) && (gtf->tf[2] == 0)
@@ -666,6 +656,8 @@ static int ata_acpi_run_tf(struct ata_device *dev,
pptf = &ptf;
}
+ descr = ata_get_cmd_name(tf.command);
+
if (!ata_acpi_filter_tf(dev, &tf, pptf)) {
rtf = tf;
err_mask = ata_exec_internal(dev, &rtf, NULL,
@@ -673,40 +665,42 @@ static int ata_acpi_run_tf(struct ata_device *dev,
switch (err_mask) {
case 0:
- level = KERN_DEBUG;
- snprintf(msg, sizeof(msg), "succeeded");
+ ata_dev_dbg(dev,
+ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x"
+ "(%s) succeeded\n",
+ tf.command, tf.feature, tf.nsect, tf.lbal,
+ tf.lbam, tf.lbah, tf.device, descr);
rc = 1;
break;
case AC_ERR_DEV:
- level = KERN_INFO;
- snprintf(msg, sizeof(msg),
- "rejected by device (Stat=0x%02x Err=0x%02x)",
- rtf.command, rtf.feature);
+ ata_dev_info(dev,
+ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x"
+ "(%s) rejected by device (Stat=0x%02x Err=0x%02x)",
+ tf.command, tf.feature, tf.nsect, tf.lbal,
+ tf.lbam, tf.lbah, tf.device, descr,
+ rtf.command, rtf.feature);
rc = 0;
break;
default:
- level = KERN_ERR;
- snprintf(msg, sizeof(msg),
- "failed (Emask=0x%x Stat=0x%02x Err=0x%02x)",
- err_mask, rtf.command, rtf.feature);
+ ata_dev_err(dev,
+ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x"
+ "(%s) failed (Emask=0x%x Stat=0x%02x Err=0x%02x)",
+ tf.command, tf.feature, tf.nsect, tf.lbal,
+ tf.lbam, tf.lbah, tf.device, descr,
+ err_mask, rtf.command, rtf.feature);
rc = -EIO;
break;
}
} else {
- level = KERN_INFO;
- snprintf(msg, sizeof(msg), "filtered out");
+ ata_dev_info(dev,
+ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x"
+ "(%s) filtered out\n",
+ tf.command, tf.feature, tf.nsect, tf.lbal,
+ tf.lbam, tf.lbah, tf.device, descr);
rc = 0;
}
- descr = ata_get_cmd_descript(tf.command);
-
- ata_dev_printk(dev, level,
- "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x (%s) %s\n",
- tf.command, tf.feature, tf.nsect, tf.lbal,
- tf.lbam, tf.lbah, tf.device,
- (descr ? descr : "unknown"), msg);
-
return rc;
}
@@ -776,9 +770,8 @@ static int ata_acpi_push_id(struct ata_device *dev)
struct acpi_object_list input;
union acpi_object in_params[1];
- if (ata_msg_probe(ap))
- ata_dev_dbg(dev, "%s: ix = %d, port#: %d\n",
- __func__, dev->devno, ap->port_no);
+ ata_dev_dbg(dev, "%s: ix = %d, port#: %d\n",
+ __func__, dev->devno, ap->port_no);
/* Give the drive Identify data to the drive via the _SDD method */
/* _SDD: set up input parameters */
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index aba0c67d1bd6..67f88027680a 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -764,9 +764,6 @@ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
head = track % dev->heads;
sect = (u32)block % dev->sectors + 1;
- DPRINTK("block %u track %u cyl %u head %u sect %u\n",
- (u32)block, track, cyl, head, sect);
-
/* Check whether the converted CHS can fit.
Cylinder: 0-65535
Head: 0-15
@@ -1010,32 +1007,21 @@ unsigned int ata_dev_classify(const struct ata_taskfile *tf)
* SEMB signature. This is worked around in
* ata_dev_read_id().
*/
- if ((tf->lbam == 0) && (tf->lbah == 0)) {
- DPRINTK("found ATA device by sig\n");
+ if (tf->lbam == 0 && tf->lbah == 0)
return ATA_DEV_ATA;
- }
- if ((tf->lbam == 0x14) && (tf->lbah == 0xeb)) {
- DPRINTK("found ATAPI device by sig\n");
+ if (tf->lbam == 0x14 && tf->lbah == 0xeb)
return ATA_DEV_ATAPI;
- }
- if ((tf->lbam == 0x69) && (tf->lbah == 0x96)) {
- DPRINTK("found PMP device by sig\n");
+ if (tf->lbam == 0x69 && tf->lbah == 0x96)
return ATA_DEV_PMP;
- }
- if ((tf->lbam == 0x3c) && (tf->lbah == 0xc3)) {
- DPRINTK("found SEMB device by sig (could be ATA device)\n");
+ if (tf->lbam == 0x3c && tf->lbah == 0xc3)
return ATA_DEV_SEMB;
- }
- if ((tf->lbam == 0xcd) && (tf->lbah == 0xab)) {
- DPRINTK("found ZAC device by sig\n");
+ if (tf->lbam == 0xcd && tf->lbah == 0xab)
return ATA_DEV_ZAC;
- }
- DPRINTK("unknown device\n");
return ATA_DEV_UNKNOWN;
}
EXPORT_SYMBOL_GPL(ata_dev_classify);
@@ -1355,6 +1341,7 @@ static int ata_hpa_resize(struct ata_device *dev)
/**
* ata_dump_id - IDENTIFY DEVICE info debugging output
+ * @dev: device from which the information is fetched
* @id: IDENTIFY DEVICE page to dump
*
* Dump selected 16-bit words from the given IDENTIFY DEVICE
@@ -1364,32 +1351,14 @@ static int ata_hpa_resize(struct ata_device *dev)
* caller.
*/
-static inline void ata_dump_id(const u16 *id)
-{
- DPRINTK("49==0x%04x "
- "53==0x%04x "
- "63==0x%04x "
- "64==0x%04x "
- "75==0x%04x \n",
- id[49],
- id[53],
- id[63],
- id[64],
- id[75]);
- DPRINTK("80==0x%04x "
- "81==0x%04x "
- "82==0x%04x "
- "83==0x%04x "
- "84==0x%04x \n",
- id[80],
- id[81],
- id[82],
- id[83],
- id[84]);
- DPRINTK("88==0x%04x "
- "93==0x%04x\n",
- id[88],
- id[93]);
+static inline void ata_dump_id(struct ata_device *dev, const u16 *id)
+{
+ ata_dev_dbg(dev,
+ "49==0x%04x 53==0x%04x 63==0x%04x 64==0x%04x 75==0x%04x\n"
+ "80==0x%04x 81==0x%04x 82==0x%04x 83==0x%04x 84==0x%04x\n"
+ "88==0x%04x 93==0x%04x\n",
+ id[49], id[53], id[63], id[64], id[75], id[80],
+ id[81], id[82], id[83], id[84], id[88], id[93]);
}
/**
@@ -1602,9 +1571,8 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
else
ata_qc_complete(qc);
- if (ata_msg_warn(ap))
- ata_dev_warn(dev, "qc timeout (cmd 0x%x)\n",
- command);
+ ata_dev_warn(dev, "qc timeout (cmd 0x%x)\n",
+ command);
}
spin_unlock_irqrestore(ap->lock, flags);
@@ -1754,7 +1722,7 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
* this function is wrapped or replaced by the driver
*/
unsigned int ata_do_dev_read_id(struct ata_device *dev,
- struct ata_taskfile *tf, u16 *id)
+ struct ata_taskfile *tf, __le16 *id)
{
return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
id, sizeof(id[0]) * ATA_ID_WORDS, 0);
@@ -1794,9 +1762,6 @@ int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class,
int may_fallback = 1, tried_spinup = 0;
int rc;
- if (ata_msg_ctl(ap))
- ata_dev_dbg(dev, "%s: ENTER\n", __func__);
-
retry:
ata_tf_init(dev, &tf);
@@ -1830,9 +1795,9 @@ retry:
tf.flags |= ATA_TFLAG_POLLING;
if (ap->ops->read_id)
- err_mask = ap->ops->read_id(dev, &tf, id);
+ err_mask = ap->ops->read_id(dev, &tf, (__le16 *)id);
else
- err_mask = ata_do_dev_read_id(dev, &tf, id);
+ err_mask = ata_do_dev_read_id(dev, &tf, (__le16 *)id);
if (err_mask) {
if (err_mask & AC_ERR_NODEV_HINT) {
@@ -1879,10 +1844,10 @@ retry:
}
if (dev->horkage & ATA_HORKAGE_DUMP_ID) {
- ata_dev_dbg(dev, "dumping IDENTIFY data, "
+ ata_dev_info(dev, "dumping IDENTIFY data, "
"class=%d may_fallback=%d tried_spinup=%d\n",
class, may_fallback, tried_spinup);
- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET,
+ print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET,
16, 2, id, ATA_ID_WORDS * sizeof(*id), true);
}
@@ -1966,9 +1931,8 @@ retry:
return 0;
err_out:
- if (ata_msg_warn(ap))
- ata_dev_warn(dev, "failed to IDENTIFY (%s, err_mask=0x%x)\n",
- reason, err_mask);
+ ata_dev_warn(dev, "failed to IDENTIFY (%s, err_mask=0x%x)\n",
+ reason, err_mask);
return rc;
}
@@ -1996,7 +1960,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
unsigned int err_mask;
bool dma = false;
- DPRINTK("read log page - log 0x%x, page 0x%x\n", log, page);
+ ata_dev_dbg(dev, "read log page - log 0x%x, page 0x%x\n", log, page);
/*
* Return error without actually issuing the command on controllers
@@ -2390,7 +2354,6 @@ static void ata_dev_config_trusted(struct ata_device *dev)
static int ata_dev_config_lba(struct ata_device *dev)
{
- struct ata_port *ap = dev->link->ap;
const u16 *id = dev->id;
const char *lba_desc;
char ncq_desc[24];
@@ -2412,7 +2375,7 @@ static int ata_dev_config_lba(struct ata_device *dev)
ret = ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc));
/* print device info to dmesg */
- if (ata_msg_drv(ap) && ata_dev_print_info(dev))
+ if (ata_dev_print_info(dev))
ata_dev_info(dev,
"%llu sectors, multi %u: %s %s\n",
(unsigned long long)dev->n_sectors,
@@ -2423,7 +2386,6 @@ static int ata_dev_config_lba(struct ata_device *dev)
static void ata_dev_config_chs(struct ata_device *dev)
{
- struct ata_port *ap = dev->link->ap;
const u16 *id = dev->id;
if (ata_id_current_chs_valid(id)) {
@@ -2439,7 +2401,7 @@ static void ata_dev_config_chs(struct ata_device *dev)
}
/* print device info to dmesg */
- if (ata_msg_drv(ap) && ata_dev_print_info(dev))
+ if (ata_dev_print_info(dev))
ata_dev_info(dev,
"%llu sectors, multi %u, CHS %u/%u/%u\n",
(unsigned long long)dev->n_sectors,
@@ -2566,14 +2528,11 @@ int ata_dev_configure(struct ata_device *dev)
char modelbuf[ATA_ID_PROD_LEN+1];
int rc;
- if (!ata_dev_enabled(dev) && ata_msg_info(ap)) {
- ata_dev_info(dev, "%s: ENTER/EXIT -- nodev\n", __func__);
+ if (!ata_dev_enabled(dev)) {
+ ata_dev_dbg(dev, "no device\n");
return 0;
}
- if (ata_msg_probe(ap))
- ata_dev_dbg(dev, "%s: ENTER\n", __func__);
-
/* set horkage */
dev->horkage |= ata_dev_blacklisted(dev);
ata_force_horkage(dev);
@@ -2621,13 +2580,12 @@ int ata_dev_configure(struct ata_device *dev)
return rc;
/* print device capabilities */
- if (ata_msg_probe(ap))
- ata_dev_dbg(dev,
- "%s: cfg 49:%04x 82:%04x 83:%04x 84:%04x "
- "85:%04x 86:%04x 87:%04x 88:%04x\n",
- __func__,
- id[49], id[82], id[83], id[84],
- id[85], id[86], id[87], id[88]);
+ ata_dev_dbg(dev,
+ "%s: cfg 49:%04x 82:%04x 83:%04x 84:%04x "
+ "85:%04x 86:%04x 87:%04x 88:%04x\n",
+ __func__,
+ id[49], id[82], id[83], id[84],
+ id[85], id[86], id[87], id[88]);
/* initialize to-be-configured parameters */
dev->flags &= ~ATA_DFLAG_CFG_MASK;
@@ -2646,8 +2604,7 @@ int ata_dev_configure(struct ata_device *dev)
/* find max transfer mode; for printk only */
xfer_mask = ata_id_xfermask(id);
- if (ata_msg_probe(ap))
- ata_dump_id(id);
+ ata_dump_id(dev, id);
/* SCSI only uses 4-char revisions, dump full 8 chars from ATA */
ata_id_c_string(dev->id, fwrevbuf, ATA_ID_FW_REV,
@@ -2685,7 +2642,7 @@ int ata_dev_configure(struct ata_device *dev)
}
/* print device info to dmesg */
- if (ata_msg_drv(ap) && print_info)
+ if (print_info)
ata_dev_info(dev, "%s: %s, %s, max %s\n",
revbuf, modelbuf, fwrevbuf,
ata_mode_string(xfer_mask));
@@ -2705,7 +2662,7 @@ int ata_dev_configure(struct ata_device *dev)
ata_dev_config_cpr(dev);
dev->cdb_len = 32;
- if (ata_msg_drv(ap) && print_info)
+ if (print_info)
ata_dev_print_features(dev);
}
@@ -2718,8 +2675,7 @@ int ata_dev_configure(struct ata_device *dev)
rc = atapi_cdb_len(id);
if ((rc < 12) || (rc > ATAPI_CDB_LEN)) {
- if (ata_msg_warn(ap))
- ata_dev_warn(dev, "unsupported CDB len\n");
+ ata_dev_warn(dev, "unsupported CDB len %d\n", rc);
rc = -EINVAL;
goto err_out_nosup;
}
@@ -2763,7 +2719,7 @@ int ata_dev_configure(struct ata_device *dev)
}
/* print device info to dmesg */
- if (ata_msg_drv(ap) && print_info)
+ if (print_info)
ata_dev_info(dev,
"ATAPI: %s, %s, max %s%s%s%s\n",
modelbuf, fwrevbuf,
@@ -2780,7 +2736,7 @@ int ata_dev_configure(struct ata_device *dev)
/* Limit PATA drive on SATA cable bridge transfers to udma5,
200 sectors */
if (ata_dev_knobble(dev)) {
- if (ata_msg_drv(ap) && print_info)
+ if (print_info)
ata_dev_info(dev, "applying bridge limits\n");
dev->udma_mask &= ATA_UDMA5;
dev->max_sectors = ATA_MAX_SECTORS;
@@ -2829,8 +2785,6 @@ int ata_dev_configure(struct ata_device *dev)
return 0;
err_out_nosup:
- if (ata_msg_probe(ap))
- ata_dev_dbg(dev, "%s: EXIT, err\n", __func__);
return rc;
}
@@ -3373,8 +3327,8 @@ static int ata_dev_set_mode(struct ata_device *dev)
dev_err_whine = " (device error ignored)";
}
- DPRINTK("xfer_shift=%u, xfer_mode=0x%x\n",
- dev->xfer_shift, (int)dev->xfer_mode);
+ ata_dev_dbg(dev, "xfer_shift=%u, xfer_mode=0x%x\n",
+ dev->xfer_shift, (int)dev->xfer_mode);
if (!(ehc->i.flags & ATA_EHI_QUIET) ||
ehc->i.flags & ATA_EHI_DID_HARDRESET)
@@ -3688,16 +3642,12 @@ void ata_std_postreset(struct ata_link *link, unsigned int *classes)
{
u32 serror;
- DPRINTK("ENTER\n");
-
/* reset complete, clear SError */
if (!sata_scr_read(link, SCR_ERROR, &serror))
sata_scr_write(link, SCR_ERROR, serror);
/* print link status */
sata_print_link_status(link);
-
- DPRINTK("EXIT\n");
}
EXPORT_SYMBOL_GPL(ata_std_postreset);
@@ -4324,7 +4274,7 @@ static unsigned int ata_dev_set_xfermode(struct ata_device *dev)
unsigned int err_mask;
/* set up set-features taskfile */
- DPRINTK("set features - xfer mode\n");
+ ata_dev_dbg(dev, "set features - xfer mode\n");
/* Some controllers and ATAPI devices show flaky interrupt
* behavior after setting xfer mode. Use polling instead.
@@ -4346,7 +4296,6 @@ static unsigned int ata_dev_set_xfermode(struct ata_device *dev)
/* On some disks, this command causes spin-up, so we need longer timeout */
err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 15000);
- DPRINTK("EXIT, err_mask=%x\n", err_mask);
return err_mask;
}
@@ -4372,7 +4321,7 @@ unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, u8 feature)
unsigned long timeout = 0;
/* set up set-features taskfile */
- DPRINTK("set features - SATA features\n");
+ ata_dev_dbg(dev, "set features - SATA features\n");
ata_tf_init(dev, &tf);
tf.command = ATA_CMD_SET_FEATURES;
@@ -4386,7 +4335,6 @@ unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, u8 feature)
ata_probe_timeout * 1000 : SETFEATURES_SPINUP_TIMEOUT;
err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, timeout);
- DPRINTK("EXIT, err_mask=%x\n", err_mask);
return err_mask;
}
EXPORT_SYMBOL_GPL(ata_dev_set_feature);
@@ -4414,7 +4362,7 @@ static unsigned int ata_dev_init_params(struct ata_device *dev,
return AC_ERR_INVALID;
/* set up init dev params taskfile */
- DPRINTK("init dev params \n");
+ ata_dev_dbg(dev, "init dev params \n");
ata_tf_init(dev, &tf);
tf.command = ATA_CMD_INIT_DEV_PARAMS;
@@ -4430,7 +4378,6 @@ static unsigned int ata_dev_init_params(struct ata_device *dev,
if (err_mask == AC_ERR_DEV && (tf.feature & ATA_ABORTED))
err_mask = 0;
- DPRINTK("EXIT, err_mask=%x\n", err_mask);
return err_mask;
}
@@ -4542,8 +4489,6 @@ static void ata_sg_clean(struct ata_queued_cmd *qc)
WARN_ON_ONCE(sg == NULL);
- VPRINTK("unmapping %u sg elements\n", qc->n_elem);
-
if (qc->n_elem)
dma_unmap_sg(ap->dev, sg, qc->orig_n_elem, dir);
@@ -4569,13 +4514,10 @@ static int ata_sg_setup(struct ata_queued_cmd *qc)
struct ata_port *ap = qc->ap;
unsigned int n_elem;
- VPRINTK("ENTER, ata%u\n", ap->print_id);
-
n_elem = dma_map_sg(ap->dev, qc->sg, qc->n_elem, qc->dma_dir);
if (n_elem < 1)
return -1;
- VPRINTK("%d sg elements mapped\n", n_elem);
qc->orig_n_elem = qc->n_elem;
qc->n_elem = n_elem;
qc->flags |= ATA_QCFLAG_DMAMAP;
@@ -4930,6 +4872,7 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
return;
}
+ trace_ata_qc_prep(qc);
qc->err_mask |= ap->ops->qc_prep(qc);
if (unlikely(qc->err_mask))
goto err;
@@ -5375,8 +5318,6 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
{
struct ata_port *ap;
- DPRINTK("ENTER\n");
-
ap = kzalloc(sizeof(*ap), GFP_KERNEL);
if (!ap)
return NULL;
@@ -5388,15 +5329,6 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
ap->host = host;
ap->dev = host->dev;
-#if defined(ATA_VERBOSE_DEBUG)
- /* turn on all debugging levels */
- ap->msg_enable = 0x00FF;
-#elif defined(ATA_DEBUG)
- ap->msg_enable = ATA_MSG_DRV | ATA_MSG_INFO | ATA_MSG_CTL | ATA_MSG_WARN | ATA_MSG_ERR;
-#else
- ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR | ATA_MSG_WARN;
-#endif
-
mutex_init(&ap->scsi_scan_mutex);
INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
@@ -5493,8 +5425,6 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports)
int i;
void *dr;
- DPRINTK("ENTER\n");
-
/* alloc a container for our list of ATA ports (buses) */
sz = sizeof(struct ata_host) + (max_ports + 1) * sizeof(void *);
host = kzalloc(sz, GFP_KERNEL);
@@ -5784,9 +5714,7 @@ int ata_port_probe(struct ata_port *ap)
__ata_port_probe(ap);
ata_port_wait_eh(ap);
} else {
- DPRINTK("ata%u: bus probe begin\n", ap->print_id);
rc = ata_bus_probe(ap);
- DPRINTK("ata%u: bus probe end\n", ap->print_id);
}
return rc;
}
@@ -6553,69 +6481,14 @@ const struct ata_port_info ata_dummy_port_info = {
};
EXPORT_SYMBOL_GPL(ata_dummy_port_info);
-/*
- * Utility print functions
- */
-void ata_port_printk(const struct ata_port *ap, const char *level,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- printk("%sata%u: %pV", level, ap->print_id, &vaf);
-
- va_end(args);
-}
-EXPORT_SYMBOL(ata_port_printk);
-
-void ata_link_printk(const struct ata_link *link, const char *level,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- if (sata_pmp_attached(link->ap) || link->ap->slave_link)
- printk("%sata%u.%02u: %pV",
- level, link->ap->print_id, link->pmp, &vaf);
- else
- printk("%sata%u: %pV",
- level, link->ap->print_id, &vaf);
-
- va_end(args);
-}
-EXPORT_SYMBOL(ata_link_printk);
-
-void ata_dev_printk(const struct ata_device *dev, const char *level,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- printk("%sata%u.%02u: %pV",
- level, dev->link->ap->print_id, dev->link->pmp + dev->devno,
- &vaf);
-
- va_end(args);
-}
-EXPORT_SYMBOL(ata_dev_printk);
-
void ata_print_version(const struct device *dev, const char *version)
{
dev_printk(KERN_DEBUG, dev, "version %s\n", version);
}
EXPORT_SYMBOL(ata_print_version);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(ata_tf_load);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ata_exec_command);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ata_bmdma_setup);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ata_bmdma_start);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ata_bmdma_status);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 1d4a6f1e88cd..7951fd946bf9 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -533,8 +533,6 @@ void ata_scsi_error(struct Scsi_Host *host)
unsigned long flags;
LIST_HEAD(eh_work_q);
- DPRINTK("ENTER\n");
-
spin_lock_irqsave(host->host_lock, flags);
list_splice_init(&host->eh_cmd_q, &eh_work_q);
spin_unlock_irqrestore(host->host_lock, flags);
@@ -548,7 +546,6 @@ void ata_scsi_error(struct Scsi_Host *host)
/* finish or retry handled scmd's and clean up */
WARN_ON(!list_empty(&eh_work_q));
- DPRINTK("EXIT\n");
}
/**
@@ -940,7 +937,7 @@ void ata_std_sched_eh(struct ata_port *ap)
ata_eh_set_pending(ap, 1);
scsi_schedule_eh(ap->scsi_host);
- DPRINTK("port EH scheduled\n");
+ trace_ata_std_sched_eh(ap);
}
EXPORT_SYMBOL_GPL(ata_std_sched_eh);
@@ -1070,7 +1067,7 @@ static void __ata_port_freeze(struct ata_port *ap)
ap->pflags |= ATA_PFLAG_FROZEN;
- DPRINTK("ata%u port frozen\n", ap->print_id);
+ trace_ata_port_freeze(ap);
}
/**
@@ -1147,7 +1144,7 @@ void ata_eh_thaw_port(struct ata_port *ap)
spin_unlock_irqrestore(ap->lock, flags);
- DPRINTK("ata%u port thawed\n", ap->print_id);
+ trace_ata_port_thaw(ap);
}
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
@@ -1217,8 +1214,7 @@ void ata_dev_disable(struct ata_device *dev)
if (!ata_dev_enabled(dev))
return;
- if (ata_msg_drv(dev->link->ap))
- ata_dev_warn(dev, "disabled\n");
+ ata_dev_warn(dev, "disable device\n");
ata_acpi_on_disable(dev);
ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
dev->class++;
@@ -1287,6 +1283,8 @@ void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
struct ata_eh_context *ehc = &link->eh_context;
unsigned long flags;
+ trace_ata_eh_about_to_do(link, dev ? dev->devno : 0, action);
+
spin_lock_irqsave(ap->lock, flags);
ata_eh_clear_action(link, dev, ehi, action);
@@ -1317,6 +1315,8 @@ void ata_eh_done(struct ata_link *link, struct ata_device *dev,
{
struct ata_eh_context *ehc = &link->eh_context;
+ trace_ata_eh_done(link, dev ? dev->devno : 0, action);
+
ata_eh_clear_action(link, dev, &ehc->i, action);
}
@@ -1421,8 +1421,6 @@ static void ata_eh_request_sense(struct ata_queued_cmd *qc,
return;
}
- DPRINTK("ATA request sense\n");
-
ata_tf_init(dev, &tf);
tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
@@ -1463,8 +1461,6 @@ unsigned int atapi_eh_request_sense(struct ata_device *dev,
struct ata_port *ap = dev->link->ap;
struct ata_taskfile tf;
- DPRINTK("ATAPI request sense\n");
-
memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
/* initialize sense_buf with the error register,
@@ -1928,8 +1924,6 @@ static void ata_eh_link_autopsy(struct ata_link *link)
u32 serror;
int rc;
- DPRINTK("ENTER\n");
-
if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
return;
@@ -2036,7 +2030,6 @@ static void ata_eh_link_autopsy(struct ata_link *link)
ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
}
- DPRINTK("EXIT\n");
}
/**
@@ -2086,16 +2079,15 @@ void ata_eh_autopsy(struct ata_port *ap)
}
/**
- * ata_get_cmd_descript - get description for ATA command
- * @command: ATA command code to get description for
+ * ata_get_cmd_name - get name for ATA command
+ * @command: ATA command code to get name for
*
- * Return a textual description of the given command, or NULL if the
- * command is not known.
+ * Return a textual name of the given command or "unknown"
*
* LOCKING:
* None
*/
-const char *ata_get_cmd_descript(u8 command)
+const char *ata_get_cmd_name(u8 command)
{
#ifdef CONFIG_ATA_VERBOSE_ERROR
static const struct
@@ -2203,9 +2195,9 @@ const char *ata_get_cmd_descript(u8 command)
return cmd_descr[i].text;
#endif
- return NULL;
+ return "unknown";
}
-EXPORT_SYMBOL_GPL(ata_get_cmd_descript);
+EXPORT_SYMBOL_GPL(ata_get_cmd_name);
/**
* ata_eh_link_report - report error handling to user
@@ -2354,12 +2346,9 @@ static void ata_eh_link_report(struct ata_link *link)
}
__scsi_format_command(cdb_buf, sizeof(cdb_buf),
cdb, cdb_len);
- } else {
- const char *descr = ata_get_cmd_descript(cmd->command);
- if (descr)
- ata_dev_err(qc->dev, "failed command: %s\n",
- descr);
- }
+ } else
+ ata_dev_err(qc->dev, "failed command: %s\n",
+ ata_get_cmd_name(cmd->command));
ata_dev_err(qc->dev,
"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
@@ -2596,12 +2585,19 @@ int ata_eh_reset(struct ata_link *link, int classify,
/* mark that this EH session started with reset */
ehc->last_reset = jiffies;
- if (reset == hardreset)
+ if (reset == hardreset) {
ehc->i.flags |= ATA_EHI_DID_HARDRESET;
- else
+ trace_ata_link_hardreset_begin(link, classes, deadline);
+ } else {
ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
+ trace_ata_link_softreset_begin(link, classes, deadline);
+ }
rc = ata_do_reset(link, reset, classes, deadline, true);
+ if (reset == hardreset)
+ trace_ata_link_hardreset_end(link, classes, rc);
+ else
+ trace_ata_link_softreset_end(link, classes, rc);
if (rc && rc != -EAGAIN) {
failed_link = link;
goto fail;
@@ -2615,8 +2611,11 @@ int ata_eh_reset(struct ata_link *link, int classify,
ata_link_info(slave, "hard resetting link\n");
ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
+ trace_ata_slave_hardreset_begin(slave, classes,
+ deadline);
tmp = ata_do_reset(slave, reset, classes, deadline,
false);
+ trace_ata_slave_hardreset_end(slave, classes, tmp);
switch (tmp) {
case -EAGAIN:
rc = -EAGAIN;
@@ -2644,7 +2643,9 @@ int ata_eh_reset(struct ata_link *link, int classify,
}
ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
+ trace_ata_link_softreset_begin(link, classes, deadline);
rc = ata_do_reset(link, reset, classes, deadline, true);
+ trace_ata_link_softreset_end(link, classes, rc);
if (rc) {
failed_link = link;
goto fail;
@@ -2698,8 +2699,11 @@ int ata_eh_reset(struct ata_link *link, int classify,
*/
if (postreset) {
postreset(link, classes);
- if (slave)
+ trace_ata_link_postreset(link, classes, rc);
+ if (slave) {
postreset(slave, classes);
+ trace_ata_slave_postreset(slave, classes, rc);
+ }
}
/*
@@ -2921,8 +2925,6 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
unsigned long flags;
int rc = 0;
- DPRINTK("ENTER\n");
-
/* For PATA drive side cable detection to work, IDENTIFY must
* be done backwards such that PDIAG- is released by the slave
* device before the master device is identified.
@@ -3036,7 +3038,6 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
err:
*r_failed_dev = dev;
- DPRINTK("EXIT rc=%d\n", rc);
return rc;
}
@@ -3551,8 +3552,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
int rc, nr_fails;
unsigned long flags, deadline;
- DPRINTK("ENTER\n");
-
/* prep for recovery */
ata_for_each_link(link, ap, EDGE) {
struct ata_eh_context *ehc = &link->eh_context;
@@ -3760,7 +3759,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
if (rc && r_failed_link)
*r_failed_link = link;
- DPRINTK("EXIT, rc=%d\n", rc);
return rc;
}
diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
index ba7be3f38617..e2e9cbd405fa 100644
--- a/drivers/ata/libata-pmp.c
+++ b/drivers/ata/libata-pmp.c
@@ -652,8 +652,6 @@ static int sata_pmp_revalidate(struct ata_device *dev, unsigned int new_class)
u32 *gscr = (void *)ap->sector_buf;
int rc;
- DPRINTK("ENTER\n");
-
ata_eh_about_to_do(link, NULL, ATA_EH_REVALIDATE);
if (!ata_dev_enabled(dev)) {
@@ -686,12 +684,10 @@ static int sata_pmp_revalidate(struct ata_device *dev, unsigned int new_class)
ata_eh_done(link, NULL, ATA_EH_REVALIDATE);
- DPRINTK("EXIT, rc=0\n");
return 0;
fail:
ata_dev_err(dev, "PMP revalidation failed (errno=%d)\n", rc);
- DPRINTK("EXIT, rc=%d\n", rc);
return rc;
}
@@ -759,8 +755,6 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap,
int detach = 0, rc = 0;
int reval_failed = 0;
- DPRINTK("ENTER\n");
-
if (dev->flags & ATA_DFLAG_DETACH) {
detach = 1;
rc = -ENODEV;
@@ -828,7 +822,6 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap,
/* okay, PMP resurrected */
ehc->i.flags = 0;
- DPRINTK("EXIT, rc=0\n");
return 0;
fail:
@@ -838,7 +831,6 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap,
else
ata_dev_disable(dev);
- DPRINTK("EXIT, rc=%d\n", rc);
return rc;
}
diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c
index b9c77885b872..071158c0c44c 100644
--- a/drivers/ata/libata-sata.c
+++ b/drivers/ata/libata-sata.c
@@ -317,7 +317,7 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params,
* immediately after resuming. Delay 200ms before
* debouncing.
*/
- if (!(link->flags & ATA_LFLAG_NO_DB_DELAY))
+ if (!(link->flags & ATA_LFLAG_NO_DEBOUNCE_DELAY))
ata_msleep(link->ap, 200);
/* is SControl restored correctly? */
@@ -533,8 +533,6 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
u32 scontrol;
int rc;
- DPRINTK("ENTER\n");
-
if (online)
*online = false;
@@ -610,7 +608,6 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
*online = false;
ata_link_err(link, "COMRESET failed (errno=%d)\n", rc);
}
- DPRINTK("EXIT, rc=%d\n", rc);
return rc;
}
EXPORT_SYMBOL_GPL(sata_link_hardreset);
@@ -876,7 +873,7 @@ static ssize_t ata_ncq_prio_enable_show(struct device *device,
ncq_prio_enable = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE;
spin_unlock_irq(ap->lock);
- return rc ? rc : snprintf(buf, 20, "%u\n", ncq_prio_enable);
+ return rc ? rc : sysfs_emit(buf, "%u\n", ncq_prio_enable);
}
static ssize_t ata_ncq_prio_enable_store(struct device *device,
@@ -972,7 +969,7 @@ ata_scsi_em_message_type_show(struct device *dev, struct device_attribute *attr,
struct Scsi_Host *shost = class_to_shost(dev);
struct ata_port *ap = ata_shost_to_port(shost);
- return snprintf(buf, 23, "%d\n", ap->em_message_type);
+ return sysfs_emit(buf, "%d\n", ap->em_message_type);
}
DEVICE_ATTR(em_message_type, S_IRUGO,
ata_scsi_em_message_type_show, NULL);
@@ -1261,8 +1258,6 @@ int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap)
{
int rc = 0;
- ata_scsi_dump_cdb(ap, cmd);
-
if (likely(ata_dev_enabled(ap->link.device)))
rc = __ata_scsi_queuecmd(cmd, ap->link.device);
else {
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 313e9475507b..ed8be585a98f 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -121,7 +121,7 @@ static ssize_t ata_scsi_park_show(struct device *device,
unlock:
spin_unlock_irq(ap->lock);
- return rc ? rc : snprintf(buf, 20, "%u\n", msecs);
+ return rc ? rc : sysfs_emit(buf, "%u\n", msecs);
}
static ssize_t ata_scsi_park_store(struct device *device,
@@ -668,7 +668,7 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
/**
* ata_dump_status - user friendly display of error info
- * @id: id of the port in question
+ * @ap: the port in question
* @tf: ptr to filled out taskfile
*
* Decode and dump the ATA error/status registers for the user so
@@ -678,37 +678,32 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
* LOCKING:
* inherited from caller
*/
-static void ata_dump_status(unsigned id, struct ata_taskfile *tf)
+static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf)
{
u8 stat = tf->command, err = tf->feature;
- pr_warn("ata%u: status=0x%02x { ", id, stat);
if (stat & ATA_BUSY) {
- pr_cont("Busy }\n"); /* Data is not valid in this case */
+ ata_port_warn(ap, "status=0x%02x {Busy} ", stat);
} else {
- if (stat & ATA_DRDY) pr_cont("DriveReady ");
- if (stat & ATA_DF) pr_cont("DeviceFault ");
- if (stat & ATA_DSC) pr_cont("SeekComplete ");
- if (stat & ATA_DRQ) pr_cont("DataRequest ");
- if (stat & ATA_CORR) pr_cont("CorrectedError ");
- if (stat & ATA_SENSE) pr_cont("Sense ");
- if (stat & ATA_ERR) pr_cont("Error ");
- pr_cont("}\n");
-
- if (err) {
- pr_warn("ata%u: error=0x%02x { ", id, err);
- if (err & ATA_ABORTED) pr_cont("DriveStatusError ");
- if (err & ATA_ICRC) {
- if (err & ATA_ABORTED)
- pr_cont("BadCRC ");
- else pr_cont("Sector ");
- }
- if (err & ATA_UNC) pr_cont("UncorrectableError ");
- if (err & ATA_IDNF) pr_cont("SectorIdNotFound ");
- if (err & ATA_TRK0NF) pr_cont("TrackZeroNotFound ");
- if (err & ATA_AMNF) pr_cont("AddrMarkNotFound ");
- pr_cont("}\n");
- }
+ ata_port_warn(ap, "status=0x%02x { %s%s%s%s%s%s%s} ", stat,
+ stat & ATA_DRDY ? "DriveReady " : "",
+ stat & ATA_DF ? "DeviceFault " : "",
+ stat & ATA_DSC ? "SeekComplete " : "",
+ stat & ATA_DRQ ? "DataRequest " : "",
+ stat & ATA_CORR ? "CorrectedError " : "",
+ stat & ATA_SENSE ? "Sense " : "",
+ stat & ATA_ERR ? "Error " : "");
+ if (err)
+ ata_port_warn(ap, "error=0x%02x {%s%s%s%s%s%s", err,
+ err & ATA_ABORTED ?
+ "DriveStatusError " : "",
+ err & ATA_ICRC ?
+ (err & ATA_ABORTED ?
+ "BadCRC " : "Sector ") : "",
+ err & ATA_UNC ? "UncorrectableError " : "",
+ err & ATA_IDNF ? "SectorIdNotFound " : "",
+ err & ATA_TRK0NF ? "TrackZeroNotFound " : "",
+ err & ATA_AMNF ? "AddrMarkNotFound " : "");
}
}
@@ -1299,8 +1294,6 @@ static void scsi_6_lba_len(const u8 *cdb, u64 *plba, u32 *plen)
u64 lba = 0;
u32 len;
- VPRINTK("six-byte command\n");
-
lba |= ((u64)(cdb[1] & 0x1f)) << 16;
lba |= ((u64)cdb[2]) << 8;
lba |= ((u64)cdb[3]);
@@ -1326,8 +1319,6 @@ static void scsi_10_lba_len(const u8 *cdb, u64 *plba, u32 *plen)
u64 lba = 0;
u32 len = 0;
- VPRINTK("ten-byte command\n");
-
lba |= ((u64)cdb[2]) << 24;
lba |= ((u64)cdb[3]) << 16;
lba |= ((u64)cdb[4]) << 8;
@@ -1355,8 +1346,6 @@ static void scsi_16_lba_len(const u8 *cdb, u64 *plba, u32 *plen)
u64 lba = 0;
u32 len = 0;
- VPRINTK("sixteen-byte command\n");
-
lba |= ((u64)cdb[2]) << 56;
lba |= ((u64)cdb[3]) << 48;
lba |= ((u64)cdb[4]) << 40;
@@ -1469,9 +1458,6 @@ static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc)
head = track % dev->heads;
sect = (u32)block % dev->sectors + 1;
- DPRINTK("block %u track %u cyl %u head %u sect %u\n",
- (u32)block, track, cyl, head, sect);
-
/* Check whether the converted CHS can fit.
Cylinder: 0-65535
Head: 0-15
@@ -1594,7 +1580,6 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc)
goto invalid_fld;
break;
default:
- DPRINTK("no-byte command\n");
fp = 0;
goto invalid_fld;
}
@@ -1672,7 +1657,7 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
cmd->result = SAM_STAT_GOOD;
if (need_sense && !ap->ops->error_handler)
- ata_dump_status(ap->print_id, &qc->result_tf);
+ ata_dump_status(ap, &qc->result_tf);
ata_qc_done(qc);
}
@@ -1710,8 +1695,6 @@ static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd,
struct ata_queued_cmd *qc;
int rc;
- VPRINTK("ENTER\n");
-
qc = ata_scsi_qc_new(dev, cmd);
if (!qc)
goto err_mem;
@@ -1742,13 +1725,11 @@ static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd,
/* select device, send command to hardware */
ata_qc_issue(qc);
- VPRINTK("EXIT\n");
return 0;
early_finish:
ata_qc_free(qc);
scsi_done(cmd);
- DPRINTK("EXIT - early finish (good or error)\n");
return 0;
err_did:
@@ -1756,12 +1737,10 @@ err_did:
cmd->result = (DID_ERROR << 16);
scsi_done(cmd);
err_mem:
- DPRINTK("EXIT - internal\n");
return 0;
defer:
ata_qc_free(qc);
- DPRINTK("EXIT - defer\n");
if (rc == ATA_DEFER_LINK)
return SCSI_MLQUEUE_DEVICE_BUSY;
else
@@ -1858,8 +1837,6 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)
2
};
- VPRINTK("ENTER\n");
-
/* set scsi removable (RMB) bit per ata bit, or if the
* AHCI port says it's external (Hotplug-capable, eSATA).
*/
@@ -2294,8 +2271,6 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf)
u8 dpofua, bp = 0xff;
u16 fp;
- VPRINTK("ENTER\n");
-
six_byte = (scsicmd[0] == MODE_SENSE);
ebd = !(scsicmd[1] & 0x8); /* dbd bit inverted == edb */
/*
@@ -2413,8 +2388,6 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
log2_per_phys = ata_id_log2_per_physical_sector(dev->id);
lowest_aligned = ata_id_logical_sector_offset(dev->id, log2_per_phys);
- VPRINTK("ENTER\n");
-
if (args->cmd->cmnd[0] == READ_CAPACITY) {
if (last_lba >= 0xffffffffULL)
last_lba = 0xffffffff;
@@ -2481,7 +2454,6 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
*/
static unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf)
{
- VPRINTK("ENTER\n");
rbuf[3] = 8; /* just one lun, LUN 0, size 8 bytes */
return 0;
@@ -2512,8 +2484,6 @@ static void atapi_request_sense(struct ata_queued_cmd *qc)
struct ata_port *ap = qc->ap;
struct scsi_cmnd *cmd = qc->scsicmd;
- DPRINTK("ATAPI request sense\n");
-
memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
#ifdef CONFIG_ATA_SFF
@@ -2552,8 +2522,6 @@ static void atapi_request_sense(struct ata_queued_cmd *qc)
qc->complete_fn = atapi_sense_complete;
ata_qc_issue(qc);
-
- DPRINTK("EXIT\n");
}
/*
@@ -2581,8 +2549,6 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
struct scsi_cmnd *cmd = qc->scsicmd;
unsigned int err_mask = qc->err_mask;
- VPRINTK("ENTER, err_mask 0x%X\n", err_mask);
-
/* handle completion from new EH */
if (unlikely(qc->ap->ops->error_handler &&
(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID))) {
@@ -2663,7 +2629,6 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc)
qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
if (scmd->sc_data_direction == DMA_TO_DEVICE) {
qc->tf.flags |= ATA_TFLAG_WRITE;
- DPRINTK("direction: write\n");
}
qc->tf.command = ATA_CMD_PACKET;
@@ -3591,10 +3556,7 @@ static int ata_mselect_caching(struct ata_queued_cmd *qc,
*/
if (len != CACHE_MPAGE_LEN - 2) {
- if (len < CACHE_MPAGE_LEN - 2)
- *fp = len;
- else
- *fp = CACHE_MPAGE_LEN - 2;
+ *fp = min(len, CACHE_MPAGE_LEN - 2);
return -EINVAL;
}
@@ -3647,10 +3609,7 @@ static int ata_mselect_control(struct ata_queued_cmd *qc,
*/
if (len != CONTROL_MPAGE_LEN - 2) {
- if (len < CONTROL_MPAGE_LEN - 2)
- *fp = len;
- else
- *fp = CONTROL_MPAGE_LEN - 2;
+ *fp = min(len, CONTROL_MPAGE_LEN - 2);
return -EINVAL;
}
@@ -3698,8 +3657,6 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc)
u8 buffer[64];
const u8 *p = buffer;
- VPRINTK("ENTER\n");
-
six_byte = (cdb[0] == MODE_SELECT);
if (six_byte) {
if (scmd->cmd_len < 5) {
@@ -3997,70 +3954,45 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
return NULL;
}
-/**
- * ata_scsi_dump_cdb - dump SCSI command contents to dmesg
- * @ap: ATA port to which the command was being sent
- * @cmd: SCSI command to dump
- *
- * Prints the contents of a SCSI command via printk().
- */
-
-void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd)
-{
-#ifdef ATA_VERBOSE_DEBUG
- struct scsi_device *scsidev = cmd->device;
-
- VPRINTK("CDB (%u:%d,%d,%lld) %9ph\n",
- ap->print_id,
- scsidev->channel, scsidev->id, scsidev->lun,
- cmd->cmnd);
-#endif
-}
-
int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev)
{
u8 scsi_op = scmd->cmnd[0];
ata_xlat_func_t xlat_func;
- int rc = 0;
+
+ if (unlikely(!scmd->cmd_len))
+ goto bad_cdb_len;
if (dev->class == ATA_DEV_ATA || dev->class == ATA_DEV_ZAC) {
- if (unlikely(!scmd->cmd_len || scmd->cmd_len > dev->cdb_len))
+ if (unlikely(scmd->cmd_len > dev->cdb_len))
goto bad_cdb_len;
xlat_func = ata_get_xlat_func(dev, scsi_op);
- } else {
- if (unlikely(!scmd->cmd_len))
- goto bad_cdb_len;
+ } else if (likely((scsi_op != ATA_16) || !atapi_passthru16)) {
+ /* relay SCSI command to ATAPI device */
+ int len = COMMAND_SIZE(scsi_op);
- xlat_func = NULL;
- if (likely((scsi_op != ATA_16) || !atapi_passthru16)) {
- /* relay SCSI command to ATAPI device */
- int len = COMMAND_SIZE(scsi_op);
- if (unlikely(len > scmd->cmd_len ||
- len > dev->cdb_len ||
- scmd->cmd_len > ATAPI_CDB_LEN))
- goto bad_cdb_len;
+ if (unlikely(len > scmd->cmd_len ||
+ len > dev->cdb_len ||
+ scmd->cmd_len > ATAPI_CDB_LEN))
+ goto bad_cdb_len;
- xlat_func = atapi_xlat;
- } else {
- /* ATA_16 passthru, treat as an ATA command */
- if (unlikely(scmd->cmd_len > 16))
- goto bad_cdb_len;
+ xlat_func = atapi_xlat;
+ } else {
+ /* ATA_16 passthru, treat as an ATA command */
+ if (unlikely(scmd->cmd_len > 16))
+ goto bad_cdb_len;
- xlat_func = ata_get_xlat_func(dev, scsi_op);
- }
+ xlat_func = ata_get_xlat_func(dev, scsi_op);
}
if (xlat_func)
- rc = ata_scsi_translate(dev, scmd, xlat_func);
- else
- ata_scsi_simulate(dev, scmd);
+ return ata_scsi_translate(dev, scmd, xlat_func);
- return rc;
+ ata_scsi_simulate(dev, scmd);
+
+ return 0;
bad_cdb_len:
- DPRINTK("bad CDB len=%u, scsi_op=0x%02x, max=%u\n",
- scmd->cmd_len, scsi_op, dev->cdb_len);
scmd->result = DID_ERROR << 16;
scsi_done(scmd);
return 0;
@@ -4097,8 +4029,6 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
spin_lock_irqsave(ap->lock, irq_flags);
- ata_scsi_dump_cdb(ap, cmd);
-
dev = ata_scsi_find_dev(ap, scsidev);
if (likely(dev))
rc = __ata_scsi_queuecmd(cmd, dev);
@@ -4531,12 +4461,9 @@ void ata_scsi_hotplug(struct work_struct *work)
container_of(work, struct ata_port, hotplug_task.work);
int i;
- if (ap->pflags & ATA_PFLAG_UNLOADING) {
- DPRINTK("ENTER/EXIT - unloading\n");
+ if (ap->pflags & ATA_PFLAG_UNLOADING)
return;
- }
- DPRINTK("ENTER\n");
mutex_lock(&ap->scsi_scan_mutex);
/* Unplug detached devices. We cannot use link iterator here
@@ -4552,7 +4479,6 @@ void ata_scsi_hotplug(struct work_struct *work)
ata_scsi_scan_host(ap, 0);
mutex_unlock(&ap->scsi_scan_mutex);
- DPRINTK("EXIT\n");
}
/**
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index b71ea4a680b0..75217828dfe3 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -18,7 +18,7 @@
#include <linux/module.h>
#include <linux/libata.h>
#include <linux/highmem.h>
-
+#include <trace/events/libata.h>
#include "libata.h"
static struct workqueue_struct *ata_sff_wq;
@@ -330,10 +330,6 @@ EXPORT_SYMBOL_GPL(ata_sff_dev_select);
static void ata_dev_select(struct ata_port *ap, unsigned int device,
unsigned int wait, unsigned int can_sleep)
{
- if (ata_msg_probe(ap))
- ata_port_info(ap, "ata_dev_select: ENTER, device %u, wait %u\n",
- device, wait);
-
if (wait)
ata_wait_idle(ap);
@@ -409,12 +405,6 @@ void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
iowrite8(tf->hob_lbal, ioaddr->lbal_addr);
iowrite8(tf->hob_lbam, ioaddr->lbam_addr);
iowrite8(tf->hob_lbah, ioaddr->lbah_addr);
- VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n",
- tf->hob_feature,
- tf->hob_nsect,
- tf->hob_lbal,
- tf->hob_lbam,
- tf->hob_lbah);
}
if (is_addr) {
@@ -423,18 +413,10 @@ void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
iowrite8(tf->lbal, ioaddr->lbal_addr);
iowrite8(tf->lbam, ioaddr->lbam_addr);
iowrite8(tf->lbah, ioaddr->lbah_addr);
- VPRINTK("feat 0x%X nsect 0x%X lba 0x%X 0x%X 0x%X\n",
- tf->feature,
- tf->nsect,
- tf->lbal,
- tf->lbam,
- tf->lbah);
}
- if (tf->flags & ATA_TFLAG_DEVICE) {
+ if (tf->flags & ATA_TFLAG_DEVICE)
iowrite8(tf->device, ioaddr->device_addr);
- VPRINTK("device 0x%X\n", tf->device);
- }
ata_wait_idle(ap);
}
@@ -494,8 +476,6 @@ EXPORT_SYMBOL_GPL(ata_sff_tf_read);
*/
void ata_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf)
{
- DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command);
-
iowrite8(tf->command, ap->ioaddr.command_addr);
ata_sff_pause(ap);
}
@@ -505,6 +485,7 @@ EXPORT_SYMBOL_GPL(ata_sff_exec_command);
* ata_tf_to_host - issue ATA taskfile to host controller
* @ap: port to which command is being issued
* @tf: ATA taskfile register set
+ * @tag: tag of the associated command
*
* Issues ATA taskfile register set to ATA host controller,
* with proper synchronization with interrupt handler and
@@ -514,9 +495,12 @@ EXPORT_SYMBOL_GPL(ata_sff_exec_command);
* spin_lock_irqsave(host lock)
*/
static inline void ata_tf_to_host(struct ata_port *ap,
- const struct ata_taskfile *tf)
+ const struct ata_taskfile *tf,
+ unsigned int tag)
{
+ trace_ata_tf_load(ap, tf);
ap->ops->sff_tf_load(ap, tf);
+ trace_ata_exec_command(ap, tf, tag);
ap->ops->sff_exec_command(ap, tf);
}
@@ -680,7 +664,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
page = nth_page(page, (offset >> PAGE_SHIFT));
offset %= PAGE_SIZE;
- DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");
+ trace_ata_sff_pio_transfer_data(qc, offset, qc->sect_size);
/*
* Split the transfer when it splits a page boundary. Note that the
@@ -750,7 +734,7 @@ static void ata_pio_sectors(struct ata_queued_cmd *qc)
static void atapi_send_cdb(struct ata_port *ap, struct ata_queued_cmd *qc)
{
/* send SCSI cdb */
- DPRINTK("send cdb\n");
+ trace_atapi_send_cdb(qc, 0, qc->dev->cdb_len);
WARN_ON_ONCE(qc->dev->cdb_len < 12);
ap->ops->sff_data_xfer(qc, qc->cdb, qc->dev->cdb_len, 1);
@@ -768,6 +752,7 @@ static void atapi_send_cdb(struct ata_port *ap, struct ata_queued_cmd *qc)
case ATAPI_PROT_DMA:
ap->hsm_task_state = HSM_ST_LAST;
/* initiate bmdma */
+ trace_ata_bmdma_start(ap, &qc->tf, qc->tag);
ap->ops->bmdma_start(qc);
break;
#endif /* CONFIG_ATA_BMDMA */
@@ -820,7 +805,7 @@ next_sg:
/* don't cross page boundaries */
count = min(count, (unsigned int)PAGE_SIZE - offset);
- DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");
+ trace_atapi_pio_transfer_data(qc, offset, count);
/* do the actual data transfer */
buf = kmap_atomic(page);
@@ -888,8 +873,6 @@ static void atapi_pio_bytes(struct ata_queued_cmd *qc)
if (unlikely(!bytes))
goto atapi_check;
- VPRINTK("ata%u: xfering %d bytes\n", ap->print_id, bytes);
-
if (unlikely(__atapi_pio_bytes(qc, bytes)))
goto err_out;
ata_sff_sync(ap); /* flush */
@@ -1002,8 +985,7 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
WARN_ON_ONCE(in_wq != ata_hsm_ok_in_wq(ap, qc));
fsm_start:
- DPRINTK("ata%u: protocol %d task_state %d (dev_stat 0x%X)\n",
- ap->print_id, qc->tf.protocol, ap->hsm_task_state, status);
+ trace_ata_sff_hsm_state(qc, status);
switch (ap->hsm_task_state) {
case HSM_ST_FIRST:
@@ -1204,8 +1186,7 @@ fsm_start:
}
/* no more data to transfer */
- DPRINTK("ata%u: dev %u command complete, drv_stat 0x%x\n",
- ap->print_id, qc->dev->devno, status);
+ trace_ata_sff_hsm_command_complete(qc, status);
WARN_ON_ONCE(qc->err_mask & (AC_ERR_DEV | AC_ERR_HSM));
@@ -1262,7 +1243,7 @@ EXPORT_SYMBOL_GPL(ata_sff_queue_pio_task);
void ata_sff_flush_pio_task(struct ata_port *ap)
{
- DPRINTK("ENTER\n");
+ trace_ata_sff_flush_pio_task(ap);
cancel_delayed_work_sync(&ap->sff_pio_task);
@@ -1279,9 +1260,6 @@ void ata_sff_flush_pio_task(struct ata_port *ap)
spin_unlock_irq(ap->lock);
ap->sff_pio_task_link = NULL;
-
- if (ata_msg_ctl(ap))
- ata_port_dbg(ap, "%s: EXIT\n", __func__);
}
static void ata_sff_pio_task(struct work_struct *work)
@@ -1376,7 +1354,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
if (qc->tf.flags & ATA_TFLAG_POLLING)
ata_qc_set_polling(qc);
- ata_tf_to_host(ap, &qc->tf);
+ ata_tf_to_host(ap, &qc->tf, qc->tag);
ap->hsm_task_state = HSM_ST_LAST;
if (qc->tf.flags & ATA_TFLAG_POLLING)
@@ -1388,7 +1366,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
if (qc->tf.flags & ATA_TFLAG_POLLING)
ata_qc_set_polling(qc);
- ata_tf_to_host(ap, &qc->tf);
+ ata_tf_to_host(ap, &qc->tf, qc->tag);
if (qc->tf.flags & ATA_TFLAG_WRITE) {
/* PIO data out protocol */
@@ -1418,7 +1396,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
if (qc->tf.flags & ATA_TFLAG_POLLING)
ata_qc_set_polling(qc);
- ata_tf_to_host(ap, &qc->tf);
+ ata_tf_to_host(ap, &qc->tf, qc->tag);
ap->hsm_task_state = HSM_ST_FIRST;
@@ -1478,8 +1456,7 @@ static unsigned int __ata_sff_port_intr(struct ata_port *ap,
{
u8 status;
- VPRINTK("ata%u: protocol %d task_state %d\n",
- ap->print_id, qc->tf.protocol, ap->hsm_task_state);
+ trace_ata_sff_port_intr(qc, hsmv_on_idle);
/* Check whether we are expecting interrupt in this state */
switch (ap->hsm_task_state) {
@@ -1853,7 +1830,7 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
return ATA_DEV_NONE;
/* determine if device is ATA or ATAPI */
- class = ata_dev_classify(&tf);
+ class = ata_port_classify(ap, &tf);
if (class == ATA_DEV_UNKNOWN) {
/* If the device failed diagnostic, it's likely to
@@ -1956,8 +1933,6 @@ static int ata_bus_softreset(struct ata_port *ap, unsigned int devmask,
{
struct ata_ioports *ioaddr = &ap->ioaddr;
- DPRINTK("ata%u: bus reset via SRST\n", ap->print_id);
-
if (ap->ioaddr.ctl_addr) {
/* software reset. causes dev0 to be selected */
iowrite8(ap->ctl, ioaddr->ctl_addr);
@@ -1995,8 +1970,6 @@ int ata_sff_softreset(struct ata_link *link, unsigned int *classes,
int rc;
u8 err;
- DPRINTK("ENTER\n");
-
/* determine if device 0/1 are present */
if (ata_devchk(ap, 0))
devmask |= (1 << 0);
@@ -2007,7 +1980,6 @@ int ata_sff_softreset(struct ata_link *link, unsigned int *classes,
ap->ops->sff_dev_select(ap, 0);
/* issue bus reset */
- DPRINTK("about to softreset, devmask=%x\n", devmask);
rc = ata_bus_softreset(ap, devmask, deadline);
/* if link is occupied, -ENODEV too is an error */
if (rc && (rc != -ENODEV || sata_scr_valid(link))) {
@@ -2022,7 +1994,6 @@ int ata_sff_softreset(struct ata_link *link, unsigned int *classes,
classes[1] = ata_sff_dev_classify(&link->device[1],
devmask & (1 << 1), &err);
- DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]);
return 0;
}
EXPORT_SYMBOL_GPL(ata_sff_softreset);
@@ -2055,7 +2026,6 @@ int sata_sff_hardreset(struct ata_link *link, unsigned int *class,
if (online)
*class = ata_sff_dev_classify(link->device, 1, NULL);
- DPRINTK("EXIT, class=%u\n", *class);
return rc;
}
EXPORT_SYMBOL_GPL(sata_sff_hardreset);
@@ -2085,10 +2055,8 @@ void ata_sff_postreset(struct ata_link *link, unsigned int *classes)
ap->ops->sff_dev_select(ap, 0);
/* bail out if no device is present */
- if (classes[0] == ATA_DEV_NONE && classes[1] == ATA_DEV_NONE) {
- DPRINTK("EXIT, no device\n");
+ if (classes[0] == ATA_DEV_NONE && classes[1] == ATA_DEV_NONE)
return;
- }
/* set up device control */
if (ap->ops->sff_set_devctl || ap->ioaddr.ctl_addr) {
@@ -2123,7 +2091,6 @@ void ata_sff_drain_fifo(struct ata_queued_cmd *qc)
&& count < 65536; count += 2)
ioread16(ap->ioaddr.data_addr);
- /* Can become DEBUG later */
if (count)
ata_port_dbg(ap, "drained %d bytes to clear DRQ\n", count);
@@ -2467,8 +2434,6 @@ static int ata_pci_init_one(struct pci_dev *pdev,
struct ata_host *host = NULL;
int rc;
- DPRINTK("ENTER\n");
-
pi = ata_sff_find_valid_pi(ppi);
if (!pi) {
dev_err(&pdev->dev, "no valid port_info specified\n");
@@ -2614,7 +2579,6 @@ static void ata_bmdma_fill_sg(struct ata_queued_cmd *qc)
prd[pi].addr = cpu_to_le32(addr);
prd[pi].flags_len = cpu_to_le32(len & 0xffff);
- VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len);
pi++;
sg_len -= len;
@@ -2674,7 +2638,6 @@ static void ata_bmdma_fill_sg_dumb(struct ata_queued_cmd *qc)
prd[++pi].addr = cpu_to_le32(addr + 0x8000);
}
prd[pi].flags_len = cpu_to_le32(blen);
- VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len);
pi++;
sg_len -= len;
@@ -2756,8 +2719,11 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc)
case ATA_PROT_DMA:
WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
+ trace_ata_tf_load(ap, &qc->tf);
ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */
+ trace_ata_bmdma_setup(ap, &qc->tf, qc->tag);
ap->ops->bmdma_setup(qc); /* set up bmdma */
+ trace_ata_bmdma_start(ap, &qc->tf, qc->tag);
ap->ops->bmdma_start(qc); /* initiate bmdma */
ap->hsm_task_state = HSM_ST_LAST;
break;
@@ -2765,7 +2731,9 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc)
case ATAPI_PROT_DMA:
WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
+ trace_ata_tf_load(ap, &qc->tf);
ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */
+ trace_ata_bmdma_setup(ap, &qc->tf, qc->tag);
ap->ops->bmdma_setup(qc); /* set up bmdma */
ap->hsm_task_state = HSM_ST_FIRST;
@@ -2806,13 +2774,14 @@ unsigned int ata_bmdma_port_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
if (ap->hsm_task_state == HSM_ST_LAST && ata_is_dma(qc->tf.protocol)) {
/* check status of DMA engine */
host_stat = ap->ops->bmdma_status(ap);
- VPRINTK("ata%u: host_stat 0x%X\n", ap->print_id, host_stat);
+ trace_ata_bmdma_status(ap, host_stat);
/* if it's not our irq... */
if (!(host_stat & ATA_DMA_INTR))
return ata_sff_idle_irq(ap);
/* before we do anything else, clear DMA-Start bit */
+ trace_ata_bmdma_stop(ap, &qc->tf, qc->tag);
ap->ops->bmdma_stop(qc);
bmdma_stopped = true;
@@ -2881,6 +2850,7 @@ void ata_bmdma_error_handler(struct ata_port *ap)
u8 host_stat;
host_stat = ap->ops->bmdma_status(ap);
+ trace_ata_bmdma_status(ap, host_stat);
/* BMDMA controllers indicate host bus error by
* setting DMA_ERR bit and timing out. As it wasn't
@@ -2892,6 +2862,7 @@ void ata_bmdma_error_handler(struct ata_port *ap)
thaw = true;
}
+ trace_ata_bmdma_stop(ap, &qc->tf, qc->tag);
ap->ops->bmdma_stop(qc);
/* if we're gonna thaw, make sure IRQ is clear */
@@ -2925,6 +2896,7 @@ void ata_bmdma_post_internal_cmd(struct ata_queued_cmd *qc)
if (ata_is_dma(qc->tf.protocol)) {
spin_lock_irqsave(ap->lock, flags);
+ trace_ata_bmdma_stop(ap, &qc->tf, qc->tag);
ap->ops->bmdma_stop(qc);
spin_unlock_irqrestore(ap->lock, flags);
}
diff --git a/drivers/ata/libata-trace.c b/drivers/ata/libata-trace.c
index 08e001303a82..e0e4d0d5a100 100644
--- a/drivers/ata/libata-trace.c
+++ b/drivers/ata/libata-trace.c
@@ -39,6 +39,24 @@ libata_trace_parse_status(struct trace_seq *p, unsigned char status)
}
const char *
+libata_trace_parse_host_stat(struct trace_seq *p, unsigned char host_stat)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "{ ");
+ if (host_stat & ATA_DMA_INTR)
+ trace_seq_printf(p, "INTR ");
+ if (host_stat & ATA_DMA_ERR)
+ trace_seq_printf(p, "ERR ");
+ if (host_stat & ATA_DMA_ACTIVE)
+ trace_seq_printf(p, "ACTIVE ");
+ trace_seq_putc(p, '}');
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *
libata_trace_parse_eh_action(struct trace_seq *p, unsigned int eh_action)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -138,6 +156,35 @@ libata_trace_parse_qc_flags(struct trace_seq *p, unsigned int qc_flags)
}
const char *
+libata_trace_parse_tf_flags(struct trace_seq *p, unsigned int tf_flags)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "%x", tf_flags);
+ if (tf_flags) {
+ trace_seq_printf(p, "{ ");
+ if (tf_flags & ATA_TFLAG_LBA48)
+ trace_seq_printf(p, "LBA48 ");
+ if (tf_flags & ATA_TFLAG_ISADDR)
+ trace_seq_printf(p, "ISADDR ");
+ if (tf_flags & ATA_TFLAG_DEVICE)
+ trace_seq_printf(p, "DEV ");
+ if (tf_flags & ATA_TFLAG_WRITE)
+ trace_seq_printf(p, "WRITE ");
+ if (tf_flags & ATA_TFLAG_LBA)
+ trace_seq_printf(p, "LBA ");
+ if (tf_flags & ATA_TFLAG_FUA)
+ trace_seq_printf(p, "FUA ");
+ if (tf_flags & ATA_TFLAG_POLLING)
+ trace_seq_printf(p, "POLL ");
+ trace_seq_putc(p, '}');
+ }
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *
libata_trace_parse_subcmd(struct trace_seq *p, unsigned char cmd,
unsigned char feature, unsigned char hob_nsect)
{
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
index 34bb4608bdc6..ca129854a88c 100644
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -163,7 +163,7 @@ static struct {
{ AC_ERR_INVALID, "InvalidArg" },
{ AC_ERR_OTHER, "Unknown" },
{ AC_ERR_NODEV_HINT, "NoDeviceHint" },
- { AC_ERR_NCQ, "NCQError" }
+ { AC_ERR_NCQ, "NCQError" }
};
ata_bitfield_name_match(err, ata_err_names)
@@ -321,13 +321,43 @@ int ata_tport_add(struct device *parent,
return error;
}
+/**
+ * ata_port_classify - determine device type based on ATA-spec signature
+ * @ap: ATA port device on which the classification should be run
+ * @tf: ATA taskfile register set for device to be identified
+ *
+ * A wrapper around ata_dev_classify() to provide additional logging
+ *
+ * RETURNS:
+ * Device type, %ATA_DEV_ATA, %ATA_DEV_ATAPI, %ATA_DEV_PMP,
+ * %ATA_DEV_ZAC, or %ATA_DEV_UNKNOWN the event of failure.
+ */
+unsigned int ata_port_classify(struct ata_port *ap,
+ const struct ata_taskfile *tf)
+{
+ int i;
+ unsigned int class = ata_dev_classify(tf);
+
+ /* Start with index '1' to skip the 'unknown' entry */
+ for (i = 1; i < ARRAY_SIZE(ata_class_names); i++) {
+ if (ata_class_names[i].value == class) {
+ ata_port_dbg(ap, "found %s device by sig\n",
+ ata_class_names[i].name);
+ return class;
+ }
+ }
+
+ ata_port_info(ap, "found unknown device (class %u)\n", class);
+ return class;
+}
+EXPORT_SYMBOL_GPL(ata_port_classify);
/*
* ATA link attributes
*/
static int noop(int x) { return x; }
-#define ata_link_show_linkspeed(field, format) \
+#define ata_link_show_linkspeed(field, format) \
static ssize_t \
show_ata_link_##field(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -416,7 +446,7 @@ int ata_tlink_add(struct ata_link *link)
dev->release = ata_tlink_release;
if (ata_is_host_link(link))
dev_set_name(dev, "link%d", ap->print_id);
- else
+ else
dev_set_name(dev, "link%d.%d", ap->print_id, link->pmp);
transport_setup_device(dev);
@@ -472,7 +502,7 @@ ata_dev_attr(xfer, dma_mode);
ata_dev_attr(xfer, xfer_mode);
-#define ata_dev_show_simple(field, format_string, cast) \
+#define ata_dev_show_simple(field, format_string, cast) \
static ssize_t \
show_ata_dev_##field(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -482,9 +512,9 @@ show_ata_dev_##field(struct device *dev, \
return scnprintf(buf, 20, format_string, cast ata_dev->field); \
}
-#define ata_dev_simple_attr(field, format_string, type) \
+#define ata_dev_simple_attr(field, format_string, type) \
ata_dev_show_simple(field, format_string, (type)) \
-static DEVICE_ATTR(field, S_IRUGO, \
+ static DEVICE_ATTR(field, S_IRUGO, \
show_ata_dev_##field, NULL)
ata_dev_simple_attr(spdn_cnt, "%d\n", int);
@@ -502,7 +532,7 @@ static int ata_show_ering(struct ata_ering_entry *ent, void *void_arg)
seconds = div_u64_rem(ent->timestamp, HZ, &rem);
arg->written += sprintf(arg->buf + arg->written,
- "[%5llu.%09lu]", seconds,
+ "[%5llu.%09lu]", seconds,
rem * NSEC_PER_SEC / HZ);
arg->written += get_ata_err_names(ent->err_mask,
arg->buf + arg->written);
@@ -667,7 +697,7 @@ static int ata_tdev_add(struct ata_device *ata_dev)
dev->release = ata_tdev_release;
if (ata_is_host_link(link))
dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno);
- else
+ else
dev_set_name(dev, "dev%d.%d.0", ap->print_id, link->pmp);
transport_setup_device(dev);
@@ -689,7 +719,7 @@ static int ata_tdev_add(struct ata_device *ata_dev)
*/
#define SETUP_TEMPLATE(attrb, field, perm, test) \
- i->private_##attrb[count] = dev_attr_##field; \
+ i->private_##attrb[count] = dev_attr_##field; \
i->private_##attrb[count].attr.mode = perm; \
i->attrb[count] = &i->private_##attrb[count]; \
if (test) \
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 68cdd81d747c..51e01acdd241 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -148,7 +148,6 @@ extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
unsigned int id, u64 lun);
void ata_scsi_sdev_config(struct scsi_device *sdev);
int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev);
-void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd);
int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev);
/* libata-eh.c */
@@ -166,7 +165,7 @@ extern void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
extern void ata_eh_done(struct ata_link *link, struct ata_device *dev,
unsigned int action);
extern void ata_eh_autopsy(struct ata_port *ap);
-const char *ata_get_cmd_descript(u8 command);
+const char *ata_get_cmd_name(u8 command);
extern void ata_eh_report(struct ata_port *ap);
extern int ata_eh_reset(struct ata_link *link, int classify,
ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
@@ -179,7 +178,7 @@ extern int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
extern void ata_eh_finish(struct ata_port *ap);
extern int ata_ering_map(struct ata_ering *ering,
int (*map_fn)(struct ata_ering_entry *, void *),
- void *arg);
+ void *arg);
extern unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key);
extern unsigned int atapi_eh_request_sense(struct ata_device *dev,
u8 *sense_buf, u8 dfl_sense_key);
diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index b7ff63ed3bbb..1b90cda27246 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c
@@ -37,7 +37,7 @@
#define DRV_NAME "pata_ali"
#define DRV_VERSION "0.7.8"
-static int ali_atapi_dma = 0;
+static int ali_atapi_dma;
module_param_named(atapi_dma, ali_atapi_dma, int, 0644);
MODULE_PARM_DESC(atapi_dma, "Enable ATAPI DMA (0=disable, 1=enable)");
@@ -123,7 +123,7 @@ static unsigned long ali_20_filter(struct ata_device *adev, unsigned long mask)
mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
ata_id_c_string(adev->id, model_num, ATA_ID_PROD, sizeof(model_num));
if (strstr(model_num, "WDC"))
- return mask &= ~ATA_MASK_UDMA;
+ mask &= ~ATA_MASK_UDMA;
return mask;
}
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
index 63f39440a9b4..24c3d5e1fca3 100644
--- a/drivers/ata/pata_arasan_cf.c
+++ b/drivers/ata/pata_arasan_cf.c
@@ -39,6 +39,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/workqueue.h>
+#include <trace/events/libata.h>
#define DRIVER_NAME "arasan_cf"
#define TIMEOUT msecs_to_jiffies(3000)
@@ -703,9 +704,11 @@ static unsigned int arasan_cf_qc_issue(struct ata_queued_cmd *qc)
case ATA_PROT_DMA:
WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
+ trace_ata_tf_load(ap, &qc->tf);
ap->ops->sff_tf_load(ap, &qc->tf);
acdev->dma_status = 0;
acdev->qc = qc;
+ trace_ata_bmdma_start(ap, &qc->tf, qc->tag);
arasan_cf_dma_start(acdev);
ap->hsm_task_state = HSM_ST_LAST;
break;
diff --git a/drivers/ata/pata_atp867x.c b/drivers/ata/pata_atp867x.c
index 2bc5fc81efe3..779d660415c8 100644
--- a/drivers/ata/pata_atp867x.c
+++ b/drivers/ata/pata_atp867x.c
@@ -155,7 +155,7 @@ static int atp867x_get_active_clocks_shifted(struct ata_port *ap,
case 1 ... 6:
break;
default:
- printk(KERN_WARNING "ATP867X: active %dclk is invalid. "
+ ata_port_warn(ap, "ATP867X: active %dclk is invalid. "
"Using 12clk.\n", clk);
fallthrough;
case 9 ... 12:
@@ -171,7 +171,8 @@ active_clock_shift_done:
return clocks << ATP867X_IO_PIOSPD_ACTIVE_SHIFT;
}
-static int atp867x_get_recover_clocks_shifted(unsigned int clk)
+static int atp867x_get_recover_clocks_shifted(struct ata_port *ap,
+ unsigned int clk)
{
unsigned char clocks = clk;
@@ -188,7 +189,7 @@ static int atp867x_get_recover_clocks_shifted(unsigned int clk)
case 15:
break;
default:
- printk(KERN_WARNING "ATP867X: recover %dclk is invalid. "
+ ata_port_warn(ap, "ATP867X: recover %dclk is invalid. "
"Using default 12clk.\n", clk);
fallthrough;
case 12: /* default 12 clk */
@@ -225,7 +226,7 @@ static void atp867x_set_piomode(struct ata_port *ap, struct ata_device *adev)
iowrite8(b, dp->dma_mode);
b = atp867x_get_active_clocks_shifted(ap, t.active) |
- atp867x_get_recover_clocks_shifted(t.recover);
+ atp867x_get_recover_clocks_shifted(ap, t.recover);
if (adev->devno & 1)
iowrite8(b, dp->slave_piospd);
@@ -233,7 +234,7 @@ static void atp867x_set_piomode(struct ata_port *ap, struct ata_device *adev)
iowrite8(b, dp->mstr_piospd);
b = atp867x_get_active_clocks_shifted(ap, t.act8b) |
- atp867x_get_recover_clocks_shifted(t.rec8b);
+ atp867x_get_recover_clocks_shifted(ap, t.rec8b);
iowrite8(b, dp->eightb_piospd);
}
@@ -270,7 +271,6 @@ static struct ata_port_operations atp867x_ops = {
};
-#ifdef ATP867X_DEBUG
static void atp867x_check_res(struct pci_dev *pdev)
{
int i;
@@ -280,7 +280,7 @@ static void atp867x_check_res(struct pci_dev *pdev)
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
start = pci_resource_start(pdev, i);
len = pci_resource_len(pdev, i);
- printk(KERN_DEBUG "ATP867X: resource start:len=%lx:%lx\n",
+ dev_dbg(&pdev->dev, "ATP867X: resource start:len=%lx:%lx\n",
start, len);
}
}
@@ -290,49 +290,48 @@ static void atp867x_check_ports(struct ata_port *ap, int port)
struct ata_ioports *ioaddr = &ap->ioaddr;
struct atp867x_priv *dp = ap->private_data;
- printk(KERN_DEBUG "ATP867X: port[%d] addresses\n"
- " cmd_addr =0x%llx, 0x%llx\n"
- " ctl_addr =0x%llx, 0x%llx\n"
- " bmdma_addr =0x%llx, 0x%llx\n"
- " data_addr =0x%llx\n"
- " error_addr =0x%llx\n"
- " feature_addr =0x%llx\n"
- " nsect_addr =0x%llx\n"
- " lbal_addr =0x%llx\n"
- " lbam_addr =0x%llx\n"
- " lbah_addr =0x%llx\n"
- " device_addr =0x%llx\n"
- " status_addr =0x%llx\n"
- " command_addr =0x%llx\n"
- " dp->dma_mode =0x%llx\n"
- " dp->mstr_piospd =0x%llx\n"
- " dp->slave_piospd =0x%llx\n"
- " dp->eightb_piospd =0x%llx\n"
+ ata_port_dbg(ap, "ATP867X: port[%d] addresses\n"
+ " cmd_addr =0x%lx, 0x%lx\n"
+ " ctl_addr =0x%lx, 0x%lx\n"
+ " bmdma_addr =0x%lx, 0x%lx\n"
+ " data_addr =0x%lx\n"
+ " error_addr =0x%lx\n"
+ " feature_addr =0x%lx\n"
+ " nsect_addr =0x%lx\n"
+ " lbal_addr =0x%lx\n"
+ " lbam_addr =0x%lx\n"
+ " lbah_addr =0x%lx\n"
+ " device_addr =0x%lx\n"
+ " status_addr =0x%lx\n"
+ " command_addr =0x%lx\n"
+ " dp->dma_mode =0x%lx\n"
+ " dp->mstr_piospd =0x%lx\n"
+ " dp->slave_piospd =0x%lx\n"
+ " dp->eightb_piospd =0x%lx\n"
" dp->pci66mhz =0x%lx\n",
port,
- (unsigned long long)ioaddr->cmd_addr,
- (unsigned long long)ATP867X_IO_PORTBASE(ap, port),
- (unsigned long long)ioaddr->ctl_addr,
- (unsigned long long)ATP867X_IO_ALTSTATUS(ap, port),
- (unsigned long long)ioaddr->bmdma_addr,
- (unsigned long long)ATP867X_IO_DMABASE(ap, port),
- (unsigned long long)ioaddr->data_addr,
- (unsigned long long)ioaddr->error_addr,
- (unsigned long long)ioaddr->feature_addr,
- (unsigned long long)ioaddr->nsect_addr,
- (unsigned long long)ioaddr->lbal_addr,
- (unsigned long long)ioaddr->lbam_addr,
- (unsigned long long)ioaddr->lbah_addr,
- (unsigned long long)ioaddr->device_addr,
- (unsigned long long)ioaddr->status_addr,
- (unsigned long long)ioaddr->command_addr,
- (unsigned long long)dp->dma_mode,
- (unsigned long long)dp->mstr_piospd,
- (unsigned long long)dp->slave_piospd,
- (unsigned long long)dp->eightb_piospd,
+ (unsigned long)ioaddr->cmd_addr,
+ (unsigned long)ATP867X_IO_PORTBASE(ap, port),
+ (unsigned long)ioaddr->ctl_addr,
+ (unsigned long)ATP867X_IO_ALTSTATUS(ap, port),
+ (unsigned long)ioaddr->bmdma_addr,
+ (unsigned long)ATP867X_IO_DMABASE(ap, port),
+ (unsigned long)ioaddr->data_addr,
+ (unsigned long)ioaddr->error_addr,
+ (unsigned long)ioaddr->feature_addr,
+ (unsigned long)ioaddr->nsect_addr,
+ (unsigned long)ioaddr->lbal_addr,
+ (unsigned long)ioaddr->lbam_addr,
+ (unsigned long)ioaddr->lbah_addr,
+ (unsigned long)ioaddr->device_addr,
+ (unsigned long)ioaddr->status_addr,
+ (unsigned long)ioaddr->command_addr,
+ (unsigned long)dp->dma_mode,
+ (unsigned long)dp->mstr_piospd,
+ (unsigned long)dp->slave_piospd,
+ (unsigned long)dp->eightb_piospd,
(unsigned long)dp->pci66mhz);
}
-#endif
static int atp867x_set_priv(struct ata_port *ap)
{
@@ -370,8 +369,7 @@ static void atp867x_fixup(struct ata_host *host)
if (v < 0x80) {
v = 0x80;
pci_write_config_byte(pdev, PCI_LATENCY_TIMER, v);
- printk(KERN_DEBUG "ATP867X: set latency timer of device %s"
- " to %d\n", pci_name(pdev), v);
+ dev_dbg(&pdev->dev, "ATP867X: set latency timer to %d\n", v);
}
/*
@@ -419,13 +417,11 @@ static int atp867x_ata_pci_sff_init_host(struct ata_host *host)
return rc;
host->iomap = pcim_iomap_table(pdev);
-#ifdef ATP867X_DEBUG
atp867x_check_res(pdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++)
- printk(KERN_DEBUG "ATP867X: iomap[%d]=0x%llx\n", i,
- (unsigned long long)(host->iomap[i]));
-#endif
+ dev_dbg(gdev, "ATP867X: iomap[%d]=0x%p\n", i,
+ host->iomap[i]);
/*
* request, iomap BARs and init port addresses accordingly
@@ -444,9 +440,8 @@ static int atp867x_ata_pci_sff_init_host(struct ata_host *host)
if (rc)
return rc;
-#ifdef ATP867X_DEBUG
atp867x_check_ports(ap, i);
-#endif
+
ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx",
(unsigned long)ioaddr->cmd_addr,
(unsigned long)ioaddr->ctl_addr);
@@ -486,7 +481,7 @@ static int atp867x_init_one(struct pci_dev *pdev,
if (rc)
return rc;
- printk(KERN_INFO "ATP867X: ATP867 ATA UDMA133 controller (rev %02X)",
+ dev_info(&pdev->dev, "ATP867X: ATP867 ATA UDMA133 controller (rev %02X)",
pdev->device);
host = ata_host_alloc_pinfo(&pdev->dev, ppi, ATP867X_NUM_PORTS);
diff --git a/drivers/ata/pata_cmd640.c b/drivers/ata/pata_cmd640.c
index d0bcabb58b44..1a3372a72213 100644
--- a/drivers/ata/pata_cmd640.c
+++ b/drivers/ata/pata_cmd640.c
@@ -61,7 +61,7 @@ static void cmd640_set_piomode(struct ata_port *ap, struct ata_device *adev)
struct ata_device *pair = ata_dev_pair(adev);
if (ata_timing_compute(adev, adev->pio_mode, &t, T, 0) < 0) {
- printk(KERN_ERR DRV_NAME ": mode computation failed.\n");
+ ata_dev_err(adev, DRV_NAME ": mode computation failed.\n");
return;
}
diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c
index 1d74d89b5bed..5baa4a7819c1 100644
--- a/drivers/ata/pata_cmd64x.c
+++ b/drivers/ata/pata_cmd64x.c
@@ -116,7 +116,7 @@ static void cmd64x_set_timing(struct ata_port *ap, struct ata_device *adev, u8 m
/* ata_timing_compute is smart and will produce timings for MWDMA
that don't violate the drives PIO capabilities. */
if (ata_timing_compute(adev, mode, &t, T, 0) < 0) {
- printk(KERN_ERR DRV_NAME ": mode computation failed.\n");
+ ata_dev_err(adev, DRV_NAME ": mode computation failed.\n");
return;
}
if (ap->port_no) {
@@ -130,7 +130,7 @@ static void cmd64x_set_timing(struct ata_port *ap, struct ata_device *adev, u8 m
}
}
- printk(KERN_DEBUG DRV_NAME ": active %d recovery %d setup %d.\n",
+ ata_dev_dbg(adev, DRV_NAME ": active %d recovery %d setup %d.\n",
t.active, t.recover, t.setup);
if (t.recover > 16) {
t.active += t.recover - 16;
diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c
index 247c14702624..24ce8665b1f9 100644
--- a/drivers/ata/pata_cs5520.c
+++ b/drivers/ata/pata_cs5520.c
@@ -153,12 +153,12 @@ static int cs5520_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
/* Perform set up for DMA */
if (pci_enable_device_io(pdev)) {
- printk(KERN_ERR DRV_NAME ": unable to configure BAR2.\n");
+ dev_err(&pdev->dev, "unable to configure BAR2.\n");
return -ENODEV;
}
if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
- printk(KERN_ERR DRV_NAME ": unable to configure DMA mask.\n");
+ dev_err(&pdev->dev, "unable to configure DMA mask.\n");
return -ENODEV;
}
diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c
index 760ac6e65216..ab47aeb5587f 100644
--- a/drivers/ata/pata_cs5536.c
+++ b/drivers/ata/pata_cs5536.c
@@ -263,12 +263,12 @@ static int cs5536_init_one(struct pci_dev *dev, const struct pci_device_id *id)
ppi[1] = &ata_dummy_port_info;
if (use_msr)
- printk(KERN_ERR DRV_NAME ": Using MSR regs instead of PCI\n");
+ dev_err(&dev->dev, DRV_NAME ": Using MSR regs instead of PCI\n");
cs5536_read(dev, CFG, &cfg);
if ((cfg & IDE_CFG_CHANEN) == 0) {
- printk(KERN_ERR DRV_NAME ": disabled by BIOS\n");
+ dev_err(&dev->dev, DRV_NAME ": disabled by BIOS\n");
return -ENODEV;
}
diff --git a/drivers/ata/pata_cypress.c b/drivers/ata/pata_cypress.c
index 5b3a7a8ebef6..3be5d52a777b 100644
--- a/drivers/ata/pata_cypress.c
+++ b/drivers/ata/pata_cypress.c
@@ -62,7 +62,7 @@ static void cy82c693_set_piomode(struct ata_port *ap, struct ata_device *adev)
u32 addr;
if (ata_timing_compute(adev, adev->pio_mode, &t, T, 1) < 0) {
- printk(KERN_ERR DRV_NAME ": mome computation failed.\n");
+ ata_dev_err(adev, DRV_NAME ": mome computation failed.\n");
return;
}
diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c
index 46208ececbb6..b78f71c70f27 100644
--- a/drivers/ata/pata_ep93xx.c
+++ b/drivers/ata/pata_ep93xx.c
@@ -855,7 +855,6 @@ static void ep93xx_pata_drain_fifo(struct ata_queued_cmd *qc)
&& count < 65536; count += 2)
ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_DATA);
- /* Can become DEBUG later */
if (count)
ata_port_dbg(ap, "drained %d bytes to clear DRQ.\n", count);
diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c
index 06b7c4a9ec95..778c893f276b 100644
--- a/drivers/ata/pata_hpt366.c
+++ b/drivers/ata/pata_hpt366.c
@@ -14,9 +14,6 @@
* TODO
* Look into engine reset on timeout errors. Should not be required.
*/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -183,7 +180,7 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr,
i = match_string(list, -1, model_num);
if (i >= 0) {
- pr_warn("%s is not supported for %s\n", modestr, list[i]);
+ ata_dev_warn(dev, "%s is not supported for %s\n", modestr, list[i]);
return 1;
}
return 0;
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index f242157bc81b..7abc7e04f656 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -14,9 +14,6 @@
* TODO
* Look into engine reset on timeout errors. Should not be required.
*/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -231,7 +228,8 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr,
i = match_string(list, -1, model_num);
if (i >= 0) {
- pr_warn("%s is not supported for %s\n", modestr, list[i]);
+ ata_dev_warn(dev, "%s is not supported for %s\n",
+ modestr, list[i]);
return 1;
}
return 0;
@@ -864,7 +862,8 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
chip_table = &hpt372;
break;
default:
- pr_err("Unknown HPT366 subtype, please report (%d)\n",
+ dev_err(&dev->dev,
+ "Unknown HPT366 subtype, please report (%d)\n",
rev);
return -ENODEV;
}
@@ -905,7 +904,8 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
*ppi = &info_hpt374_fn1;
break;
default:
- pr_err("PCI table is bogus, please report (%d)\n", dev->device);
+ dev_err(&dev->dev, "PCI table is bogus, please report (%d)\n",
+ dev->device);
return -ENODEV;
}
/* Ok so this is a chip we support */
@@ -953,7 +953,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
u8 sr;
u32 total = 0;
- pr_warn("BIOS has not set timing clocks\n");
+ dev_warn(&dev->dev, "BIOS has not set timing clocks\n");
/* This is the process the HPT371 BIOS is reported to use */
for (i = 0; i < 128; i++) {
@@ -1009,7 +1009,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
(f_high << 16) | f_low | 0x100);
}
if (adjust == 8) {
- pr_err("DPLL did not stabilize!\n");
+ dev_err(&dev->dev, "DPLL did not stabilize!\n");
return -ENODEV;
}
if (dpll == 3)
@@ -1017,7 +1017,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
else
private_data = (void *)hpt37x_timings_50;
- pr_info("bus clock %dMHz, using %dMHz DPLL\n",
+ dev_info(&dev->dev, "bus clock %dMHz, using %dMHz DPLL\n",
MHz[clock_slot], MHz[dpll]);
} else {
private_data = (void *)chip_table->clocks[clock_slot];
@@ -1032,7 +1032,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
if (clock_slot < 2 && ppi[0] == &info_hpt370a)
ppi[0] = &info_hpt370a_33;
- pr_info("%s using %dMHz bus clock\n",
+ dev_info(&dev->dev, "%s using %dMHz bus clock\n",
chip_table->name, MHz[clock_slot]);
}
diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c
index 48eef338e050..1d9d4eec5b8a 100644
--- a/drivers/ata/pata_hpt3x2n.c
+++ b/drivers/ata/pata_hpt3x2n.c
@@ -15,9 +15,6 @@
* TODO
* Work out best PLL policy
*/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -420,7 +417,7 @@ static int hpt3x2n_pci_clock(struct pci_dev *pdev)
u16 sr;
u32 total = 0;
- pr_warn("BIOS clock data not set\n");
+ dev_warn(&pdev->dev, "BIOS clock data not set\n");
/* This is the process the HPT371 BIOS is reported to use */
for (i = 0; i < 128; i++) {
@@ -530,7 +527,8 @@ hpt372n:
ppi[0] = &info_hpt372n;
break;
default:
- pr_err("PCI table is bogus, please report (%d)\n", dev->device);
+ dev_err(&dev->dev,"PCI table is bogus, please report (%d)\n",
+ dev->device);
return -ENODEV;
}
@@ -579,11 +577,11 @@ hpt372n:
pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low);
}
if (adjust == 8) {
- pr_err("DPLL did not stabilize!\n");
+ dev_err(&dev->dev, "DPLL did not stabilize!\n");
return -ENODEV;
}
- pr_info("bus clock %dMHz, using 66MHz DPLL\n", pci_mhz);
+ dev_info(&dev->dev, "bus clock %dMHz, using 66MHz DPLL\n", pci_mhz);
/*
* Set our private data up. We only need a few flags
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index 0e2265978a34..8a5b4e0079ab 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -431,7 +431,8 @@ static unsigned int it821x_smart_qc_issue(struct ata_queued_cmd *qc)
case ATA_CMD_SET_FEATURES:
return ata_bmdma_qc_issue(qc);
}
- printk(KERN_DEBUG "it821x: can't process command 0x%02X\n", qc->tf.command);
+ ata_dev_dbg(qc->dev, "it821x: can't process command 0x%02X\n",
+ qc->tf.command);
return AC_ERR_DEV;
}
@@ -507,12 +508,14 @@ static void it821x_dev_config(struct ata_device *adev)
if (strstr(model_num, "Integrated Technology Express")) {
/* RAID mode */
- ata_dev_info(adev, "%sRAID%d volume",
- adev->id[147] ? "Bootable " : "",
- adev->id[129]);
- if (adev->id[129] != 1)
- pr_cont("(%dK stripe)", adev->id[146]);
- pr_cont("\n");
+ if (adev->id[129] == 1)
+ ata_dev_info(adev, "%sRAID%d volume\n",
+ adev->id[147] ? "Bootable " : "",
+ adev->id[129]);
+ else
+ ata_dev_info(adev, "%sRAID%d volume (%dK stripe)\n",
+ adev->id[147] ? "Bootable " : "",
+ adev->id[129], adev->id[146]);
}
/* This is a controller firmware triggered funny, don't
report the drive faulty! */
@@ -534,7 +537,7 @@ static void it821x_dev_config(struct ata_device *adev)
*/
static unsigned int it821x_read_id(struct ata_device *adev,
- struct ata_taskfile *tf, u16 *id)
+ struct ata_taskfile *tf, __le16 *id)
{
unsigned int err_mask;
unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -542,21 +545,20 @@ static unsigned int it821x_read_id(struct ata_device *adev,
err_mask = ata_do_dev_read_id(adev, tf, id);
if (err_mask)
return err_mask;
- ata_id_c_string(id, model_num, ATA_ID_PROD, sizeof(model_num));
+ ata_id_c_string((u16 *)id, model_num, ATA_ID_PROD, sizeof(model_num));
- id[83] &= ~(1 << 12); /* Cache flush is firmware handled */
- id[83] &= ~(1 << 13); /* Ditto for LBA48 flushes */
- id[84] &= ~(1 << 6); /* No FUA */
- id[85] &= ~(1 << 10); /* No HPA */
- id[76] = 0; /* No NCQ/AN etc */
+ id[83] &= cpu_to_le16(~(1 << 12)); /* Cache flush is firmware handled */
+ id[84] &= cpu_to_le16(~(1 << 6)); /* No FUA */
+ id[85] &= cpu_to_le16(~(1 << 10)); /* No HPA */
+ id[76] = 0; /* No NCQ/AN etc */
if (strstr(model_num, "Integrated Technology Express")) {
/* Set feature bits the firmware neglects */
- id[49] |= 0x0300; /* LBA, DMA */
- id[83] &= 0x7FFF;
- id[83] |= 0x4400; /* Word 83 is valid and LBA48 */
- id[86] |= 0x0400; /* LBA48 on */
- id[ATA_ID_MAJOR_VER] |= 0x1F;
+ id[49] |= cpu_to_le16(0x0300); /* LBA, DMA */
+ id[83] &= cpu_to_le16(0x7FFF);
+ id[83] |= cpu_to_le16(0x4400); /* Word 83 is valid and LBA48 */
+ id[86] |= cpu_to_le16(0x0400); /* LBA48 on */
+ id[ATA_ID_MAJOR_VER] |= cpu_to_le16(0x1F);
/* Clear the serial number because it's different each boot
which breaks validation on resume */
memset(&id[ATA_ID_SERNO], 0x20, ATA_ID_SERNO_LEN);
@@ -593,6 +595,7 @@ static int it821x_check_atapi_dma(struct ata_queued_cmd *qc)
/**
* it821x_display_disk - display disk setup
+ * @ap: ATA port
* @n: Device number
* @buf: Buffer block from firmware
*
@@ -600,7 +603,7 @@ static int it821x_check_atapi_dma(struct ata_queued_cmd *qc)
* by the firmware.
*/
-static void it821x_display_disk(int n, u8 *buf)
+static void it821x_display_disk(struct ata_port *ap, int n, u8 *buf)
{
unsigned char id[41];
int mode = 0;
@@ -633,13 +636,13 @@ static void it821x_display_disk(int n, u8 *buf)
else
strcpy(mbuf, "PIO");
if (buf[52] == 4)
- printk(KERN_INFO "%d: %-6s %-8s %s %s\n",
+ ata_port_info(ap, "%d: %-6s %-8s %s %s\n",
n, mbuf, types[buf[52]], id, cbl);
else
- printk(KERN_INFO "%d: %-6s %-8s Volume: %1d %s %s\n",
+ ata_port_info(ap, "%d: %-6s %-8s Volume: %1d %s %s\n",
n, mbuf, types[buf[52]], buf[53], id, cbl);
if (buf[125] < 100)
- printk(KERN_INFO "%d: Rebuilding: %d%%\n", n, buf[125]);
+ ata_port_info(ap, "%d: Rebuilding: %d%%\n", n, buf[125]);
}
/**
@@ -676,7 +679,7 @@ static u8 *it821x_firmware_command(struct ata_port *ap, u8 cmd, int len)
status = ioread8(ap->ioaddr.status_addr);
if (status & ATA_ERR) {
kfree(buf);
- printk(KERN_ERR "it821x_firmware_command: rejected\n");
+ ata_port_err(ap, "%s: rejected\n", __func__);
return NULL;
}
if (status & ATA_DRQ) {
@@ -686,7 +689,7 @@ static u8 *it821x_firmware_command(struct ata_port *ap, u8 cmd, int len)
usleep_range(500, 1000);
}
kfree(buf);
- printk(KERN_ERR "it821x_firmware_command: timeout\n");
+ ata_port_err(ap, "%s: timeout\n", __func__);
return NULL;
}
@@ -709,13 +712,13 @@ static void it821x_probe_firmware(struct ata_port *ap)
buf = it821x_firmware_command(ap, 0xFA, 512);
if (buf != NULL) {
- printk(KERN_INFO "pata_it821x: Firmware %02X/%02X/%02X%02X\n",
+ ata_port_info(ap, "pata_it821x: Firmware %02X/%02X/%02X%02X\n",
buf[505],
buf[506],
buf[507],
buf[508]);
for (i = 0; i < 4; i++)
- it821x_display_disk(i, buf + 128 * i);
+ it821x_display_disk(ap, i, buf + 128 * i);
kfree(buf);
}
}
@@ -771,7 +774,8 @@ static int it821x_port_start(struct ata_port *ap)
itdev->timing10 = 1;
/* Need to disable ATAPI DMA for this case */
if (!itdev->smart)
- printk(KERN_WARNING DRV_NAME": Revision 0x10, workarounds activated.\n");
+ dev_warn(&pdev->dev,
+ "Revision 0x10, workarounds activated.\n");
}
return 0;
@@ -919,14 +923,14 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
} else {
/* Force the card into bypass mode if so requested */
if (it8212_noraid) {
- printk(KERN_INFO DRV_NAME ": forcing bypass mode.\n");
+ dev_info(&pdev->dev, "forcing bypass mode.\n");
it821x_disable_raid(pdev);
}
pci_read_config_byte(pdev, 0x50, &conf);
conf &= 1;
- printk(KERN_INFO DRV_NAME": controller in %s mode.\n",
- mode[conf]);
+ dev_info(&pdev->dev, "controller in %s mode.\n", mode[conf]);
+
if (conf == 0)
ppi[0] = &info_passthru;
else
diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c
index 99c63087c8ae..17b557c91e1c 100644
--- a/drivers/ata/pata_ixp4xx_cf.c
+++ b/drivers/ata/pata_ixp4xx_cf.c
@@ -114,7 +114,7 @@ static void ixp4xx_set_piomode(struct ata_port *ap, struct ata_device *adev)
{
struct ixp4xx_pata *ixpp = ap->host->private_data;
- ata_dev_printk(adev, KERN_INFO, "configured for PIO%d 8bit\n",
+ ata_dev_info(adev, "configured for PIO%d 8bit\n",
adev->pio_mode - XFER_PIO_0);
ixp4xx_set_8bit_timing(ixpp, adev->pio_mode);
}
@@ -132,8 +132,8 @@ static unsigned int ixp4xx_mmio_data_xfer(struct ata_queued_cmd *qc,
struct ixp4xx_pata *ixpp = ap->host->private_data;
unsigned long flags;
- ata_dev_printk(adev, KERN_DEBUG, "%s %d bytes\n", (rw == READ) ? "READ" : "WRITE",
- buflen);
+ ata_dev_dbg(adev, "%s %d bytes\n", (rw == READ) ? "READ" : "WRITE",
+ buflen);
spin_lock_irqsave(ap->lock, flags);
/* set the expansion bus in 16bit mode and restore
diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c
index 361597d14c56..0c5a51970fbf 100644
--- a/drivers/ata/pata_marvell.c
+++ b/drivers/ata/pata_marvell.c
@@ -32,7 +32,6 @@
static int marvell_pata_active(struct pci_dev *pdev)
{
- int i;
u32 devices;
void __iomem *barp;
@@ -44,11 +43,6 @@ static int marvell_pata_active(struct pci_dev *pdev)
if (barp == NULL)
return -ENOMEM;
- printk("BAR5:");
- for(i = 0; i <= 0x0F; i++)
- printk("%02X:%02X ", i, ioread8(barp + i));
- printk("\n");
-
devices = ioread32(barp + 0x0C);
pci_iounmap(pdev, barp);
@@ -149,7 +143,8 @@ static int marvell_init_one (struct pci_dev *pdev, const struct pci_device_id *i
#if IS_ENABLED(CONFIG_SATA_AHCI)
if (!marvell_pata_active(pdev)) {
- printk(KERN_INFO DRV_NAME ": PATA port not active, deferring to AHCI driver.\n");
+ dev_info(&pdev->dev,
+ "PATA port not active, deferring to AHCI driver.\n");
return -ENODEV;
}
#endif
diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c
index a7ecc1a204b5..06929e77c491 100644
--- a/drivers/ata/pata_netcell.c
+++ b/drivers/ata/pata_netcell.c
@@ -21,12 +21,13 @@
/* No PIO or DMA methods needed for this device */
static unsigned int netcell_read_id(struct ata_device *adev,
- struct ata_taskfile *tf, u16 *id)
+ struct ata_taskfile *tf, __le16 *id)
{
unsigned int err_mask = ata_do_dev_read_id(adev, tf, id);
+
/* Firmware forgets to mark words 85-87 valid */
if (err_mask == 0)
- id[ATA_ID_CSF_DEFAULT] |= 0x4000;
+ id[ATA_ID_CSF_DEFAULT] |= cpu_to_le16(0x4000);
return err_mask;
}
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index b5a3f710d76d..05c2ab375756 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -19,7 +19,7 @@
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <scsi/scsi_host.h>
-
+#include <trace/events/libata.h>
#include <asm/byteorder.h>
#include <asm/octeon/octeon.h>
@@ -73,16 +73,12 @@ MODULE_PARM_DESC(enable_dma,
*/
static unsigned int ns_to_tim_reg(unsigned int tim_mult, unsigned int nsecs)
{
- unsigned int val;
-
/*
* Compute # of eclock periods to get desired duration in
* nanoseconds.
*/
- val = DIV_ROUND_UP(nsecs * (octeon_get_io_clock_rate() / 1000000),
+ return DIV_ROUND_UP(nsecs * (octeon_get_io_clock_rate() / 1000000),
1000 * tim_mult);
-
- return val;
}
static void octeon_cf_set_boot_reg_cfg(int cs, unsigned int multiplier)
@@ -273,9 +269,9 @@ static void octeon_cf_set_dmamode(struct ata_port *ap, struct ata_device *dev)
dma_tim.s.we_n = ns_to_tim_reg(tim_mult, oe_n);
dma_tim.s.we_a = ns_to_tim_reg(tim_mult, oe_a);
- pr_debug("ns to ticks (mult %d) of %d is: %d\n", tim_mult, 60,
+ ata_dev_dbg(dev, "ns to ticks (mult %d) of %d is: %d\n", tim_mult, 60,
ns_to_tim_reg(tim_mult, 60));
- pr_debug("oe_n: %d, oe_a: %d, dmack_s: %d, dmack_h: %d, dmarq: %d, pause: %d\n",
+ ata_dev_dbg(dev, "oe_n: %d, oe_a: %d, dmack_s: %d, dmack_h: %d, dmarq: %d, pause: %d\n",
dma_tim.s.oe_n, dma_tim.s.oe_a, dma_tim.s.dmack_s,
dma_tim.s.dmack_h, dma_tim.s.dmarq, dma_tim.s.pause);
@@ -440,7 +436,6 @@ static int octeon_cf_softreset16(struct ata_link *link, unsigned int *classes,
int rc;
u8 err;
- DPRINTK("about to softreset\n");
__raw_writew(ap->ctl, base + 0xe);
udelay(20);
__raw_writew(ap->ctl | ATA_SRST, base + 0xe);
@@ -455,7 +450,6 @@ static int octeon_cf_softreset16(struct ata_link *link, unsigned int *classes,
/* determine by signature whether we have ATA or ATAPI devices */
classes[0] = ata_sff_dev_classify(&link->device[0], 1, &err);
- DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]);
return 0;
}
@@ -479,23 +473,11 @@ static void octeon_cf_tf_load16(struct ata_port *ap,
__raw_writew(tf->hob_feature << 8, base + 0xc);
__raw_writew(tf->hob_nsect | tf->hob_lbal << 8, base + 2);
__raw_writew(tf->hob_lbam | tf->hob_lbah << 8, base + 4);
- VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n",
- tf->hob_feature,
- tf->hob_nsect,
- tf->hob_lbal,
- tf->hob_lbam,
- tf->hob_lbah);
}
if (is_addr) {
__raw_writew(tf->feature << 8, base + 0xc);
__raw_writew(tf->nsect | tf->lbal << 8, base + 2);
__raw_writew(tf->lbam | tf->lbah << 8, base + 4);
- VPRINTK("feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n",
- tf->feature,
- tf->nsect,
- tf->lbal,
- tf->lbam,
- tf->lbah);
}
ata_wait_idle(ap);
}
@@ -516,20 +498,14 @@ static void octeon_cf_exec_command16(struct ata_port *ap,
{
/* The base of the registers is at ioaddr.data_addr. */
void __iomem *base = ap->ioaddr.data_addr;
- u16 blob;
+ u16 blob = 0;
- if (tf->flags & ATA_TFLAG_DEVICE) {
- VPRINTK("device 0x%X\n", tf->device);
+ if (tf->flags & ATA_TFLAG_DEVICE)
blob = tf->device;
- } else {
- blob = 0;
- }
- DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command);
blob |= (tf->command << 8);
__raw_writew(blob, base + 6);
-
ata_wait_idle(ap);
}
@@ -543,12 +519,10 @@ static void octeon_cf_dma_setup(struct ata_queued_cmd *qc)
struct octeon_cf_port *cf_port;
cf_port = ap->private_data;
- DPRINTK("ENTER\n");
/* issue r/w command */
qc->cursg = qc->sg;
cf_port->dma_finished = 0;
ap->ops->sff_exec_command(ap, &qc->tf);
- DPRINTK("EXIT\n");
}
/**
@@ -563,8 +537,6 @@ static void octeon_cf_dma_start(struct ata_queued_cmd *qc)
union cvmx_mio_boot_dma_intx mio_boot_dma_int;
struct scatterlist *sg;
- VPRINTK("%d scatterlists\n", qc->n_elem);
-
/* Get the scatter list entry we need to DMA into */
sg = qc->cursg;
BUG_ON(!sg);
@@ -605,10 +577,6 @@ static void octeon_cf_dma_start(struct ata_queued_cmd *qc)
mio_boot_dma_cfg.s.adr = sg_dma_address(sg);
- VPRINTK("%s %d bytes address=%p\n",
- (mio_boot_dma_cfg.s.rw) ? "write" : "read", sg->length,
- (void *)(unsigned long)mio_boot_dma_cfg.s.adr);
-
cvmx_write_csr(cf_port->dma_base + DMA_CFG, mio_boot_dma_cfg.u64);
}
@@ -627,9 +595,7 @@ static unsigned int octeon_cf_dma_finished(struct ata_port *ap,
union cvmx_mio_boot_dma_intx dma_int;
u8 status;
- VPRINTK("ata%u: protocol %d task_state %d\n",
- ap->print_id, qc->tf.protocol, ap->hsm_task_state);
-
+ trace_ata_bmdma_stop(ap, &qc->tf, qc->tag);
if (ap->hsm_task_state != HSM_ST_LAST)
return 0;
@@ -678,7 +644,6 @@ static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance)
spin_lock_irqsave(&host->lock, flags);
- DPRINTK("ENTER\n");
for (i = 0; i < host->n_ports; i++) {
u8 status;
struct ata_port *ap;
@@ -701,6 +666,7 @@ static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance)
if (!sg_is_last(qc->cursg)) {
qc->cursg = sg_next(qc->cursg);
handled = 1;
+ trace_ata_bmdma_start(ap, &qc->tf, qc->tag);
octeon_cf_dma_start(qc);
continue;
} else {
@@ -732,7 +698,6 @@ static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance)
}
}
spin_unlock_irqrestore(&host->lock, flags);
- DPRINTK("EXIT\n");
return IRQ_RETVAL(handled);
}
@@ -800,8 +765,11 @@ static unsigned int octeon_cf_qc_issue(struct ata_queued_cmd *qc)
case ATA_PROT_DMA:
WARN_ON(qc->tf.flags & ATA_TFLAG_POLLING);
+ trace_ata_tf_load(ap, &qc->tf);
ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */
+ trace_ata_bmdma_setup(ap, &qc->tf, qc->tag);
octeon_cf_dma_setup(qc); /* set up dma */
+ trace_ata_bmdma_start(ap, &qc->tf, qc->tag);
octeon_cf_dma_start(qc); /* initiate dma */
ap->hsm_task_state = HSM_ST_LAST;
break;
diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c
index 35aa158fc976..c3a40b717dcd 100644
--- a/drivers/ata/pata_of_platform.c
+++ b/drivers/ata/pata_of_platform.c
@@ -25,11 +25,12 @@ static int pata_of_platform_probe(struct platform_device *ofdev)
struct device_node *dn = ofdev->dev.of_node;
struct resource io_res;
struct resource ctl_res;
- struct resource *irq_res;
+ struct resource irq_res;
unsigned int reg_shift = 0;
int pio_mode = 0;
int pio_mask;
bool use16bit;
+ int irq;
ret = of_address_to_resource(dn, 0, &io_res);
if (ret) {
@@ -45,7 +46,15 @@ static int pata_of_platform_probe(struct platform_device *ofdev)
return -EINVAL;
}
- irq_res = platform_get_resource(ofdev, IORESOURCE_IRQ, 0);
+ memset(&irq_res, 0, sizeof(irq_res));
+
+ irq = platform_get_irq_optional(ofdev, 0);
+ if (irq < 0 && irq != -ENXIO)
+ return irq;
+ if (irq > 0) {
+ irq_res.start = irq;
+ irq_res.end = irq;
+ }
of_property_read_u32(dn, "reg-shift", &reg_shift);
@@ -63,7 +72,7 @@ static int pata_of_platform_probe(struct platform_device *ofdev)
pio_mask = 1 << pio_mode;
pio_mask |= (1 << pio_mode) - 1;
- return __pata_platform_probe(&ofdev->dev, &io_res, &ctl_res, irq_res,
+ return __pata_platform_probe(&ofdev->dev, &io_res, &ctl_res, irq > 0 ? &irq_res : NULL,
reg_shift, pio_mask, &pata_platform_sht,
use16bit);
}
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index effc1a09444d..4fbb3eed8b0b 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -30,13 +30,6 @@
#define DRV_NAME "pata_pdc2027x"
#define DRV_VERSION "1.0"
-#undef PDC_DEBUG
-
-#ifdef PDC_DEBUG
-#define PDPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#else
-#define PDPRINTK(fmt, args...)
-#endif
enum {
PDC_MMIO_BAR = 5,
@@ -214,11 +207,11 @@ static int pdc2027x_cable_detect(struct ata_port *ap)
if (cgcr & (1 << 26))
goto cbl40;
- PDPRINTK("No cable or 80-conductor cable on port %d\n", ap->port_no);
+ ata_port_dbg(ap, "No cable or 80-conductor cable\n");
return ATA_CBL_PATA80;
cbl40:
- printk(KERN_INFO DRV_NAME ": 40-conductor cable detected on port %d\n", ap->port_no);
+ ata_port_info(ap, DRV_NAME ":40-conductor cable detected\n");
return ATA_CBL_PATA40;
}
@@ -292,17 +285,17 @@ static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev)
unsigned int pio = adev->pio_mode - XFER_PIO_0;
u32 ctcr0, ctcr1;
- PDPRINTK("adev->pio_mode[%X]\n", adev->pio_mode);
+ ata_port_dbg(ap, "adev->pio_mode[%X]\n", adev->pio_mode);
/* Sanity check */
if (pio > 4) {
- printk(KERN_ERR DRV_NAME ": Unknown pio mode [%d] ignored\n", pio);
+ ata_port_err(ap, "Unknown pio mode [%d] ignored\n", pio);
return;
}
/* Set the PIO timing registers using value table for 133MHz */
- PDPRINTK("Set pio regs... \n");
+ ata_port_dbg(ap, "Set pio regs... \n");
ctcr0 = ioread32(dev_mmio(ap, adev, PDC_CTCR0));
ctcr0 &= 0xffff0000;
@@ -315,9 +308,7 @@ static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev)
ctcr1 |= (pdc2027x_pio_timing_tbl[pio].value2 << 24);
iowrite32(ctcr1, dev_mmio(ap, adev, PDC_CTCR1));
- PDPRINTK("Set pio regs done\n");
-
- PDPRINTK("Set to pio mode[%u] \n", pio);
+ ata_port_dbg(ap, "Set to pio mode[%u] \n", pio);
}
/**
@@ -350,7 +341,7 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev)
iowrite32(ctcr1 & ~(1 << 7), dev_mmio(ap, adev, PDC_CTCR1));
}
- PDPRINTK("Set udma regs... \n");
+ ata_port_dbg(ap, "Set udma regs... \n");
ctcr1 = ioread32(dev_mmio(ap, adev, PDC_CTCR1));
ctcr1 &= 0xff000000;
@@ -359,16 +350,14 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev)
(pdc2027x_udma_timing_tbl[udma_mode].value2 << 16);
iowrite32(ctcr1, dev_mmio(ap, adev, PDC_CTCR1));
- PDPRINTK("Set udma regs done\n");
-
- PDPRINTK("Set to udma mode[%u] \n", udma_mode);
+ ata_port_dbg(ap, "Set to udma mode[%u] \n", udma_mode);
} else if ((dma_mode >= XFER_MW_DMA_0) &&
(dma_mode <= XFER_MW_DMA_2)) {
/* Set the MDMA timing registers with value table for 133MHz */
unsigned int mdma_mode = dma_mode & 0x07;
- PDPRINTK("Set mdma regs... \n");
+ ata_port_dbg(ap, "Set mdma regs... \n");
ctcr0 = ioread32(dev_mmio(ap, adev, PDC_CTCR0));
ctcr0 &= 0x0000ffff;
@@ -376,11 +365,10 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev)
(pdc2027x_mdma_timing_tbl[mdma_mode].value1 << 24);
iowrite32(ctcr0, dev_mmio(ap, adev, PDC_CTCR0));
- PDPRINTK("Set mdma regs done\n");
- PDPRINTK("Set to mdma mode[%u] \n", mdma_mode);
+ ata_port_dbg(ap, "Set to mdma mode[%u] \n", mdma_mode);
} else {
- printk(KERN_ERR DRV_NAME ": Unknown dma mode [%u] ignored\n", dma_mode);
+ ata_port_err(ap, "Unknown dma mode [%u] ignored\n", dma_mode);
}
}
@@ -414,7 +402,7 @@ static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed
ctcr1 |= (1 << 25);
iowrite32(ctcr1, dev_mmio(ap, dev, PDC_CTCR1));
- PDPRINTK("Turn on prefetch\n");
+ ata_dev_dbg(dev, "Turn on prefetch\n");
} else {
pdc2027x_set_dmamode(ap, dev);
}
@@ -485,8 +473,8 @@ retry:
counter = (bccrh << 15) | bccrl;
- PDPRINTK("bccrh [%X] bccrl [%X]\n", bccrh, bccrl);
- PDPRINTK("bccrhv[%X] bccrlv[%X]\n", bccrhv, bccrlv);
+ dev_dbg(host->dev, "bccrh [%X] bccrl [%X]\n", bccrh, bccrl);
+ dev_dbg(host->dev, "bccrhv[%X] bccrlv[%X]\n", bccrhv, bccrlv);
/*
* The 30-bit decreasing counter are read by 2 pieces.
@@ -495,7 +483,7 @@ retry:
*/
if (retry && !(bccrh == bccrhv && bccrl >= bccrlv)) {
retry--;
- PDPRINTK("rereading counter\n");
+ dev_dbg(host->dev, "rereading counter\n");
goto retry;
}
@@ -520,20 +508,19 @@ static void pdc_adjust_pll(struct ata_host *host, long pll_clock, unsigned int b
/* Sanity check */
if (unlikely(pll_clock_khz < 5000L || pll_clock_khz > 70000L)) {
- printk(KERN_ERR DRV_NAME ": Invalid PLL input clock %ldkHz, give up!\n", pll_clock_khz);
+ dev_err(host->dev, "Invalid PLL input clock %ldkHz, give up!\n",
+ pll_clock_khz);
return;
}
-#ifdef PDC_DEBUG
- PDPRINTK("pout_required is %ld\n", pout_required);
+ dev_dbg(host->dev, "pout_required is %ld\n", pout_required);
/* Show the current clock value of PLL control register
* (maybe already configured by the firmware)
*/
pll_ctl = ioread16(mmio_base + PDC_PLL_CTL);
- PDPRINTK("pll_ctl[%X]\n", pll_ctl);
-#endif
+ dev_dbg(host->dev, "pll_ctl[%X]\n", pll_ctl);
/*
* Calculate the ratio of F, R and OD
@@ -552,7 +539,7 @@ static void pdc_adjust_pll(struct ata_host *host, long pll_clock, unsigned int b
R = 0x00;
} else {
/* Invalid ratio */
- printk(KERN_ERR DRV_NAME ": Invalid ratio %ld, give up!\n", ratio);
+ dev_err(host->dev, "Invalid ratio %ld, give up!\n", ratio);
return;
}
@@ -560,15 +547,15 @@ static void pdc_adjust_pll(struct ata_host *host, long pll_clock, unsigned int b
if (unlikely(F < 0 || F > 127)) {
/* Invalid F */
- printk(KERN_ERR DRV_NAME ": F[%d] invalid!\n", F);
+ dev_err(host->dev, "F[%d] invalid!\n", F);
return;
}
- PDPRINTK("F[%d] R[%d] ratio*1000[%ld]\n", F, R, ratio);
+ dev_dbg(host->dev, "F[%d] R[%d] ratio*1000[%ld]\n", F, R, ratio);
pll_ctl = (R << 8) | F;
- PDPRINTK("Writing pll_ctl[%X]\n", pll_ctl);
+ dev_dbg(host->dev, "Writing pll_ctl[%X]\n", pll_ctl);
iowrite16(pll_ctl, mmio_base + PDC_PLL_CTL);
ioread16(mmio_base + PDC_PLL_CTL); /* flush */
@@ -576,15 +563,13 @@ static void pdc_adjust_pll(struct ata_host *host, long pll_clock, unsigned int b
/* Wait the PLL circuit to be stable */
msleep(30);
-#ifdef PDC_DEBUG
/*
* Show the current clock value of PLL control register
* (maybe configured by the firmware)
*/
pll_ctl = ioread16(mmio_base + PDC_PLL_CTL);
- PDPRINTK("pll_ctl[%X]\n", pll_ctl);
-#endif
+ dev_dbg(host->dev, "pll_ctl[%X]\n", pll_ctl);
return;
}
@@ -605,7 +590,7 @@ static long pdc_detect_pll_input_clock(struct ata_host *host)
/* Start the test mode */
scr = ioread32(mmio_base + PDC_SYS_CTL);
- PDPRINTK("scr[%X]\n", scr);
+ dev_dbg(host->dev, "scr[%X]\n", scr);
iowrite32(scr | (0x01 << 14), mmio_base + PDC_SYS_CTL);
ioread32(mmio_base + PDC_SYS_CTL); /* flush */
@@ -622,7 +607,7 @@ static long pdc_detect_pll_input_clock(struct ata_host *host)
/* Stop the test mode */
scr = ioread32(mmio_base + PDC_SYS_CTL);
- PDPRINTK("scr[%X]\n", scr);
+ dev_dbg(host->dev, "scr[%X]\n", scr);
iowrite32(scr & ~(0x01 << 14), mmio_base + PDC_SYS_CTL);
ioread32(mmio_base + PDC_SYS_CTL); /* flush */
@@ -632,8 +617,8 @@ static long pdc_detect_pll_input_clock(struct ata_host *host)
pll_clock = ((start_count - end_count) & 0x3fffffff) / 100 *
(100000000 / usec_elapsed);
- PDPRINTK("start[%ld] end[%ld] \n", start_count, end_count);
- PDPRINTK("PLL input clock[%ld]Hz\n", pll_clock);
+ dev_dbg(host->dev, "start[%ld] end[%ld] PLL input clock[%ld]HZ\n",
+ start_count, end_count, pll_clock);
return pll_clock;
}
diff --git a/drivers/ata/pata_pdc202xx_old.c b/drivers/ata/pata_pdc202xx_old.c
index 0c5cbcd28d0d..b99849095853 100644
--- a/drivers/ata/pata_pdc202xx_old.c
+++ b/drivers/ata/pata_pdc202xx_old.c
@@ -38,8 +38,6 @@ static int pdc2026x_cable_detect(struct ata_port *ap)
static void pdc202xx_exec_command(struct ata_port *ap,
const struct ata_taskfile *tf)
{
- DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command);
-
iowrite8(tf->command, ap->ioaddr.command_addr);
ndelay(400);
}
diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c
index 3722a67083fd..fb00c3e5fd19 100644
--- a/drivers/ata/pata_rz1000.c
+++ b/drivers/ata/pata_rz1000.c
@@ -69,7 +69,7 @@ static int rz1000_fifo_disable(struct pci_dev *pdev)
reg &= 0xDFFF;
if (pci_write_config_word(pdev, 0x40, reg) != 0)
return -1;
- printk(KERN_INFO DRV_NAME ": disabled chipset readahead.\n");
+ dev_info(&pdev->dev, "disabled chipset readahead.\n");
return 0;
}
@@ -97,7 +97,7 @@ static int rz1000_init_one (struct pci_dev *pdev, const struct pci_device_id *en
if (rz1000_fifo_disable(pdev) == 0)
return ata_pci_sff_init_one(pdev, ppi, &rz1000_sht, NULL, 0);
- printk(KERN_ERR DRV_NAME ": failed to disable read-ahead on chipset..\n");
+ dev_err(&pdev->dev, "failed to disable read-ahead on chipset.\n");
/* Not safe to use so skip */
return -ENODEV;
}
diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c
index b602e303fb54..e410fe44177f 100644
--- a/drivers/ata/pata_serverworks.c
+++ b/drivers/ata/pata_serverworks.c
@@ -286,13 +286,13 @@ static int serverworks_fixup_osb4(struct pci_dev *pdev)
pci_read_config_dword(isa_dev, 0x64, &reg);
reg &= ~0x00002000; /* disable 600ns interrupt mask */
if (!(reg & 0x00004000))
- printk(KERN_DEBUG DRV_NAME ": UDMA not BIOS enabled.\n");
+ dev_info(&pdev->dev, "UDMA not BIOS enabled.\n");
reg |= 0x00004000; /* enable UDMA/33 support */
pci_write_config_dword(isa_dev, 0x64, reg);
pci_dev_put(isa_dev);
return 0;
}
- printk(KERN_WARNING DRV_NAME ": Unable to find bridge.\n");
+ dev_warn(&pdev->dev, "Unable to find bridge.\n");
return -ENODEV;
}
diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index 43215a664b96..0da58ce20d82 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c
@@ -212,7 +212,6 @@ static void sil680_set_dmamode(struct ata_port *ap, struct ata_device *adev)
static void sil680_sff_exec_command(struct ata_port *ap,
const struct ata_taskfile *tf)
{
- DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command);
iowrite8(tf->command, ap->ioaddr.command_addr);
ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
}
@@ -309,17 +308,17 @@ static u8 sil680_init_chip(struct pci_dev *pdev, int *try_mmio)
switch (tmpbyte & 0x30) {
case 0x00:
- printk(KERN_INFO "sil680: 100MHz clock.\n");
+ dev_info(&pdev->dev, "sil680: 100MHz clock.\n");
break;
case 0x10:
- printk(KERN_INFO "sil680: 133MHz clock.\n");
+ dev_info(&pdev->dev, "sil680: 133MHz clock.\n");
break;
case 0x20:
- printk(KERN_INFO "sil680: Using PCI clock.\n");
+ dev_info(&pdev->dev, "sil680: Using PCI clock.\n");
break;
/* This last case is _NOT_ ok */
case 0x30:
- printk(KERN_ERR "sil680: Clock disabled ?\n");
+ dev_err(&pdev->dev, "sil680: Clock disabled ?\n");
}
return tmpbyte & 0x30;
}
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 475032048984..439ca882f73c 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -414,12 +414,6 @@ static void via_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
iowrite8(tf->hob_lbal, ioaddr->lbal_addr);
iowrite8(tf->hob_lbam, ioaddr->lbam_addr);
iowrite8(tf->hob_lbah, ioaddr->lbah_addr);
- VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n",
- tf->hob_feature,
- tf->hob_nsect,
- tf->hob_lbal,
- tf->hob_lbam,
- tf->hob_lbah);
}
if (is_addr) {
@@ -428,12 +422,6 @@ static void via_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
iowrite8(tf->lbal, ioaddr->lbal_addr);
iowrite8(tf->lbam, ioaddr->lbam_addr);
iowrite8(tf->lbah, ioaddr->lbah_addr);
- VPRINTK("feat 0x%X nsect 0x%X lba 0x%X 0x%X 0x%X\n",
- tf->feature,
- tf->nsect,
- tf->lbal,
- tf->lbam,
- tf->lbah);
}
ata_wait_idle(ap);
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index 5db55e1e2a61..35b823ac20c9 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -284,9 +284,6 @@ static int adma_fill_sg(struct ata_queued_cmd *qc)
*(__le32 *)(buf + i) =
(pFLAGS & pEND) ? 0 : cpu_to_le32(pp->pkt_dma + i + 4);
i += 4;
-
- VPRINTK("PRD[%u] = (0x%lX, 0x%X)\n", i/4,
- (unsigned long)addr, len);
}
if (likely(last_buf))
@@ -302,8 +299,6 @@ static enum ata_completion_errors adma_qc_prep(struct ata_queued_cmd *qc)
u32 pkt_dma = (u32)pp->pkt_dma;
int i = 0;
- VPRINTK("ENTER\n");
-
adma_enter_reg_mode(qc->ap);
if (qc->tf.protocol != ATA_PROT_DMA)
return AC_ERR_OK;
@@ -355,22 +350,6 @@ static enum ata_completion_errors adma_qc_prep(struct ata_queued_cmd *qc)
i = adma_fill_sg(qc);
wmb(); /* flush PRDs and pkt to memory */
-#if 0
- /* dump out CPB + PRDs for debug */
- {
- int j, len = 0;
- static char obuf[2048];
- for (j = 0; j < i; ++j) {
- len += sprintf(obuf+len, "%02x ", buf[j]);
- if ((j & 7) == 7) {
- printk("%s\n", obuf);
- len = 0;
- }
- }
- if (len)
- printk("%s\n", obuf);
- }
-#endif
return AC_ERR_OK;
}
@@ -379,8 +358,6 @@ static inline void adma_packet_start(struct ata_queued_cmd *qc)
struct ata_port *ap = qc->ap;
void __iomem *chan = ADMA_PORT_REGS(ap);
- VPRINTK("ENTER, ap %p\n", ap);
-
/* fire up the ADMA engine */
writew(aPIOMD4 | aGO, chan + ADMA_CONTROL);
}
@@ -475,8 +452,6 @@ static inline unsigned int adma_intr_mmio(struct ata_host *host)
u8 status = ata_sff_check_status(ap);
if ((status & ATA_BUSY))
continue;
- DPRINTK("ata%u: protocol %d (dev_stat 0x%X)\n",
- ap->print_id, qc->tf.protocol, status);
/* complete taskfile transaction */
pp->state = adma_state_idle;
@@ -504,14 +479,10 @@ static irqreturn_t adma_intr(int irq, void *dev_instance)
struct ata_host *host = dev_instance;
unsigned int handled = 0;
- VPRINTK("ENTER\n");
-
spin_lock(&host->lock);
handled = adma_intr_pkt(host) | adma_intr_mmio(host);
spin_unlock(&host->lock);
- VPRINTK("EXIT\n");
-
return IRQ_RETVAL(handled);
}
@@ -547,8 +518,8 @@ static int adma_port_start(struct ata_port *ap)
return -ENOMEM;
/* paranoia? */
if ((pp->pkt_dma & 7) != 0) {
- printk(KERN_ERR "bad alignment for pp->pkt_dma: %08x\n",
- (u32)pp->pkt_dma);
+ ata_port_err(ap, "bad alignment for pp->pkt_dma: %08x\n",
+ (u32)pp->pkt_dma);
return -ENOMEM;
}
ap->private_data = pp;
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index 338c2e50f759..bec33d781ae0 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -14,15 +14,6 @@
* COPYRIGHT (C) 2005 SYNOPSYS, INC. ALL RIGHTS RESERVED
*/
-#ifdef CONFIG_SATA_DWC_DEBUG
-#define DEBUG
-#endif
-
-#ifdef CONFIG_SATA_DWC_VDEBUG
-#define VERBOSE_DEBUG
-#define DEBUG_NCQ
-#endif
-
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/device.h>
@@ -34,6 +25,7 @@
#include <linux/phy/phy.h>
#include <linux/libata.h>
#include <linux/slab.h>
+#include <trace/events/libata.h>
#include "libata.h"
@@ -182,10 +174,8 @@ enum {
* Prototypes
*/
static void sata_dwc_bmdma_start_by_tag(struct ata_queued_cmd *qc, u8 tag);
-static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc,
- u32 check_status);
-static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status);
-static void sata_dwc_port_stop(struct ata_port *ap);
+static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc);
+static void sata_dwc_dma_xfer_complete(struct ata_port *ap);
static void sata_dwc_clear_dmacr(struct sata_dwc_device_port *hsdevp, u8 tag);
#ifdef CONFIG_SATA_DWC_OLD_DMA
@@ -215,9 +205,10 @@ static int sata_dwc_dma_get_channel_old(struct sata_dwc_device_port *hsdevp)
{
struct sata_dwc_device *hsdev = hsdevp->hsdev;
struct dw_dma_slave *dws = &sata_dwc_dma_dws;
+ struct device *dev = hsdev->dev;
dma_cap_mask_t mask;
- dws->dma_dev = hsdev->dev;
+ dws->dma_dev = dev;
dma_cap_zero(mask);
dma_cap_set(DMA_SLAVE, mask);
@@ -225,8 +216,7 @@ static int sata_dwc_dma_get_channel_old(struct sata_dwc_device_port *hsdevp)
/* Acquire DMA channel */
hsdevp->chan = dma_request_channel(mask, sata_dwc_dma_filter, hsdevp);
if (!hsdevp->chan) {
- dev_err(hsdev->dev, "%s: dma channel unavailable\n",
- __func__);
+ dev_err(dev, "%s: dma channel unavailable\n", __func__);
return -EAGAIN;
}
@@ -236,26 +226,25 @@ static int sata_dwc_dma_get_channel_old(struct sata_dwc_device_port *hsdevp)
static int sata_dwc_dma_init_old(struct platform_device *pdev,
struct sata_dwc_device *hsdev)
{
- struct device_node *np = pdev->dev.of_node;
- struct resource *res;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
- hsdev->dma = devm_kzalloc(&pdev->dev, sizeof(*hsdev->dma), GFP_KERNEL);
+ hsdev->dma = devm_kzalloc(dev, sizeof(*hsdev->dma), GFP_KERNEL);
if (!hsdev->dma)
return -ENOMEM;
- hsdev->dma->dev = &pdev->dev;
+ hsdev->dma->dev = dev;
hsdev->dma->id = pdev->id;
/* Get SATA DMA interrupt number */
hsdev->dma->irq = irq_of_parse_and_map(np, 1);
if (hsdev->dma->irq == NO_IRQ) {
- dev_err(&pdev->dev, "no SATA DMA irq\n");
+ dev_err(dev, "no SATA DMA irq\n");
return -ENODEV;
}
/* Get physical SATA DMA register base address */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- hsdev->dma->regs = devm_ioremap_resource(&pdev->dev, res);
+ hsdev->dma->regs = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(hsdev->dma->regs))
return PTR_ERR(hsdev->dma->regs);
@@ -297,35 +286,6 @@ static const char *get_prot_descript(u8 protocol)
}
}
-static const char *get_dma_dir_descript(int dma_dir)
-{
- switch ((enum dma_data_direction)dma_dir) {
- case DMA_BIDIRECTIONAL:
- return "bidirectional";
- case DMA_TO_DEVICE:
- return "to device";
- case DMA_FROM_DEVICE:
- return "from device";
- default:
- return "none";
- }
-}
-
-static void sata_dwc_tf_dump(struct ata_port *ap, struct ata_taskfile *tf)
-{
- dev_vdbg(ap->dev,
- "taskfile cmd: 0x%02x protocol: %s flags: 0x%lx device: %x\n",
- tf->command, get_prot_descript(tf->protocol), tf->flags,
- tf->device);
- dev_vdbg(ap->dev,
- "feature: 0x%02x nsect: 0x%x lbal: 0x%x lbam: 0x%x lbah: 0x%x\n",
- tf->feature, tf->nsect, tf->lbal, tf->lbam, tf->lbah);
- dev_vdbg(ap->dev,
- "hob_feature: 0x%02x hob_nsect: 0x%x hob_lbal: 0x%x hob_lbam: 0x%x hob_lbah: 0x%x\n",
- tf->hob_feature, tf->hob_nsect, tf->hob_lbal, tf->hob_lbam,
- tf->hob_lbah);
-}
-
static void dma_dwc_xfer_done(void *hsdev_instance)
{
unsigned long flags;
@@ -355,7 +315,7 @@ static void dma_dwc_xfer_done(void *hsdev_instance)
}
if ((hsdevp->dma_interrupt_count % 2) == 0)
- sata_dwc_dma_xfer_complete(ap, 1);
+ sata_dwc_dma_xfer_complete(ap);
spin_unlock_irqrestore(&host->lock, flags);
}
@@ -553,6 +513,7 @@ static irqreturn_t sata_dwc_isr(int irq, void *dev_instance)
* active tag. It is the tag that matches the command about to
* be completed.
*/
+ trace_ata_bmdma_start(ap, &qc->tf, tag);
qc->ap->link.active_tag = tag;
sata_dwc_bmdma_start_by_tag(qc, tag);
@@ -586,7 +547,7 @@ static irqreturn_t sata_dwc_isr(int irq, void *dev_instance)
if (status & ATA_ERR) {
dev_dbg(ap->dev, "interrupt ATA_ERR (0x%x)\n", status);
- sata_dwc_qc_complete(ap, qc, 1);
+ sata_dwc_qc_complete(ap, qc);
handled = 1;
goto DONE;
}
@@ -611,13 +572,13 @@ DRVSTILLBUSY:
}
if ((hsdevp->dma_interrupt_count % 2) == 0)
- sata_dwc_dma_xfer_complete(ap, 1);
+ sata_dwc_dma_xfer_complete(ap);
} else if (ata_is_pio(qc->tf.protocol)) {
ata_sff_hsm_move(ap, qc, status, 0);
handled = 1;
goto DONE;
} else {
- if (unlikely(sata_dwc_qc_complete(ap, qc, 1)))
+ if (unlikely(sata_dwc_qc_complete(ap, qc)))
goto DRVSTILLBUSY;
}
@@ -677,7 +638,7 @@ DRVSTILLBUSY:
if (status & ATA_ERR) {
dev_dbg(ap->dev, "%s ATA_ERR (0x%x)\n", __func__,
status);
- sata_dwc_qc_complete(ap, qc, 1);
+ sata_dwc_qc_complete(ap, qc);
handled = 1;
goto DONE;
}
@@ -692,9 +653,9 @@ DRVSTILLBUSY:
dev_warn(ap->dev, "%s: DMA not pending?\n",
__func__);
if ((hsdevp->dma_interrupt_count % 2) == 0)
- sata_dwc_dma_xfer_complete(ap, 1);
+ sata_dwc_dma_xfer_complete(ap);
} else {
- if (unlikely(sata_dwc_qc_complete(ap, qc, 1)))
+ if (unlikely(sata_dwc_qc_complete(ap, qc)))
goto STILLBUSY;
}
continue;
@@ -749,7 +710,7 @@ static void sata_dwc_clear_dmacr(struct sata_dwc_device_port *hsdevp, u8 tag)
}
}
-static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status)
+static void sata_dwc_dma_xfer_complete(struct ata_port *ap)
{
struct ata_queued_cmd *qc;
struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
@@ -763,17 +724,6 @@ static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status)
return;
}
-#ifdef DEBUG_NCQ
- if (tag > 0) {
- dev_info(ap->dev,
- "%s tag=%u cmd=0x%02x dma dir=%s proto=%s dmacr=0x%08x\n",
- __func__, qc->hw_tag, qc->tf.command,
- get_dma_dir_descript(qc->dma_dir),
- get_prot_descript(qc->tf.protocol),
- sata_dwc_readl(&hsdev->sata_dwc_regs->dmacr));
- }
-#endif
-
if (ata_is_dma(qc->tf.protocol)) {
if (hsdevp->dma_pending[tag] == SATA_DWC_DMA_PENDING_NONE) {
dev_err(ap->dev,
@@ -783,15 +733,14 @@ static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status)
}
hsdevp->dma_pending[tag] = SATA_DWC_DMA_PENDING_NONE;
- sata_dwc_qc_complete(ap, qc, check_status);
+ sata_dwc_qc_complete(ap, qc);
ap->link.active_tag = ATA_TAG_POISON;
} else {
- sata_dwc_qc_complete(ap, qc, check_status);
+ sata_dwc_qc_complete(ap, qc);
}
}
-static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc,
- u32 check_status)
+static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc)
{
u8 status = 0;
u32 mask = 0x0;
@@ -799,7 +748,6 @@ static int sata_dwc_qc_complete(struct ata_port *ap, struct ata_queued_cmd *qc,
struct sata_dwc_device *hsdev = HSDEV_FROM_AP(ap);
struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
hsdev->sactive_queued = 0;
- dev_dbg(ap->dev, "%s checkstatus? %x\n", __func__, check_status);
if (hsdevp->dma_pending[tag] == SATA_DWC_DMA_PENDING_TX)
dev_err(ap->dev, "TX DMA PENDING\n");
@@ -980,9 +928,6 @@ static void sata_dwc_exec_command_by_tag(struct ata_port *ap,
{
struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
- dev_dbg(ap->dev, "%s cmd(0x%02x): %s tag=%d\n", __func__, tf->command,
- ata_get_cmd_descript(tf->command), tag);
-
hsdevp->cmd_issued[tag] = cmd_issued;
/*
@@ -1005,12 +950,9 @@ static void sata_dwc_bmdma_setup(struct ata_queued_cmd *qc)
{
u8 tag = qc->hw_tag;
- if (ata_is_ncq(qc->tf.protocol)) {
- dev_dbg(qc->ap->dev, "%s: ap->link.sactive=0x%08x tag=%d\n",
- __func__, qc->ap->link.sactive, tag);
- } else {
+ if (!ata_is_ncq(qc->tf.protocol))
tag = 0;
- }
+
sata_dwc_bmdma_setup_by_tag(qc, tag);
}
@@ -1037,12 +979,6 @@ static void sata_dwc_bmdma_start_by_tag(struct ata_queued_cmd *qc, u8 tag)
start_dma = 0;
}
- dev_dbg(ap->dev,
- "%s qc=%p tag: %x cmd: 0x%02x dma_dir: %s start_dma? %x\n",
- __func__, qc, tag, qc->tf.command,
- get_dma_dir_descript(qc->dma_dir), start_dma);
- sata_dwc_tf_dump(ap, &qc->tf);
-
if (start_dma) {
sata_dwc_scr_read(&ap->link, SCR_ERROR, &reg);
if (reg & SATA_DWC_SERROR_ERR_BITS) {
@@ -1067,13 +1003,9 @@ static void sata_dwc_bmdma_start(struct ata_queued_cmd *qc)
{
u8 tag = qc->hw_tag;
- if (ata_is_ncq(qc->tf.protocol)) {
- dev_dbg(qc->ap->dev, "%s: ap->link.sactive=0x%08x tag=%d\n",
- __func__, qc->ap->link.sactive, tag);
- } else {
+ if (!ata_is_ncq(qc->tf.protocol))
tag = 0;
- }
- dev_dbg(qc->ap->dev, "%s\n", __func__);
+
sata_dwc_bmdma_start_by_tag(qc, tag);
}
@@ -1084,16 +1016,6 @@ static unsigned int sata_dwc_qc_issue(struct ata_queued_cmd *qc)
struct ata_port *ap = qc->ap;
struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
-#ifdef DEBUG_NCQ
- if (qc->hw_tag > 0 || ap->link.sactive > 1)
- dev_info(ap->dev,
- "%s ap id=%d cmd(0x%02x)=%s qc tag=%d prot=%s ap active_tag=0x%08x ap sactive=0x%08x\n",
- __func__, ap->print_id, qc->tf.command,
- ata_get_cmd_descript(qc->tf.command),
- qc->hw_tag, get_prot_descript(qc->tf.protocol),
- ap->link.active_tag, ap->link.sactive);
-#endif
-
if (!ata_is_ncq(qc->tf.protocol))
tag = 0;
@@ -1110,11 +1032,9 @@ static unsigned int sata_dwc_qc_issue(struct ata_queued_cmd *qc)
sactive |= (0x00000001 << tag);
sata_dwc_scr_write(&ap->link, SCR_ACTIVE, sactive);
- dev_dbg(qc->ap->dev,
- "%s: tag=%d ap->link.sactive = 0x%08x sactive=0x%08x\n",
- __func__, tag, qc->ap->link.sactive, sactive);
-
+ trace_ata_tf_load(ap, &qc->tf);
ap->ops->sff_tf_load(ap, &qc->tf);
+ trace_ata_exec_command(ap, &qc->tf, tag);
sata_dwc_exec_command_by_tag(ap, &qc->tf, tag,
SATA_DWC_CMD_ISSUED_PEND);
} else {
@@ -1207,6 +1127,8 @@ static const struct ata_port_info sata_dwc_port_info[] = {
static int sata_dwc_probe(struct platform_device *ofdev)
{
+ struct device *dev = &ofdev->dev;
+ struct device_node *np = dev->of_node;
struct sata_dwc_device *hsdev;
u32 idr, versionr;
char *ver = (char *)&versionr;
@@ -1216,23 +1138,21 @@ static int sata_dwc_probe(struct platform_device *ofdev)
struct ata_host *host;
struct ata_port_info pi = sata_dwc_port_info[0];
const struct ata_port_info *ppi[] = { &pi, NULL };
- struct device_node *np = ofdev->dev.of_node;
struct resource *res;
/* Allocate DWC SATA device */
- host = ata_host_alloc_pinfo(&ofdev->dev, ppi, SATA_DWC_MAX_PORTS);
- hsdev = devm_kzalloc(&ofdev->dev, sizeof(*hsdev), GFP_KERNEL);
+ host = ata_host_alloc_pinfo(dev, ppi, SATA_DWC_MAX_PORTS);
+ hsdev = devm_kzalloc(dev, sizeof(*hsdev), GFP_KERNEL);
if (!host || !hsdev)
return -ENOMEM;
host->private_data = hsdev;
/* Ioremap SATA registers */
- res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
- base = devm_ioremap_resource(&ofdev->dev, res);
+ base = devm_platform_get_and_ioremap_resource(ofdev, 0, &res);
if (IS_ERR(base))
return PTR_ERR(base);
- dev_dbg(&ofdev->dev, "ioremap done for SATA register address\n");
+ dev_dbg(dev, "ioremap done for SATA register address\n");
/* Synopsys DWC SATA specific Registers */
hsdev->sata_dwc_regs = base + SATA_DWC_REG_OFFSET;
@@ -1246,11 +1166,10 @@ static int sata_dwc_probe(struct platform_device *ofdev)
/* Read the ID and Version Registers */
idr = sata_dwc_readl(&hsdev->sata_dwc_regs->idr);
versionr = sata_dwc_readl(&hsdev->sata_dwc_regs->versionr);
- dev_notice(&ofdev->dev, "id %d, controller version %c.%c%c\n",
- idr, ver[0], ver[1], ver[2]);
+ dev_notice(dev, "id %d, controller version %c.%c%c\n", idr, ver[0], ver[1], ver[2]);
/* Save dev for later use in dev_xxx() routines */
- hsdev->dev = &ofdev->dev;
+ hsdev->dev = dev;
/* Enable SATA Interrupts */
sata_dwc_enable_interrupts(hsdev);
@@ -1258,7 +1177,7 @@ static int sata_dwc_probe(struct platform_device *ofdev)
/* Get SATA interrupt number */
irq = irq_of_parse_and_map(np, 0);
if (irq == NO_IRQ) {
- dev_err(&ofdev->dev, "no SATA DMA irq\n");
+ dev_err(dev, "no SATA DMA irq\n");
return -ENODEV;
}
@@ -1270,7 +1189,7 @@ static int sata_dwc_probe(struct platform_device *ofdev)
}
#endif
- hsdev->phy = devm_phy_optional_get(hsdev->dev, "sata-phy");
+ hsdev->phy = devm_phy_optional_get(dev, "sata-phy");
if (IS_ERR(hsdev->phy))
return PTR_ERR(hsdev->phy);
@@ -1285,7 +1204,7 @@ static int sata_dwc_probe(struct platform_device *ofdev)
*/
err = ata_host_activate(host, irq, sata_dwc_isr, 0, &sata_dwc_sht);
if (err)
- dev_err(&ofdev->dev, "failed to activate host");
+ dev_err(dev, "failed to activate host");
return 0;
@@ -1309,7 +1228,7 @@ static int sata_dwc_remove(struct platform_device *ofdev)
sata_dwc_dma_exit_old(hsdev);
#endif
- dev_dbg(&ofdev->dev, "done\n");
+ dev_dbg(dev, "done\n");
return 0;
}
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 3b31a4f596d8..da0152116d9f 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -221,10 +221,10 @@ enum {
* 4 Dwords per command slot, command header size == 64 Dwords.
*/
struct cmdhdr_tbl_entry {
- u32 cda;
- u32 prde_fis_len;
- u32 ttl;
- u32 desc_info;
+ __le32 cda;
+ __le32 prde_fis_len;
+ __le32 ttl;
+ __le32 desc_info;
};
/*
@@ -246,8 +246,10 @@ enum {
struct command_desc {
u8 cfis[8 * 4];
u8 sfis[8 * 4];
- u8 acmd[4 * 4];
- u8 fill[4 * 4];
+ struct_group(cdb,
+ u8 acmd[4 * 4];
+ u8 fill[4 * 4];
+ );
u32 prdt[SATA_FSL_MAX_PRD_DIRECT * 4];
u32 prdt_indirect[(SATA_FSL_MAX_PRD - SATA_FSL_MAX_PRD_DIRECT) * 4];
};
@@ -257,9 +259,9 @@ struct command_desc {
*/
struct prde {
- u32 dba;
+ __le32 dba;
u8 fill[2 * 4];
- u32 ddc_and_ext;
+ __le32 ddc_and_ext;
};
/*
@@ -311,16 +313,16 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host,
intr_coalescing_ticks = ticks;
spin_unlock_irqrestore(&host->lock, flags);
- DPRINTK("interrupt coalescing, count = 0x%x, ticks = %x\n",
- intr_coalescing_count, intr_coalescing_ticks);
- DPRINTK("ICC register status: (hcr base: %p) = 0x%x\n",
- hcr_base, ioread32(hcr_base + ICC));
+ dev_dbg(host->dev, "interrupt coalescing, count = 0x%x, ticks = %x\n",
+ intr_coalescing_count, intr_coalescing_ticks);
+ dev_dbg(host->dev, "ICC register status: (hcr base: 0x%p) = 0x%x\n",
+ hcr_base, ioread32(hcr_base + ICC));
}
static ssize_t fsl_sata_intr_coalescing_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d %d\n",
+ return sysfs_emit(buf, "%d %d\n",
intr_coalescing_count, intr_coalescing_ticks);
}
@@ -355,9 +357,9 @@ static ssize_t fsl_sata_rx_watermark_show(struct device *dev,
spin_lock_irqsave(&host->lock, flags);
rx_watermark = ioread32(csr_base + TRANSCFG);
rx_watermark &= 0x1f;
-
spin_unlock_irqrestore(&host->lock, flags);
- return sprintf(buf, "%d\n", rx_watermark);
+
+ return sysfs_emit(buf, "%d\n", rx_watermark);
}
static ssize_t fsl_sata_rx_watermark_store(struct device *dev,
@@ -385,25 +387,27 @@ static ssize_t fsl_sata_rx_watermark_store(struct device *dev,
return strlen(buf);
}
-static inline unsigned int sata_fsl_tag(unsigned int tag,
+static inline unsigned int sata_fsl_tag(struct ata_port *ap,
+ unsigned int tag,
void __iomem *hcr_base)
{
/* We let libATA core do actual (queue) tag allocation */
if (unlikely(tag >= SATA_FSL_QUEUE_DEPTH)) {
- DPRINTK("tag %d invalid : out of range\n", tag);
+ ata_port_dbg(ap, "tag %d invalid : out of range\n", tag);
return 0;
}
if (unlikely((ioread32(hcr_base + CQ)) & (1 << tag))) {
- DPRINTK("tag %d invalid : in use!!\n", tag);
+ ata_port_dbg(ap, "tag %d invalid : in use!!\n", tag);
return 0;
}
return tag;
}
-static void sata_fsl_setup_cmd_hdr_entry(struct sata_fsl_port_priv *pp,
+static void sata_fsl_setup_cmd_hdr_entry(struct ata_port *ap,
+ struct sata_fsl_port_priv *pp,
unsigned int tag, u32 desc_info,
u32 data_xfer_len, u8 num_prde,
u8 fis_len)
@@ -421,11 +425,11 @@ static void sata_fsl_setup_cmd_hdr_entry(struct sata_fsl_port_priv *pp,
pp->cmdslot[tag].ttl = cpu_to_le32(data_xfer_len & ~0x03);
pp->cmdslot[tag].desc_info = cpu_to_le32(desc_info | (tag & 0x1F));
- VPRINTK("cda=0x%x, prde_fis_len=0x%x, ttl=0x%x, di=0x%x\n",
- pp->cmdslot[tag].cda,
- pp->cmdslot[tag].prde_fis_len,
- pp->cmdslot[tag].ttl, pp->cmdslot[tag].desc_info);
-
+ ata_port_dbg(ap, "cda=0x%x, prde_fis_len=0x%x, ttl=0x%x, di=0x%x\n",
+ le32_to_cpu(pp->cmdslot[tag].cda),
+ le32_to_cpu(pp->cmdslot[tag].prde_fis_len),
+ le32_to_cpu(pp->cmdslot[tag].ttl),
+ le32_to_cpu(pp->cmdslot[tag].desc_info));
}
static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
@@ -447,8 +451,6 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
dma_addr_t indirect_ext_segment_paddr;
unsigned int si;
- VPRINTK("SATA FSL : cd = 0x%p, prd = 0x%p\n", cmd_desc, prd);
-
indirect_ext_segment_paddr = cmd_desc_paddr +
SATA_FSL_CMD_DESC_OFFSET_TO_PRDT + SATA_FSL_MAX_PRD_DIRECT * 16;
@@ -456,9 +458,6 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
dma_addr_t sg_addr = sg_dma_address(sg);
u32 sg_len = sg_dma_len(sg);
- VPRINTK("SATA FSL : fill_sg, sg_addr = 0x%llx, sg_len = %d\n",
- (unsigned long long)sg_addr, sg_len);
-
/* warn if each s/g element is not dword aligned */
if (unlikely(sg_addr & 0x03))
ata_port_err(qc->ap, "s/g addr unaligned : 0x%llx\n",
@@ -469,7 +468,6 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
if (num_prde == (SATA_FSL_MAX_PRD_DIRECT - 1) &&
sg_next(sg) != NULL) {
- VPRINTK("setting indirect prde\n");
prd_ptr_to_indirect_ext = prd;
prd->dba = cpu_to_le32(indirect_ext_segment_paddr);
indirect_ext_segment_sz = 0;
@@ -481,9 +479,6 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
prd->dba = cpu_to_le32(sg_addr);
prd->ddc_and_ext = cpu_to_le32(data_snoop | (sg_len & ~0x03));
- VPRINTK("sg_fill, ttl=%d, dba=0x%x, ddc=0x%x\n",
- ttl_dwords, prd->dba, prd->ddc_and_ext);
-
++num_prde;
++prd;
if (prd_ptr_to_indirect_ext)
@@ -508,7 +503,7 @@ static enum ata_completion_errors sata_fsl_qc_prep(struct ata_queued_cmd *qc)
struct sata_fsl_port_priv *pp = ap->private_data;
struct sata_fsl_host_priv *host_priv = ap->host->private_data;
void __iomem *hcr_base = host_priv->hcr_base;
- unsigned int tag = sata_fsl_tag(qc->hw_tag, hcr_base);
+ unsigned int tag = sata_fsl_tag(ap, qc->hw_tag, hcr_base);
struct command_desc *cd;
u32 desc_info = CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE;
u32 num_prde = 0;
@@ -520,19 +515,11 @@ static enum ata_completion_errors sata_fsl_qc_prep(struct ata_queued_cmd *qc)
ata_tf_to_fis(&qc->tf, qc->dev->link->pmp, 1, (u8 *) &cd->cfis);
- VPRINTK("Dumping cfis : 0x%x, 0x%x, 0x%x\n",
- cd->cfis[0], cd->cfis[1], cd->cfis[2]);
-
- if (qc->tf.protocol == ATA_PROT_NCQ) {
- VPRINTK("FPDMA xfer,Sctor cnt[0:7],[8:15] = %d,%d\n",
- cd->cfis[3], cd->cfis[11]);
- }
-
/* setup "ACMD - atapi command" in cmd. desc. if this is ATAPI cmd */
if (ata_is_atapi(qc->tf.protocol)) {
desc_info |= ATAPI_CMD;
- memset((void *)&cd->acmd, 0, 32);
- memcpy((void *)&cd->acmd, qc->cdb, qc->dev->cdb_len);
+ memset(&cd->cdb, 0, sizeof(cd->cdb));
+ memcpy(&cd->cdb, qc->cdb, qc->dev->cdb_len);
}
if (qc->flags & ATA_QCFLAG_DMAMAP)
@@ -543,10 +530,10 @@ static enum ata_completion_errors sata_fsl_qc_prep(struct ata_queued_cmd *qc)
if (qc->tf.protocol == ATA_PROT_NCQ)
desc_info |= FPDMA_QUEUED_CMD;
- sata_fsl_setup_cmd_hdr_entry(pp, tag, desc_info, ttl_dwords,
+ sata_fsl_setup_cmd_hdr_entry(ap, pp, tag, desc_info, ttl_dwords,
num_prde, 5);
- VPRINTK("SATA FSL : xx_qc_prep, di = 0x%x, ttl = %d, num_prde = %d\n",
+ ata_port_dbg(ap, "SATA FSL : di = 0x%x, ttl = %d, num_prde = %d\n",
desc_info, ttl_dwords, num_prde);
return AC_ERR_OK;
@@ -557,9 +544,9 @@ static unsigned int sata_fsl_qc_issue(struct ata_queued_cmd *qc)
struct ata_port *ap = qc->ap;
struct sata_fsl_host_priv *host_priv = ap->host->private_data;
void __iomem *hcr_base = host_priv->hcr_base;
- unsigned int tag = sata_fsl_tag(qc->hw_tag, hcr_base);
+ unsigned int tag = sata_fsl_tag(ap, qc->hw_tag, hcr_base);
- VPRINTK("xx_qc_issue called,CQ=0x%x,CA=0x%x,CE=0x%x,CC=0x%x\n",
+ ata_port_dbg(ap, "CQ=0x%x,CA=0x%x,CE=0x%x,CC=0x%x\n",
ioread32(CQ + hcr_base),
ioread32(CA + hcr_base),
ioread32(CE + hcr_base), ioread32(CC + hcr_base));
@@ -569,10 +556,10 @@ static unsigned int sata_fsl_qc_issue(struct ata_queued_cmd *qc)
/* Simply queue command to the controller/device */
iowrite32(1 << tag, CQ + hcr_base);
- VPRINTK("xx_qc_issue called, tag=%d, CQ=0x%x, CA=0x%x\n",
+ ata_port_dbg(ap, "tag=%d, CQ=0x%x, CA=0x%x\n",
tag, ioread32(CQ + hcr_base), ioread32(CA + hcr_base));
- VPRINTK("CE=0x%x, DE=0x%x, CC=0x%x, CmdStat = 0x%x\n",
+ ata_port_dbg(ap, "CE=0x%x, DE=0x%x, CC=0x%x, CmdStat = 0x%x\n",
ioread32(CE + hcr_base),
ioread32(DE + hcr_base),
ioread32(CC + hcr_base),
@@ -586,7 +573,7 @@ static bool sata_fsl_qc_fill_rtf(struct ata_queued_cmd *qc)
struct sata_fsl_port_priv *pp = qc->ap->private_data;
struct sata_fsl_host_priv *host_priv = qc->ap->host->private_data;
void __iomem *hcr_base = host_priv->hcr_base;
- unsigned int tag = sata_fsl_tag(qc->hw_tag, hcr_base);
+ unsigned int tag = sata_fsl_tag(qc->ap, qc->hw_tag, hcr_base);
struct command_desc *cd;
cd = pp->cmdentry + tag;
@@ -613,7 +600,7 @@ static int sata_fsl_scr_write(struct ata_link *link,
return -EINVAL;
}
- VPRINTK("xx_scr_write, reg_in = %d\n", sc_reg);
+ ata_link_dbg(link, "reg_in = %d\n", sc_reg);
iowrite32(val, ssr_base + (sc_reg * 4));
return 0;
@@ -637,7 +624,7 @@ static int sata_fsl_scr_read(struct ata_link *link,
return -EINVAL;
}
- VPRINTK("xx_scr_read, reg_in = %d\n", sc_reg);
+ ata_link_dbg(link, "reg_in = %d\n", sc_reg);
*val = ioread32(ssr_base + (sc_reg * 4));
return 0;
@@ -649,18 +636,18 @@ static void sata_fsl_freeze(struct ata_port *ap)
void __iomem *hcr_base = host_priv->hcr_base;
u32 temp;
- VPRINTK("xx_freeze, CQ=0x%x, CA=0x%x, CE=0x%x, DE=0x%x\n",
+ ata_port_dbg(ap, "CQ=0x%x, CA=0x%x, CE=0x%x, DE=0x%x\n",
ioread32(CQ + hcr_base),
ioread32(CA + hcr_base),
ioread32(CE + hcr_base), ioread32(DE + hcr_base));
- VPRINTK("CmdStat = 0x%x\n",
+ ata_port_dbg(ap, "CmdStat = 0x%x\n",
ioread32(host_priv->csr_base + COMMANDSTAT));
/* disable interrupts on the controller/port */
temp = ioread32(hcr_base + HCONTROL);
iowrite32((temp & ~0x3F), hcr_base + HCONTROL);
- VPRINTK("in xx_freeze : HControl = 0x%x, HStatus = 0x%x\n",
+ ata_port_dbg(ap, "HControl = 0x%x, HStatus = 0x%x\n",
ioread32(hcr_base + HCONTROL), ioread32(hcr_base + HSTATUS));
}
@@ -673,7 +660,7 @@ static void sata_fsl_thaw(struct ata_port *ap)
/* ack. any pending IRQs for this controller/port */
temp = ioread32(hcr_base + HSTATUS);
- VPRINTK("xx_thaw, pending IRQs = 0x%x\n", (temp & 0x3F));
+ ata_port_dbg(ap, "pending IRQs = 0x%x\n", (temp & 0x3F));
if (temp & 0x3F)
iowrite32((temp & 0x3F), hcr_base + HSTATUS);
@@ -682,7 +669,7 @@ static void sata_fsl_thaw(struct ata_port *ap)
temp = ioread32(hcr_base + HCONTROL);
iowrite32((temp | DEFAULT_PORT_IRQ_ENABLE_MASK), hcr_base + HCONTROL);
- VPRINTK("xx_thaw : HControl = 0x%x, HStatus = 0x%x\n",
+ ata_port_dbg(ap, "HControl = 0x%x, HStatus = 0x%x\n",
ioread32(hcr_base + HCONTROL), ioread32(hcr_base + HSTATUS));
}
@@ -744,8 +731,9 @@ static int sata_fsl_port_start(struct ata_port *ap)
ap->private_data = pp;
- VPRINTK("CHBA = 0x%x, cmdentry_phys = 0x%x\n",
- pp->cmdslot_paddr, pp->cmdentry_paddr);
+ ata_port_dbg(ap, "CHBA = 0x%lx, cmdentry_phys = 0x%lx\n",
+ (unsigned long)pp->cmdslot_paddr,
+ (unsigned long)pp->cmdentry_paddr);
/* Now, update the CHBA register in host controller cmd register set */
iowrite32(pp->cmdslot_paddr & 0xffffffff, hcr_base + CHBA);
@@ -761,9 +749,9 @@ static int sata_fsl_port_start(struct ata_port *ap)
temp = ioread32(hcr_base + HCONTROL);
iowrite32((temp | HCONTROL_ONLINE_PHY_RST), hcr_base + HCONTROL);
- VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
- VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
- VPRINTK("CHBA = 0x%x\n", ioread32(hcr_base + CHBA));
+ ata_port_dbg(ap, "HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
+ ata_port_dbg(ap, "HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
+ ata_port_dbg(ap, "CHBA = 0x%x\n", ioread32(hcr_base + CHBA));
return 0;
}
@@ -803,16 +791,15 @@ static unsigned int sata_fsl_dev_classify(struct ata_port *ap)
temp = ioread32(hcr_base + SIGNATURE);
- VPRINTK("raw sig = 0x%x\n", temp);
- VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
- VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
+ ata_port_dbg(ap, "HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
+ ata_port_dbg(ap, "HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
tf.lbah = (temp >> 24) & 0xff;
tf.lbam = (temp >> 16) & 0xff;
tf.lbal = (temp >> 8) & 0xff;
tf.nsect = temp & 0xff;
- return ata_dev_classify(&tf);
+ return ata_port_classify(ap, &tf);
}
static int sata_fsl_hardreset(struct ata_link *link, unsigned int *class,
@@ -825,8 +812,6 @@ static int sata_fsl_hardreset(struct ata_link *link, unsigned int *class,
int i = 0;
unsigned long start_jiffies;
- DPRINTK("in xx_hardreset\n");
-
try_offline_again:
/*
* Force host controller to go off-line, aborting current operations
@@ -852,9 +837,10 @@ try_offline_again:
goto try_offline_again;
}
- DPRINTK("hardreset, controller off-lined\n");
- VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
- VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
+ ata_port_dbg(ap, "hardreset, controller off-lined\n"
+ "HStatus = 0x%x HControl = 0x%x\n",
+ ioread32(hcr_base + HSTATUS),
+ ioread32(hcr_base + HCONTROL));
/*
* PHY reset should remain asserted for atleast 1ms
@@ -882,9 +868,10 @@ try_offline_again:
goto err;
}
- DPRINTK("hardreset, controller off-lined & on-lined\n");
- VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
- VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
+ ata_port_dbg(ap, "controller off-lined & on-lined\n"
+ "HStatus = 0x%x HControl = 0x%x\n",
+ ioread32(hcr_base + HSTATUS),
+ ioread32(hcr_base + HCONTROL));
/*
* First, wait for the PHYRDY change to occur before waiting for
@@ -941,10 +928,7 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class,
u8 *cfis;
u32 Serror;
- DPRINTK("in xx_softreset\n");
-
if (ata_link_offline(link)) {
- DPRINTK("PHY reports no device\n");
*class = ATA_DEV_NONE;
return 0;
}
@@ -957,19 +941,17 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class,
* reached here, we can send a command to the target device
*/
- DPRINTK("Sending SRST/device reset\n");
-
ata_tf_init(link->device, &tf);
cfis = (u8 *) &pp->cmdentry->cfis;
/* device reset/SRST is a control register update FIS, uses tag0 */
- sata_fsl_setup_cmd_hdr_entry(pp, 0,
+ sata_fsl_setup_cmd_hdr_entry(ap, pp, 0,
SRST_CMD | CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE, 0, 0, 5);
tf.ctl |= ATA_SRST; /* setup SRST bit in taskfile control reg */
ata_tf_to_fis(&tf, pmp, 0, cfis);
- DPRINTK("Dumping cfis : 0x%x, 0x%x, 0x%x, 0x%x\n",
+ ata_port_dbg(ap, "Dumping cfis : 0x%x, 0x%x, 0x%x, 0x%x\n",
cfis[0], cfis[1], cfis[2], cfis[3]);
/*
@@ -977,7 +959,7 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class,
* other commands are active on the controller/device
*/
- DPRINTK("@Softreset, CQ = 0x%x, CA = 0x%x, CC = 0x%x\n",
+ ata_port_dbg(ap, "CQ = 0x%x, CA = 0x%x, CC = 0x%x\n",
ioread32(CQ + hcr_base),
ioread32(CA + hcr_base), ioread32(CC + hcr_base));
@@ -990,15 +972,16 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class,
if (temp & 0x1) {
ata_port_warn(ap, "ATA_SRST issue failed\n");
- DPRINTK("Softreset@5000,CQ=0x%x,CA=0x%x,CC=0x%x\n",
+ ata_port_dbg(ap, "Softreset@5000,CQ=0x%x,CA=0x%x,CC=0x%x\n",
ioread32(CQ + hcr_base),
ioread32(CA + hcr_base), ioread32(CC + hcr_base));
sata_fsl_scr_read(&ap->link, SCR_ERROR, &Serror);
- DPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
- DPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
- DPRINTK("Serror = 0x%x\n", Serror);
+ ata_port_dbg(ap, "HStatus = 0x%x HControl = 0x%x Serror = 0x%x\n",
+ ioread32(hcr_base + HSTATUS),
+ ioread32(hcr_base + HCONTROL),
+ Serror);
goto err;
}
@@ -1012,8 +995,9 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class,
* using ATA signature D2H register FIS to the host controller.
*/
- sata_fsl_setup_cmd_hdr_entry(pp, 0, CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE,
- 0, 0, 5);
+ sata_fsl_setup_cmd_hdr_entry(ap, pp, 0,
+ CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE,
+ 0, 0, 5);
tf.ctl &= ~ATA_SRST; /* 2nd H2D Ctl. register FIS */
ata_tf_to_fis(&tf, pmp, 0, cfis);
@@ -1030,8 +1014,6 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class,
*/
iowrite32(0x01, CC + hcr_base); /* We know it will be cmd#0 always */
- DPRINTK("SATA FSL : Now checking device signature\n");
-
*class = ATA_DEV_NONE;
/* Verify if SStatus indicates device presence */
@@ -1045,9 +1027,8 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class,
*class = sata_fsl_dev_classify(ap);
- DPRINTK("class = %d\n", *class);
- VPRINTK("ccreg = 0x%x\n", ioread32(hcr_base + CC));
- VPRINTK("cereg = 0x%x\n", ioread32(hcr_base + CE));
+ ata_port_dbg(ap, "ccreg = 0x%x\n", ioread32(hcr_base + CC));
+ ata_port_dbg(ap, "cereg = 0x%x\n", ioread32(hcr_base + CE));
}
return 0;
@@ -1058,10 +1039,7 @@ err:
static void sata_fsl_error_handler(struct ata_port *ap)
{
-
- DPRINTK("in xx_error_handler\n");
sata_pmp_error_handler(ap);
-
}
static void sata_fsl_post_internal_cmd(struct ata_queued_cmd *qc)
@@ -1102,7 +1080,7 @@ static void sata_fsl_error_intr(struct ata_port *ap)
if (unlikely(SError & 0xFFFF0000))
sata_fsl_scr_write(&ap->link, SCR_ERROR, SError);
- DPRINTK("error_intr,hStat=0x%x,CE=0x%x,DE =0x%x,SErr=0x%x\n",
+ ata_port_dbg(ap, "hStat=0x%x,CE=0x%x,DE =0x%x,SErr=0x%x\n",
hstatus, cereg, ioread32(hcr_base + DE), SError);
/* handle fatal errors */
@@ -1119,7 +1097,7 @@ static void sata_fsl_error_intr(struct ata_port *ap)
/* Handle PHYRDY change notification */
if (hstatus & INT_ON_PHYRDY_CHG) {
- DPRINTK("SATA FSL: PHYRDY change indication\n");
+ ata_port_dbg(ap, "PHYRDY change indication\n");
/* Setup a soft-reset EH action */
ata_ehi_hotplugged(ehi);
@@ -1140,7 +1118,7 @@ static void sata_fsl_error_intr(struct ata_port *ap)
*/
abort = 1;
- DPRINTK("single device error, CE=0x%x, DE=0x%x\n",
+ ata_port_dbg(ap, "single device error, CE=0x%x, DE=0x%x\n",
ioread32(hcr_base + CE), ioread32(hcr_base + DE));
/* find out the offending link and qc */
@@ -1245,18 +1223,18 @@ static void sata_fsl_host_intr(struct ata_port *ap)
}
if (unlikely(SError & 0xFFFF0000)) {
- DPRINTK("serror @host_intr : 0x%x\n", SError);
+ ata_port_dbg(ap, "serror @host_intr : 0x%x\n", SError);
sata_fsl_error_intr(ap);
}
if (unlikely(hstatus & status_mask)) {
- DPRINTK("error interrupt!!\n");
+ ata_port_dbg(ap, "error interrupt!!\n");
sata_fsl_error_intr(ap);
return;
}
- VPRINTK("Status of all queues :\n");
- VPRINTK("done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x,CQ=0x%x,apqa=0x%llx\n",
+ ata_port_dbg(ap, "Status of all queues :\n");
+ ata_port_dbg(ap, "done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x,CQ=0x%x,apqa=0x%llx\n",
done_mask,
ioread32(hcr_base + CA),
ioread32(hcr_base + CE),
@@ -1268,15 +1246,13 @@ static void sata_fsl_host_intr(struct ata_port *ap)
/* clear CC bit, this will also complete the interrupt */
iowrite32(done_mask, hcr_base + CC);
- DPRINTK("Status of all queues :\n");
- DPRINTK("done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x\n",
+ ata_port_dbg(ap, "Status of all queues: done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x\n",
done_mask, ioread32(hcr_base + CA),
ioread32(hcr_base + CE));
for (i = 0; i < SATA_FSL_QUEUE_DEPTH; i++) {
if (done_mask & (1 << i))
- DPRINTK
- ("completing ncq cmd,tag=%d,CC=0x%x,CA=0x%x\n",
+ ata_port_dbg(ap, "completing ncq cmd,tag=%d,CC=0x%x,CA=0x%x\n",
i, ioread32(hcr_base + CC),
ioread32(hcr_base + CA));
}
@@ -1287,7 +1263,7 @@ static void sata_fsl_host_intr(struct ata_port *ap)
iowrite32(1, hcr_base + CC);
qc = ata_qc_from_tag(ap, ATA_TAG_INTERNAL);
- DPRINTK("completing non-ncq cmd, CC=0x%x\n",
+ ata_port_dbg(ap, "completing non-ncq cmd, CC=0x%x\n",
ioread32(hcr_base + CC));
if (qc) {
@@ -1295,7 +1271,7 @@ static void sata_fsl_host_intr(struct ata_port *ap)
}
} else {
/* Spurious Interrupt!! */
- DPRINTK("spurious interrupt!!, CC = 0x%x\n",
+ ata_port_dbg(ap, "spurious interrupt!!, CC = 0x%x\n",
ioread32(hcr_base + CC));
iowrite32(done_mask, hcr_base + CC);
return;
@@ -1315,8 +1291,6 @@ static irqreturn_t sata_fsl_interrupt(int irq, void *dev_instance)
interrupt_enables = ioread32(hcr_base + HSTATUS);
interrupt_enables &= 0x3F;
- DPRINTK("interrupt status 0x%x\n", interrupt_enables);
-
if (!interrupt_enables)
return IRQ_NONE;
@@ -1369,7 +1343,7 @@ static int sata_fsl_init_controller(struct ata_host *host)
iowrite32((temp & ~0x3F), hcr_base + HCONTROL);
/* Disable interrupt coalescing control(icc), for the moment */
- DPRINTK("icc = 0x%x\n", ioread32(hcr_base + ICC));
+ dev_dbg(host->dev, "icc = 0x%x\n", ioread32(hcr_base + ICC));
iowrite32(0x01000000, hcr_base + ICC);
/* clear error registers, SError is cleared by libATA */
@@ -1388,8 +1362,8 @@ static int sata_fsl_init_controller(struct ata_host *host)
* callback, that should also initiate the OOB, COMINIT sequence
*/
- DPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
- DPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
+ dev_dbg(host->dev, "HStatus = 0x%x HControl = 0x%x\n",
+ ioread32(hcr_base + HSTATUS), ioread32(hcr_base + HCONTROL));
return 0;
}
@@ -1406,8 +1380,7 @@ static void sata_fsl_host_stop(struct ata_host *host)
* scsi mid-layer and libata interface structures
*/
static struct scsi_host_template sata_fsl_sht = {
- ATA_NCQ_SHT("sata_fsl"),
- .can_queue = SATA_FSL_QUEUE_DEPTH,
+ ATA_NCQ_SHT_QD("sata_fsl", SATA_FSL_QUEUE_DEPTH),
.sg_tablesize = SATA_FSL_MAX_PRD_USABLE,
.dma_boundary = ATA_DMA_BOUNDARY,
};
@@ -1478,9 +1451,8 @@ static int sata_fsl_probe(struct platform_device *ofdev)
iowrite32(temp | TRANSCFG_RX_WATER_MARK, csr_base + TRANSCFG);
}
- DPRINTK("@reset i/o = 0x%x\n", ioread32(csr_base + TRANSCFG));
- DPRINTK("sizeof(cmd_desc) = %d\n", sizeof(struct command_desc));
- DPRINTK("sizeof(#define cmd_desc) = %d\n", SATA_FSL_CMD_DESC_SIZE);
+ dev_dbg(&ofdev->dev, "@reset i/o = 0x%x\n",
+ ioread32(csr_base + TRANSCFG));
host_priv = kzalloc(sizeof(struct sata_fsl_host_priv), GFP_KERNEL);
if (!host_priv)
diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c
index f793564f3d78..440a63de20d0 100644
--- a/drivers/ata/sata_gemini.c
+++ b/drivers/ata/sata_gemini.c
@@ -253,12 +253,12 @@ static int gemini_sata_bridge_init(struct sata_gemini *sg)
ret = clk_prepare_enable(sg->sata0_pclk);
if (ret) {
- pr_err("failed to enable SATA0 PCLK\n");
+ dev_err(dev, "failed to enable SATA0 PCLK\n");
return ret;
}
ret = clk_prepare_enable(sg->sata1_pclk);
if (ret) {
- pr_err("failed to enable SATA1 PCLK\n");
+ dev_err(dev, "failed to enable SATA1 PCLK\n");
clk_disable_unprepare(sg->sata0_pclk);
return ret;
}
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index e517bd8822a5..781901151d82 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -488,8 +488,6 @@ static enum ata_completion_errors inic_qc_prep(struct ata_queued_cmd *qc)
bool is_data = ata_is_data(qc->tf.protocol);
unsigned int cdb_len = 0;
- VPRINTK("ENTER\n");
-
if (is_atapi)
cdb_len = qc->dev->cdb_len;
@@ -657,7 +655,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
}
inic_tf_read(ap, &tf);
- *class = ata_dev_classify(&tf);
+ *class = ata_port_classify(ap, &tf);
}
return 0;
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index cae4c1eab102..53446b997740 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -579,7 +579,7 @@ struct mv_hw_ops {
void (*enable_leds)(struct mv_host_priv *hpriv, void __iomem *mmio);
void (*read_preamp)(struct mv_host_priv *hpriv, int idx,
void __iomem *mmio);
- int (*reset_hc)(struct mv_host_priv *hpriv, void __iomem *mmio,
+ int (*reset_hc)(struct ata_host *host, void __iomem *mmio,
unsigned int n_hc);
void (*reset_flash)(struct mv_host_priv *hpriv, void __iomem *mmio);
void (*reset_bus)(struct ata_host *host, void __iomem *mmio);
@@ -606,7 +606,7 @@ static void mv5_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
static void mv5_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio);
static void mv5_read_preamp(struct mv_host_priv *hpriv, int idx,
void __iomem *mmio);
-static int mv5_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
+static int mv5_reset_hc(struct ata_host *host, void __iomem *mmio,
unsigned int n_hc);
static void mv5_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio);
static void mv5_reset_bus(struct ata_host *host, void __iomem *mmio);
@@ -616,14 +616,14 @@ static void mv6_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
static void mv6_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio);
static void mv6_read_preamp(struct mv_host_priv *hpriv, int idx,
void __iomem *mmio);
-static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
+static int mv6_reset_hc(struct ata_host *host, void __iomem *mmio,
unsigned int n_hc);
static void mv6_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio);
static void mv_soc_enable_leds(struct mv_host_priv *hpriv,
void __iomem *mmio);
static void mv_soc_read_preamp(struct mv_host_priv *hpriv, int idx,
void __iomem *mmio);
-static int mv_soc_reset_hc(struct mv_host_priv *hpriv,
+static int mv_soc_reset_hc(struct ata_host *host,
void __iomem *mmio, unsigned int n_hc);
static void mv_soc_reset_flash(struct mv_host_priv *hpriv,
void __iomem *mmio);
@@ -1248,81 +1248,74 @@ static int mv_stop_edma(struct ata_port *ap)
return err;
}
-#ifdef ATA_DEBUG
-static void mv_dump_mem(void __iomem *start, unsigned bytes)
+static void mv_dump_mem(struct device *dev, void __iomem *start, unsigned bytes)
{
- int b, w;
+ int b, w, o;
+ unsigned char linebuf[38];
+
for (b = 0; b < bytes; ) {
- DPRINTK("%p: ", start + b);
- for (w = 0; b < bytes && w < 4; w++) {
- printk("%08x ", readl(start + b));
+ for (w = 0, o = 0; b < bytes && w < 4; w++) {
+ o += snprintf(linebuf + o, sizeof(linebuf) - o,
+ "%08x ", readl(start + b));
b += sizeof(u32);
}
- printk("\n");
+ dev_dbg(dev, "%s: %p: %s\n",
+ __func__, start + b, linebuf);
}
}
-#endif
-#if defined(ATA_DEBUG) || defined(CONFIG_PCI)
+
static void mv_dump_pci_cfg(struct pci_dev *pdev, unsigned bytes)
{
-#ifdef ATA_DEBUG
- int b, w;
- u32 dw;
+ int b, w, o;
+ u32 dw = 0;
+ unsigned char linebuf[38];
+
for (b = 0; b < bytes; ) {
- DPRINTK("%02x: ", b);
- for (w = 0; b < bytes && w < 4; w++) {
+ for (w = 0, o = 0; b < bytes && w < 4; w++) {
(void) pci_read_config_dword(pdev, b, &dw);
- printk("%08x ", dw);
+ o += snprintf(linebuf + o, sizeof(linebuf) - o,
+ "%08x ", dw);
b += sizeof(u32);
}
- printk("\n");
+ dev_dbg(&pdev->dev, "%s: %02x: %s\n",
+ __func__, b, linebuf);
}
-#endif
}
-#endif
-static void mv_dump_all_regs(void __iomem *mmio_base, int port,
+
+static void mv_dump_all_regs(void __iomem *mmio_base,
struct pci_dev *pdev)
{
-#ifdef ATA_DEBUG
- void __iomem *hc_base = mv_hc_base(mmio_base,
- port >> MV_PORT_HC_SHIFT);
+ void __iomem *hc_base;
void __iomem *port_base;
int start_port, num_ports, p, start_hc, num_hcs, hc;
- if (0 > port) {
- start_hc = start_port = 0;
- num_ports = 8; /* shld be benign for 4 port devs */
- num_hcs = 2;
- } else {
- start_hc = port >> MV_PORT_HC_SHIFT;
- start_port = port;
- num_ports = num_hcs = 1;
- }
- DPRINTK("All registers for port(s) %u-%u:\n", start_port,
- num_ports > 1 ? num_ports - 1 : start_port);
+ start_hc = start_port = 0;
+ num_ports = 8; /* should be benign for 4 port devs */
+ num_hcs = 2;
+ dev_dbg(&pdev->dev,
+ "%s: All registers for port(s) %u-%u:\n", __func__,
+ start_port, num_ports > 1 ? num_ports - 1 : start_port);
- if (NULL != pdev) {
- DPRINTK("PCI config space regs:\n");
- mv_dump_pci_cfg(pdev, 0x68);
- }
- DPRINTK("PCI regs:\n");
- mv_dump_mem(mmio_base+0xc00, 0x3c);
- mv_dump_mem(mmio_base+0xd00, 0x34);
- mv_dump_mem(mmio_base+0xf00, 0x4);
- mv_dump_mem(mmio_base+0x1d00, 0x6c);
+ dev_dbg(&pdev->dev, "%s: PCI config space regs:\n", __func__);
+ mv_dump_pci_cfg(pdev, 0x68);
+
+ dev_dbg(&pdev->dev, "%s: PCI regs:\n", __func__);
+ mv_dump_mem(&pdev->dev, mmio_base+0xc00, 0x3c);
+ mv_dump_mem(&pdev->dev, mmio_base+0xd00, 0x34);
+ mv_dump_mem(&pdev->dev, mmio_base+0xf00, 0x4);
+ mv_dump_mem(&pdev->dev, mmio_base+0x1d00, 0x6c);
for (hc = start_hc; hc < start_hc + num_hcs; hc++) {
hc_base = mv_hc_base(mmio_base, hc);
- DPRINTK("HC regs (HC %i):\n", hc);
- mv_dump_mem(hc_base, 0x1c);
+ dev_dbg(&pdev->dev, "%s: HC regs (HC %i):\n", __func__, hc);
+ mv_dump_mem(&pdev->dev, hc_base, 0x1c);
}
for (p = start_port; p < start_port + num_ports; p++) {
port_base = mv_port_base(mmio_base, p);
- DPRINTK("EDMA regs (port %i):\n", p);
- mv_dump_mem(port_base, 0x54);
- DPRINTK("SATA regs (port %i):\n", p);
- mv_dump_mem(port_base+0x300, 0x60);
+ dev_dbg(&pdev->dev, "%s: EDMA regs (port %i):\n", __func__, p);
+ mv_dump_mem(&pdev->dev, port_base, 0x54);
+ dev_dbg(&pdev->dev, "%s: SATA regs (port %i):\n", __func__, p);
+ mv_dump_mem(&pdev->dev, port_base+0x300, 0x60);
}
-#endif
}
static unsigned int mv_scr_offset(unsigned int sc_reg_in)
@@ -2962,8 +2955,8 @@ static int mv_pci_error(struct ata_host *host, void __iomem *mmio)
dev_err(host->dev, "PCI ERROR; PCI IRQ cause=0x%08x\n", err_cause);
- DPRINTK("All regs @ PCI error\n");
- mv_dump_all_regs(mmio, -1, to_pci_dev(host->dev));
+ dev_dbg(host->dev, "%s: All regs @ PCI error\n", __func__);
+ mv_dump_all_regs(mmio, to_pci_dev(host->dev));
writelfl(0, mmio + hpriv->irq_cause_offset);
@@ -3201,9 +3194,10 @@ static void mv5_reset_one_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
}
#undef ZERO
-static int mv5_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
+static int mv5_reset_hc(struct ata_host *host, void __iomem *mmio,
unsigned int n_hc)
{
+ struct mv_host_priv *hpriv = host->private_data;
unsigned int hc, port;
for (hc = 0; hc < n_hc; hc++) {
@@ -3262,7 +3256,7 @@ static void mv6_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio)
* LOCKING:
* Inherited from caller.
*/
-static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
+static int mv6_reset_hc(struct ata_host *host, void __iomem *mmio,
unsigned int n_hc)
{
void __iomem *reg = mmio + PCI_MAIN_CMD_STS;
@@ -3282,7 +3276,7 @@ static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
break;
}
if (!(PCI_MASTER_EMPTY & t)) {
- printk(KERN_ERR DRV_NAME ": PCI master won't flush\n");
+ dev_err(host->dev, "PCI master won't flush\n");
rc = 1;
goto done;
}
@@ -3296,7 +3290,7 @@ static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
} while (!(GLOB_SFT_RST & t) && (i-- > 0));
if (!(GLOB_SFT_RST & t)) {
- printk(KERN_ERR DRV_NAME ": can't set global reset\n");
+ dev_err(host->dev, "can't set global reset\n");
rc = 1;
goto done;
}
@@ -3310,7 +3304,7 @@ static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
} while ((GLOB_SFT_RST & t) && (i-- > 0));
if (GLOB_SFT_RST & t) {
- printk(KERN_ERR DRV_NAME ": can't clear global reset\n");
+ dev_err(host->dev, "can't clear global reset\n");
rc = 1;
}
done:
@@ -3479,9 +3473,10 @@ static void mv_soc_reset_one_hc(struct mv_host_priv *hpriv,
#undef ZERO
-static int mv_soc_reset_hc(struct mv_host_priv *hpriv,
+static int mv_soc_reset_hc(struct ata_host *host,
void __iomem *mmio, unsigned int n_hc)
{
+ struct mv_host_priv *hpriv = host->private_data;
unsigned int port;
for (port = 0; port < hpriv->n_ports; port++)
@@ -3723,11 +3718,6 @@ static void mv_port_init(struct ata_ioports *port, void __iomem *port_mmio)
/* unmask all non-transient EDMA error interrupts */
writelfl(~EDMA_ERR_IRQ_TRANSIENT, port_mmio + EDMA_ERR_IRQ_MASK);
-
- VPRINTK("EDMA cfg=0x%08x EDMA IRQ err cause/mask=0x%08x/0x%08x\n",
- readl(port_mmio + EDMA_CFG),
- readl(port_mmio + EDMA_ERR_IRQ_CAUSE),
- readl(port_mmio + EDMA_ERR_IRQ_MASK));
}
static unsigned int mv_in_pcix_mode(struct ata_host *host)
@@ -3859,11 +3849,11 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
*
* Warn the user, lest they think we're just buggy.
*/
- printk(KERN_WARNING DRV_NAME ": Highpoint RocketRAID"
+ dev_warn(&pdev->dev, "Highpoint RocketRAID"
" BIOS CORRUPTS DATA on all attached drives,"
" regardless of if/how they are configured."
" BEWARE!\n");
- printk(KERN_WARNING DRV_NAME ": For data safety, do not"
+ dev_warn(&pdev->dev, "For data safety, do not"
" use sectors 8-9 on \"Legacy\" drives,"
" and avoid the final two gigabytes on"
" all RocketRAID BIOS initialized drives.\n");
@@ -3954,7 +3944,7 @@ static int mv_init_host(struct ata_host *host)
if (hpriv->ops->read_preamp)
hpriv->ops->read_preamp(hpriv, port, mmio);
- rc = hpriv->ops->reset_hc(hpriv, mmio, n_hc);
+ rc = hpriv->ops->reset_hc(host, mmio, n_hc);
if (rc)
goto done;
@@ -3972,7 +3962,7 @@ static int mv_init_host(struct ata_host *host)
for (hc = 0; hc < n_hc; hc++) {
void __iomem *hc_mmio = mv_hc_base(mmio, hc);
- VPRINTK("HC%i: HC config=0x%08x HC IRQ cause "
+ dev_dbg(host->dev, "HC%i: HC config=0x%08x HC IRQ cause "
"(before clear)=0x%08x\n", hc,
readl(hc_mmio + HC_CFG),
readl(hc_mmio + HC_IRQ_CAUSE));
@@ -4270,7 +4260,7 @@ static int mv_platform_resume(struct platform_device *pdev)
/* initialize adapter */
ret = mv_init_host(host);
if (ret) {
- printk(KERN_ERR DRV_NAME ": Error during HW init\n");
+ dev_err(&pdev->dev, "Error during HW init\n");
return ret;
}
ata_host_resume(host);
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 16272c111208..7f14d0d31057 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -31,6 +31,7 @@
#include <scsi/scsi_host.h>
#include <scsi/scsi_device.h>
#include <linux/libata.h>
+#include <trace/events/libata.h>
#define DRV_NAME "sata_nv"
#define DRV_VERSION "3.5"
@@ -808,7 +809,7 @@ static int nv_adma_check_cpb(struct ata_port *ap, int cpb_num, int force_err)
struct nv_adma_port_priv *pp = ap->private_data;
u8 flags = pp->cpb[cpb_num].resp_flags;
- VPRINTK("CPB %d, flags=0x%x\n", cpb_num, flags);
+ ata_port_dbg(ap, "CPB %d, flags=0x%x\n", cpb_num, flags);
if (unlikely((force_err ||
flags & (NV_CPB_RESP_ATA_ERR |
@@ -1100,8 +1101,6 @@ static int nv_adma_port_start(struct ata_port *ap)
struct pci_dev *pdev = to_pci_dev(dev);
u16 tmp;
- VPRINTK("ENTER\n");
-
/*
* Ensure DMA mask is set to 32-bit before allocating legacy PRD and
* pad buffers.
@@ -1190,7 +1189,6 @@ static void nv_adma_port_stop(struct ata_port *ap)
struct nv_adma_port_priv *pp = ap->private_data;
void __iomem *mmio = pp->ctl_block;
- VPRINTK("ENTER\n");
writew(0, mmio + NV_ADMA_CTL);
}
@@ -1252,8 +1250,6 @@ static void nv_adma_setup_port(struct ata_port *ap)
void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR];
struct ata_ioports *ioport = &ap->ioaddr;
- VPRINTK("ENTER\n");
-
mmio += NV_ADMA_PORT + ap->port_no * NV_ADMA_PORT_SIZE;
ioport->cmd_addr = mmio;
@@ -1277,8 +1273,6 @@ static int nv_adma_host_init(struct ata_host *host)
unsigned int i;
u32 tmp32;
- VPRINTK("ENTER\n");
-
/* enable ADMA on the ports */
pci_read_config_dword(pdev, NV_MCP_SATA_CFG_20, &tmp32);
tmp32 |= NV_MCP_SATA_CFG_20_PORT0_EN |
@@ -1320,8 +1314,6 @@ static void nv_adma_fill_sg(struct ata_queued_cmd *qc, struct nv_adma_cpb *cpb)
struct scatterlist *sg;
unsigned int si;
- VPRINTK("ENTER\n");
-
for_each_sg(qc->sg, sg, qc->n_elem, si) {
aprd = (si < 5) ? &cpb->aprd[si] :
&pp->aprd[NV_ADMA_SGTBL_LEN * qc->hw_tag + (si-5)];
@@ -1378,8 +1370,6 @@ static enum ata_completion_errors nv_adma_qc_prep(struct ata_queued_cmd *qc)
if (qc->tf.protocol == ATA_PROT_NCQ)
ctl_flags |= NV_CPB_CTL_QUEUE | NV_CPB_CTL_FPDMA;
- VPRINTK("qc->flags = 0x%lx\n", qc->flags);
-
nv_adma_tf_to_cpb(&qc->tf, cpb->tf);
if (qc->flags & ATA_QCFLAG_DMAMAP) {
@@ -1404,8 +1394,6 @@ static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc)
void __iomem *mmio = pp->ctl_block;
int curr_ncq = (qc->tf.protocol == ATA_PROT_NCQ);
- VPRINTK("ENTER\n");
-
/* We can't handle result taskfile with NCQ commands, since
retrieving the taskfile switches us out of ADMA mode and would abort
existing commands. */
@@ -1417,7 +1405,6 @@ static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc)
if (nv_adma_use_reg_mode(qc)) {
/* use ATA register mode */
- VPRINTK("using ATA register mode: 0x%lx\n", qc->flags);
BUG_ON(!(pp->flags & NV_ADMA_ATAPI_SETUP_COMPLETE) &&
(qc->flags & ATA_QCFLAG_DMAMAP));
nv_adma_register_mode(qc->ap);
@@ -1438,8 +1425,6 @@ static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc)
writew(qc->hw_tag, mmio + NV_ADMA_APPEND);
- DPRINTK("Issued tag %u\n", qc->hw_tag);
-
return 0;
}
@@ -1871,12 +1856,12 @@ static void nv_swncq_host_init(struct ata_host *host)
/* enable swncq */
tmp = readl(mmio + NV_CTL_MCP55);
- VPRINTK("HOST_CTL:0x%X\n", tmp);
+ dev_dbg(&pdev->dev, "HOST_CTL:0x%X\n", tmp);
writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55);
/* enable irq intr */
tmp = readl(mmio + NV_INT_ENABLE_MCP55);
- VPRINTK("HOST_ENABLE:0x%X\n", tmp);
+ dev_dbg(&pdev->dev, "HOST_ENABLE:0x%X\n", tmp);
writel(tmp | 0x00fd00fd, mmio + NV_INT_ENABLE_MCP55);
/* clear port irq */
@@ -2017,19 +2002,17 @@ static unsigned int nv_swncq_issue_atacmd(struct ata_port *ap,
if (qc == NULL)
return 0;
- DPRINTK("Enter\n");
-
writel((1 << qc->hw_tag), pp->sactive_block);
pp->last_issue_tag = qc->hw_tag;
pp->dhfis_bits &= ~(1 << qc->hw_tag);
pp->dmafis_bits &= ~(1 << qc->hw_tag);
pp->qc_active |= (0x1 << qc->hw_tag);
+ trace_ata_tf_load(ap, &qc->tf);
ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */
+ trace_ata_exec_command(ap, &qc->tf, qc->hw_tag);
ap->ops->sff_exec_command(ap, &qc->tf);
- DPRINTK("Issued tag %u\n", qc->hw_tag);
-
return 0;
}
@@ -2041,8 +2024,6 @@ static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc)
if (qc->tf.protocol != ATA_PROT_NCQ)
return ata_bmdma_qc_issue(qc);
- DPRINTK("Enter\n");
-
if (!pp->qc_active)
nv_swncq_issue_atacmd(ap, qc);
else
@@ -2087,6 +2068,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap)
u8 lack_dhfis = 0;
host_stat = ap->ops->bmdma_status(ap);
+ trace_ata_bmdma_status(ap, host_stat);
if (unlikely(host_stat & ATA_DMA_ERR)) {
/* error when transferring data to/from memory */
ata_ehi_clear_desc(ehi);
@@ -2109,7 +2091,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap)
ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask);
if (!ap->qc_active) {
- DPRINTK("over\n");
+ ata_port_dbg(ap, "over\n");
nv_swncq_pp_reinit(ap);
return 0;
}
@@ -2124,12 +2106,12 @@ static int nv_swncq_sdbfis(struct ata_port *ap)
*/
lack_dhfis = 1;
- DPRINTK("id 0x%x QC: qc_active 0x%llx,"
- "SWNCQ:qc_active 0x%X defer_bits %X "
- "dhfis 0x%X dmafis 0x%X last_issue_tag %x\n",
- ap->print_id, ap->qc_active, pp->qc_active,
- pp->defer_queue.defer_bits, pp->dhfis_bits,
- pp->dmafis_bits, pp->last_issue_tag);
+ ata_port_dbg(ap, "QC: qc_active 0x%llx,"
+ "SWNCQ:qc_active 0x%X defer_bits %X "
+ "dhfis 0x%X dmafis 0x%X last_issue_tag %x\n",
+ ap->qc_active, pp->qc_active,
+ pp->defer_queue.defer_bits, pp->dhfis_bits,
+ pp->dmafis_bits, pp->last_issue_tag);
nv_swncq_fis_reinit(ap);
@@ -2169,7 +2151,7 @@ static void nv_swncq_dmafis(struct ata_port *ap)
__ata_bmdma_stop(ap);
tag = nv_swncq_tag(ap);
- DPRINTK("dma setup tag 0x%x\n", tag);
+ ata_port_dbg(ap, "dma setup tag 0x%x\n", tag);
qc = ata_qc_from_tag(ap, tag);
if (unlikely(!qc))
@@ -2237,9 +2219,9 @@ static void nv_swncq_host_interrupt(struct ata_port *ap, u16 fis)
if (fis & NV_SWNCQ_IRQ_SDBFIS) {
pp->ncq_flags |= ncq_saw_sdb;
- DPRINTK("id 0x%x SWNCQ: qc_active 0x%X "
+ ata_port_dbg(ap, "SWNCQ: qc_active 0x%X "
"dhfis 0x%X dmafis 0x%X sactive 0x%X\n",
- ap->print_id, pp->qc_active, pp->dhfis_bits,
+ pp->qc_active, pp->dhfis_bits,
pp->dmafis_bits, readl(pp->sactive_block));
if (nv_swncq_sdbfis(ap) < 0)
goto irq_error;
@@ -2265,7 +2247,7 @@ static void nv_swncq_host_interrupt(struct ata_port *ap, u16 fis)
goto irq_exit;
if (pp->defer_queue.defer_bits) {
- DPRINTK("send next command\n");
+ ata_port_dbg(ap, "send next command\n");
qc = nv_swncq_qc_from_dq(ap);
nv_swncq_issue_atacmd(ap, qc);
}
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 7815da8ef9e5..b8465fef2ed2 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -596,7 +596,8 @@ static void pdc_fill_sg(struct ata_queued_cmd *qc)
prd[idx].addr = cpu_to_le32(addr);
prd[idx].flags_len = cpu_to_le32(len & 0xffff);
- VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len);
+ ata_port_dbg(ap, "PRD[%u] = (0x%X, 0x%X)\n",
+ idx, addr, len);
idx++;
sg_len -= len;
@@ -609,17 +610,16 @@ static void pdc_fill_sg(struct ata_queued_cmd *qc)
if (len > SG_COUNT_ASIC_BUG) {
u32 addr;
- VPRINTK("Splitting last PRD.\n");
-
addr = le32_to_cpu(prd[idx - 1].addr);
prd[idx - 1].flags_len = cpu_to_le32(len - SG_COUNT_ASIC_BUG);
- VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx - 1, addr, SG_COUNT_ASIC_BUG);
+ ata_port_dbg(ap, "PRD[%u] = (0x%X, 0x%X)\n",
+ idx - 1, addr, SG_COUNT_ASIC_BUG);
addr = addr + len - SG_COUNT_ASIC_BUG;
len = SG_COUNT_ASIC_BUG;
prd[idx].addr = cpu_to_le32(addr);
prd[idx].flags_len = cpu_to_le32(len);
- VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len);
+ ata_port_dbg(ap, "PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len);
idx++;
}
@@ -632,8 +632,6 @@ static enum ata_completion_errors pdc_qc_prep(struct ata_queued_cmd *qc)
struct pdc_port_priv *pp = qc->ap->private_data;
unsigned int i;
- VPRINTK("ENTER\n");
-
switch (qc->tf.protocol) {
case ATA_PROT_DMA:
pdc_fill_sg(qc);
@@ -922,12 +920,8 @@ static irqreturn_t pdc_interrupt(int irq, void *dev_instance)
u32 hotplug_status;
int is_sataii_tx4;
- VPRINTK("ENTER\n");
-
- if (!host || !host->iomap[PDC_MMIO_BAR]) {
- VPRINTK("QUICK EXIT\n");
+ if (!host || !host->iomap[PDC_MMIO_BAR])
return IRQ_NONE;
- }
host_mmio = host->iomap[PDC_MMIO_BAR];
@@ -946,23 +940,18 @@ static irqreturn_t pdc_interrupt(int irq, void *dev_instance)
/* reading should also clear interrupts */
mask = readl(host_mmio + PDC_INT_SEQMASK);
- if (mask == 0xffffffff && hotplug_status == 0) {
- VPRINTK("QUICK EXIT 2\n");
+ if (mask == 0xffffffff && hotplug_status == 0)
goto done_irq;
- }
mask &= 0xffff; /* only 16 SEQIDs possible */
- if (mask == 0 && hotplug_status == 0) {
- VPRINTK("QUICK EXIT 3\n");
+ if (mask == 0 && hotplug_status == 0)
goto done_irq;
- }
writel(mask, host_mmio + PDC_INT_SEQMASK);
is_sataii_tx4 = pdc_is_sataii_tx4(host->ports[0]->flags);
for (i = 0; i < host->n_ports; i++) {
- VPRINTK("port %u\n", i);
ap = host->ports[i];
/* check for a plug or unplug event */
@@ -989,8 +978,6 @@ static irqreturn_t pdc_interrupt(int irq, void *dev_instance)
}
}
- VPRINTK("EXIT\n");
-
done_irq:
spin_unlock(&host->lock);
return IRQ_RETVAL(handled);
@@ -1005,8 +992,6 @@ static void pdc_packet_start(struct ata_queued_cmd *qc)
unsigned int port_no = ap->port_no;
u8 seq = (u8) (port_no + 1);
- VPRINTK("ENTER, ap %p\n", ap);
-
writel(0x00000001, host_mmio + (seq * 4));
readl(host_mmio + (seq * 4)); /* flush */
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index ef00ab644afb..8ca0810aad26 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -252,9 +252,6 @@ static unsigned int qs_fill_sg(struct ata_queued_cmd *qc)
len = sg_dma_len(sg);
*(__le32 *)prd = cpu_to_le32(len);
prd += sizeof(u64);
-
- VPRINTK("PRD[%u] = (0x%llX, 0x%X)\n", si,
- (unsigned long long)addr, len);
}
return si;
@@ -268,8 +265,6 @@ static enum ata_completion_errors qs_qc_prep(struct ata_queued_cmd *qc)
u64 addr;
unsigned int nelem;
- VPRINTK("ENTER\n");
-
qs_enter_reg_mode(qc->ap);
if (qc->tf.protocol != ATA_PROT_DMA)
return AC_ERR_OK;
@@ -304,8 +299,6 @@ static inline void qs_packet_start(struct ata_queued_cmd *qc)
struct ata_port *ap = qc->ap;
u8 __iomem *chan = qs_mmio_base(ap->host) + (ap->port_no * 0x4000);
- VPRINTK("ENTER, ap %p\n", ap);
-
writeb(QS_CTR0_CLER, chan + QS_CCT_CTR0);
wmb(); /* flush PRDs and pkt to memory */
writel(QS_CCF_RUN_PKT, chan + QS_CCT_CFF);
@@ -374,8 +367,8 @@ static inline unsigned int qs_intr_pkt(struct ata_host *host)
struct qs_port_priv *pp = ap->private_data;
struct ata_queued_cmd *qc;
- DPRINTK("SFF=%08x%08x: sCHAN=%u sHST=%d sDST=%02x\n",
- sff1, sff0, port_no, sHST, sDST);
+ dev_dbg(host->dev, "SFF=%08x%08x: sHST=%d sDST=%02x\n",
+ sff1, sff0, sHST, sDST);
handled = 1;
if (!pp || pp->state != qs_state_pkt)
continue;
@@ -435,14 +428,10 @@ static irqreturn_t qs_intr(int irq, void *dev_instance)
unsigned int handled = 0;
unsigned long flags;
- VPRINTK("ENTER\n");
-
spin_lock_irqsave(&host->lock, flags);
handled = qs_intr_pkt(host) | qs_intr_mmio(host);
spin_unlock_irqrestore(&host->lock, flags);
- VPRINTK("EXIT\n");
-
return IRQ_RETVAL(handled);
}
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 44b0ed8f6bb8..3d96b6faa3f0 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -323,8 +323,6 @@ static int sata_rcar_bus_softreset(struct ata_port *ap, unsigned long deadline)
{
struct ata_ioports *ioaddr = &ap->ioaddr;
- DPRINTK("ata%u: bus reset via SRST\n", ap->print_id);
-
/* software reset. causes dev0 to be selected */
iowrite32(ap->ctl, ioaddr->ctl_addr);
udelay(20);
@@ -350,7 +348,6 @@ static int sata_rcar_softreset(struct ata_link *link, unsigned int *classes,
devmask |= 1 << 0;
/* issue bus reset */
- DPRINTK("about to softreset, devmask=%x\n", devmask);
rc = sata_rcar_bus_softreset(ap, deadline);
/* if link is occupied, -ENODEV too is an error */
if (rc && (rc != -ENODEV || sata_scr_valid(link))) {
@@ -361,7 +358,6 @@ static int sata_rcar_softreset(struct ata_link *link, unsigned int *classes,
/* determine by signature whether we have ATA or ATAPI devices */
classes[0] = ata_sff_dev_classify(&link->device[0], devmask, &err);
- DPRINTK("classes[0]=%u\n", classes[0]);
return 0;
}
@@ -383,12 +379,6 @@ static void sata_rcar_tf_load(struct ata_port *ap,
iowrite32(tf->hob_lbal, ioaddr->lbal_addr);
iowrite32(tf->hob_lbam, ioaddr->lbam_addr);
iowrite32(tf->hob_lbah, ioaddr->lbah_addr);
- VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n",
- tf->hob_feature,
- tf->hob_nsect,
- tf->hob_lbal,
- tf->hob_lbam,
- tf->hob_lbah);
}
if (is_addr) {
@@ -397,18 +387,10 @@ static void sata_rcar_tf_load(struct ata_port *ap,
iowrite32(tf->lbal, ioaddr->lbal_addr);
iowrite32(tf->lbam, ioaddr->lbam_addr);
iowrite32(tf->lbah, ioaddr->lbah_addr);
- VPRINTK("feat 0x%X nsect 0x%X lba 0x%X 0x%X 0x%X\n",
- tf->feature,
- tf->nsect,
- tf->lbal,
- tf->lbam,
- tf->lbah);
}
- if (tf->flags & ATA_TFLAG_DEVICE) {
+ if (tf->flags & ATA_TFLAG_DEVICE)
iowrite32(tf->device, ioaddr->device_addr);
- VPRINTK("device 0x%X\n", tf->device);
- }
ata_wait_idle(ap);
}
@@ -440,8 +422,6 @@ static void sata_rcar_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
static void sata_rcar_exec_command(struct ata_port *ap,
const struct ata_taskfile *tf)
{
- DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command);
-
iowrite32(tf->command, ap->ioaddr.command_addr);
ata_sff_pause(ap);
}
@@ -499,7 +479,6 @@ static void sata_rcar_drain_fifo(struct ata_queued_cmd *qc)
count < 65536; count += 2)
ioread32(ap->ioaddr.data_addr);
- /* Can become DEBUG later */
if (count)
ata_port_dbg(ap, "drained %d bytes to clear DRQ\n", count);
}
@@ -543,7 +522,6 @@ static void sata_rcar_bmdma_fill_sg(struct ata_queued_cmd *qc)
prd[si].addr = cpu_to_le32(addr);
prd[si].flags_len = cpu_to_le32(sg_len);
- VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", si, addr, sg_len);
}
/* end-of-table flag */
@@ -685,7 +663,7 @@ static void sata_rcar_serr_interrupt(struct ata_port *ap)
if (!serror)
return;
- DPRINTK("SError @host_intr: 0x%x\n", serror);
+ ata_port_dbg(ap, "SError @host_intr: 0x%x\n", serror);
/* first, analyze and record host port events */
ata_ehi_clear_desc(ehi);
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 75321f1ceba5..3b989a52879d 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -307,7 +307,6 @@ static void sil_fill_sg(struct ata_queued_cmd *qc)
prd->addr = cpu_to_le32(addr);
prd->flags_len = cpu_to_le32(sg_len);
- VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", si, addr, sg_len);
last_prd = prd;
prd++;
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index f99ec6f7d7c0..2fef6ce93f07 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -656,8 +656,6 @@ static int sil24_softreset(struct ata_link *link, unsigned int *class,
const char *reason;
int rc;
- DPRINTK("ENTER\n");
-
/* put the port into known state */
if (sil24_init_port(ap)) {
reason = "port not ready";
@@ -680,9 +678,8 @@ static int sil24_softreset(struct ata_link *link, unsigned int *class,
}
sil24_read_tf(ap, 0, &tf);
- *class = ata_dev_classify(&tf);
+ *class = ata_port_classify(ap, &tf);
- DPRINTK("EXIT, class=%u\n", *class);
return 0;
err:
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index 4c01190a5e37..6ceec59cb291 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -78,6 +78,9 @@
#define DRV_NAME "sata_sx4"
#define DRV_VERSION "0.12"
+static int dimm_test;
+module_param(dimm_test, int, 0644);
+MODULE_PARM_DESC(dimm_test, "Enable DIMM test during startup (1 = enabled)");
enum {
PDC_MMIO_BAR = 3,
@@ -211,10 +214,8 @@ static unsigned int pdc20621_i2c_read(struct ata_host *host,
u32 device, u32 subaddr, u32 *pdata);
static int pdc20621_prog_dimm0(struct ata_host *host);
static unsigned int pdc20621_prog_dimm_global(struct ata_host *host);
-#ifdef ATA_VERBOSE_DEBUG
static void pdc20621_get_from_dimm(struct ata_host *host,
void *psource, u32 offset, u32 size);
-#endif
static void pdc20621_put_to_dimm(struct ata_host *host,
void *psource, u32 offset, u32 size);
static void pdc20621_irq_clear(struct ata_port *ap);
@@ -308,15 +309,9 @@ static inline void pdc20621_ata_sg(u8 *buf, unsigned int portno,
/* output ATA packet S/G table */
addr = PDC_20621_DIMM_BASE + PDC_20621_DIMM_DATA +
(PDC_DIMM_DATA_STEP * portno);
- VPRINTK("ATA sg addr 0x%x, %d\n", addr, addr);
+
buf32[dw] = cpu_to_le32(addr);
buf32[dw + 1] = cpu_to_le32(total_len | ATA_PRD_EOT);
-
- VPRINTK("ATA PSG @ %x == (0x%x, 0x%x)\n",
- PDC_20621_DIMM_BASE +
- (PDC_DIMM_WINDOW_STEP * portno) +
- PDC_DIMM_APKT_PRD,
- buf32[dw], buf32[dw + 1]);
}
static inline void pdc20621_host_sg(u8 *buf, unsigned int portno,
@@ -332,12 +327,6 @@ static inline void pdc20621_host_sg(u8 *buf, unsigned int portno,
buf32[dw] = cpu_to_le32(addr);
buf32[dw + 1] = cpu_to_le32(total_len | ATA_PRD_EOT);
-
- VPRINTK("HOST PSG @ %x == (0x%x, 0x%x)\n",
- PDC_20621_DIMM_BASE +
- (PDC_DIMM_WINDOW_STEP * portno) +
- PDC_DIMM_HPKT_PRD,
- buf32[dw], buf32[dw + 1]);
}
static inline unsigned int pdc20621_ata_pkt(struct ata_taskfile *tf,
@@ -351,7 +340,6 @@ static inline unsigned int pdc20621_ata_pkt(struct ata_taskfile *tf,
unsigned int dimm_sg = PDC_20621_DIMM_BASE +
(PDC_DIMM_WINDOW_STEP * portno) +
PDC_DIMM_APKT_PRD;
- VPRINTK("ENTER, dimm_sg == 0x%x, %d\n", dimm_sg, dimm_sg);
i = PDC_DIMM_ATA_PKT;
@@ -406,8 +394,6 @@ static inline void pdc20621_host_pkt(struct ata_taskfile *tf, u8 *buf,
unsigned int dimm_sg = PDC_20621_DIMM_BASE +
(PDC_DIMM_WINDOW_STEP * portno) +
PDC_DIMM_HPKT_PRD;
- VPRINTK("ENTER, dimm_sg == 0x%x, %d\n", dimm_sg, dimm_sg);
- VPRINTK("host_sg == 0x%x, %d\n", host_sg, host_sg);
dw = PDC_DIMM_HOST_PKT >> 2;
@@ -424,14 +410,6 @@ static inline void pdc20621_host_pkt(struct ata_taskfile *tf, u8 *buf,
buf32[dw + 1] = cpu_to_le32(host_sg);
buf32[dw + 2] = cpu_to_le32(dimm_sg);
buf32[dw + 3] = 0;
-
- VPRINTK("HOST PKT @ %x == (0x%x 0x%x 0x%x 0x%x)\n",
- PDC_20621_DIMM_BASE + (PDC_DIMM_WINDOW_STEP * portno) +
- PDC_DIMM_HOST_PKT,
- buf32[dw + 0],
- buf32[dw + 1],
- buf32[dw + 2],
- buf32[dw + 3]);
}
static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
@@ -447,8 +425,6 @@ static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
WARN_ON(!(qc->flags & ATA_QCFLAG_DMAMAP));
- VPRINTK("ata%u: ENTER\n", ap->print_id);
-
/* hard-code chip #0 */
mmio += PDC_CHIP0_OFS;
@@ -492,7 +468,8 @@ static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
readl(dimm_mmio); /* MMIO PCI posting flush */
- VPRINTK("ata pkt buf ofs %u, prd size %u, mmio copied\n", i, sgt_len);
+ ata_port_dbg(ap, "ata pkt buf ofs %u, prd size %u, mmio copied\n",
+ i, sgt_len);
}
static void pdc20621_nodata_prep(struct ata_queued_cmd *qc)
@@ -504,8 +481,6 @@ static void pdc20621_nodata_prep(struct ata_queued_cmd *qc)
unsigned int portno = ap->port_no;
unsigned int i;
- VPRINTK("ata%u: ENTER\n", ap->print_id);
-
/* hard-code chip #0 */
mmio += PDC_CHIP0_OFS;
@@ -527,7 +502,7 @@ static void pdc20621_nodata_prep(struct ata_queued_cmd *qc)
readl(dimm_mmio); /* MMIO PCI posting flush */
- VPRINTK("ata pkt buf ofs %u, mmio copied\n", i);
+ ata_port_dbg(ap, "ata pkt buf ofs %u, mmio copied\n", i);
}
static enum ata_completion_errors pdc20621_qc_prep(struct ata_queued_cmd *qc)
@@ -601,7 +576,6 @@ static void pdc20621_pop_hdma(struct ata_queued_cmd *qc)
pp->hdma_cons++;
}
-#ifdef ATA_VERBOSE_DEBUG
static void pdc20621_dump_hdma(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
@@ -611,14 +585,10 @@ static void pdc20621_dump_hdma(struct ata_queued_cmd *qc)
dimm_mmio += (port_no * PDC_DIMM_WINDOW_STEP);
dimm_mmio += PDC_DIMM_HOST_PKT;
- printk(KERN_ERR "HDMA[0] == 0x%08X\n", readl(dimm_mmio));
- printk(KERN_ERR "HDMA[1] == 0x%08X\n", readl(dimm_mmio + 4));
- printk(KERN_ERR "HDMA[2] == 0x%08X\n", readl(dimm_mmio + 8));
- printk(KERN_ERR "HDMA[3] == 0x%08X\n", readl(dimm_mmio + 12));
+ ata_port_dbg(ap, "HDMA 0x%08X 0x%08X 0x%08X 0x%08X\n",
+ readl(dimm_mmio), readl(dimm_mmio + 4),
+ readl(dimm_mmio + 8), readl(dimm_mmio + 12));
}
-#else
-static inline void pdc20621_dump_hdma(struct ata_queued_cmd *qc) { }
-#endif /* ATA_VERBOSE_DEBUG */
static void pdc20621_packet_start(struct ata_queued_cmd *qc)
{
@@ -633,8 +603,6 @@ static void pdc20621_packet_start(struct ata_queued_cmd *qc)
/* hard-code chip #0 */
mmio += PDC_CHIP0_OFS;
- VPRINTK("ata%u: ENTER\n", ap->print_id);
-
wmb(); /* flush PRD, pkt writes */
port_ofs = PDC_20621_DIMM_BASE + (PDC_DIMM_WINDOW_STEP * port_no);
@@ -645,7 +613,7 @@ static void pdc20621_packet_start(struct ata_queued_cmd *qc)
pdc20621_dump_hdma(qc);
pdc20621_push_hdma(qc, seq, port_ofs + PDC_DIMM_HOST_PKT);
- VPRINTK("queued ofs 0x%x (%u), seq %u\n",
+ ata_port_dbg(ap, "queued ofs 0x%x (%u), seq %u\n",
port_ofs + PDC_DIMM_HOST_PKT,
port_ofs + PDC_DIMM_HOST_PKT,
seq);
@@ -656,7 +624,7 @@ static void pdc20621_packet_start(struct ata_queued_cmd *qc)
writel(port_ofs + PDC_DIMM_ATA_PKT,
ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT);
readl(ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT);
- VPRINTK("submitted ofs 0x%x (%u), seq %u\n",
+ ata_port_dbg(ap, "submitted ofs 0x%x (%u), seq %u\n",
port_ofs + PDC_DIMM_ATA_PKT,
port_ofs + PDC_DIMM_ATA_PKT,
seq);
@@ -696,14 +664,12 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap,
u8 status;
unsigned int handled = 0;
- VPRINTK("ENTER\n");
-
if ((qc->tf.protocol == ATA_PROT_DMA) && /* read */
(!(qc->tf.flags & ATA_TFLAG_WRITE))) {
/* step two - DMA from DIMM to host */
if (doing_hdma) {
- VPRINTK("ata%u: read hdma, 0x%x 0x%x\n", ap->print_id,
+ ata_port_dbg(ap, "read hdma, 0x%x 0x%x\n",
readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
/* get drive status; clear intr; complete txn */
qc->err_mask |= ac_err_mask(ata_wait_idle(ap));
@@ -714,7 +680,7 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap,
/* step one - exec ATA command */
else {
u8 seq = (u8) (port_no + 1 + 4);
- VPRINTK("ata%u: read ata, 0x%x 0x%x\n", ap->print_id,
+ ata_port_dbg(ap, "read ata, 0x%x 0x%x\n",
readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
/* submit hdma pkt */
@@ -729,7 +695,7 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap,
/* step one - DMA from host to DIMM */
if (doing_hdma) {
u8 seq = (u8) (port_no + 1);
- VPRINTK("ata%u: write hdma, 0x%x 0x%x\n", ap->print_id,
+ ata_port_dbg(ap, "write hdma, 0x%x 0x%x\n",
readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
/* submit ata pkt */
@@ -742,7 +708,7 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap,
/* step two - execute ATA command */
else {
- VPRINTK("ata%u: write ata, 0x%x 0x%x\n", ap->print_id,
+ ata_port_dbg(ap, "write ata, 0x%x 0x%x\n",
readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
/* get drive status; clear intr; complete txn */
qc->err_mask |= ac_err_mask(ata_wait_idle(ap));
@@ -755,7 +721,7 @@ static inline unsigned int pdc20621_host_intr(struct ata_port *ap,
} else if (qc->tf.protocol == ATA_PROT_NODATA) {
status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
- DPRINTK("BUS_NODATA (drv_stat 0x%X)\n", status);
+ ata_port_dbg(ap, "BUS_NODATA (drv_stat 0x%X)\n", status);
qc->err_mask |= ac_err_mask(status);
ata_qc_complete(qc);
handled = 1;
@@ -781,29 +747,21 @@ static irqreturn_t pdc20621_interrupt(int irq, void *dev_instance)
unsigned int handled = 0;
void __iomem *mmio_base;
- VPRINTK("ENTER\n");
-
- if (!host || !host->iomap[PDC_MMIO_BAR]) {
- VPRINTK("QUICK EXIT\n");
+ if (!host || !host->iomap[PDC_MMIO_BAR])
return IRQ_NONE;
- }
mmio_base = host->iomap[PDC_MMIO_BAR];
/* reading should also clear interrupts */
mmio_base += PDC_CHIP0_OFS;
mask = readl(mmio_base + PDC_20621_SEQMASK);
- VPRINTK("mask == 0x%x\n", mask);
- if (mask == 0xffffffff) {
- VPRINTK("QUICK EXIT 2\n");
+ if (mask == 0xffffffff)
return IRQ_NONE;
- }
+
mask &= 0xffff; /* only 16 tags possible */
- if (!mask) {
- VPRINTK("QUICK EXIT 3\n");
+ if (!mask)
return IRQ_NONE;
- }
spin_lock(&host->lock);
@@ -816,7 +774,8 @@ static irqreturn_t pdc20621_interrupt(int irq, void *dev_instance)
else
ap = host->ports[port_no];
tmp = mask & (1 << i);
- VPRINTK("seq %u, port_no %u, ap %p, tmp %x\n", i, port_no, ap, tmp);
+ if (ap)
+ ata_port_dbg(ap, "seq %u, tmp %x\n", i, tmp);
if (tmp && ap) {
struct ata_queued_cmd *qc;
@@ -829,10 +788,6 @@ static irqreturn_t pdc20621_interrupt(int irq, void *dev_instance)
spin_unlock(&host->lock);
- VPRINTK("mask == 0x%x\n", mask);
-
- VPRINTK("EXIT\n");
-
return IRQ_RETVAL(handled);
}
@@ -979,7 +934,6 @@ static void pdc_sata_setup_port(struct ata_ioports *port, void __iomem *base)
}
-#ifdef ATA_VERBOSE_DEBUG
static void pdc20621_get_from_dimm(struct ata_host *host, void *psource,
u32 offset, u32 size)
{
@@ -1029,7 +983,6 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource,
memcpy_fromio(psource, dimm_mmio, size / 4);
}
}
-#endif
static void pdc20621_put_to_dimm(struct ata_host *host, void *psource,
@@ -1226,15 +1179,16 @@ static unsigned int pdc20621_prog_dimm_global(struct ata_host *host)
/* Turn on for ECC */
if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
PDC_DIMM_SPD_TYPE, &spd0)) {
- pr_err("Failed in i2c read: device=%#x, subaddr=%#x\n",
- PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE);
+ dev_err(host->dev,
+ "Failed in i2c read: device=%#x, subaddr=%#x\n",
+ PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE);
return 1;
}
if (spd0 == 0x02) {
data |= (0x01 << 16);
writel(data, mmio + PDC_SDRAM_CONTROL);
readl(mmio + PDC_SDRAM_CONTROL);
- printk(KERN_ERR "Local DIMM ECC Enabled\n");
+ dev_err(host->dev, "Local DIMM ECC Enabled\n");
}
/* DIMM Initialization Select/Enable (bit 18/19) */
@@ -1274,7 +1228,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
/* Initialize Time Period Register */
writel(0xffffffff, mmio + PDC_TIME_PERIOD);
time_period = readl(mmio + PDC_TIME_PERIOD);
- VPRINTK("Time Period Register (0x40): 0x%x\n", time_period);
+ dev_dbg(host->dev, "Time Period Register (0x40): 0x%x\n", time_period);
/* Enable timer */
writel(PDC_TIMER_DEFAULT, mmio + PDC_TIME_CONTROL);
@@ -1289,7 +1243,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
*/
tcount = readl(mmio + PDC_TIME_COUNTER);
- VPRINTK("Time Counter Register (0x44): 0x%x\n", tcount);
+ dev_dbg(host->dev, "Time Counter Register (0x44): 0x%x\n", tcount);
/*
If SX4 is on PCI-X bus, after 3 seconds, the timer counter
@@ -1297,17 +1251,19 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
*/
if (tcount >= PCI_X_TCOUNT) {
ticks = (time_period - tcount);
- VPRINTK("Num counters 0x%x (%d)\n", ticks, ticks);
+ dev_dbg(host->dev, "Num counters 0x%x (%d)\n", ticks, ticks);
clock = (ticks / 300000);
- VPRINTK("10 * Internal clk = 0x%x (%d)\n", clock, clock);
+ dev_dbg(host->dev, "10 * Internal clk = 0x%x (%d)\n",
+ clock, clock);
clock = (clock * 33);
- VPRINTK("10 * Internal clk * 33 = 0x%x (%d)\n", clock, clock);
+ dev_dbg(host->dev, "10 * Internal clk * 33 = 0x%x (%d)\n",
+ clock, clock);
/* PLL F Param (bit 22:16) */
fparam = (1400000 / clock) - 2;
- VPRINTK("PLL F Param: 0x%x (%d)\n", fparam, fparam);
+ dev_dbg(host->dev, "PLL F Param: 0x%x (%d)\n", fparam, fparam);
/* OD param = 0x2 (bit 31:30), R param = 0x5 (bit 29:25) */
pci_status = (0x8a001824 | (fparam << 16));
@@ -1315,7 +1271,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
pci_status = PCI_PLL_INIT;
/* Initialize PLL. */
- VPRINTK("pci_status: 0x%x\n", pci_status);
+ dev_dbg(host->dev, "pci_status: 0x%x\n", pci_status);
writel(pci_status, mmio + PDC_CTL_STATUS);
readl(mmio + PDC_CTL_STATUS);
@@ -1324,23 +1280,23 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
and program the DIMM Module Controller.
*/
if (!(speed = pdc20621_detect_dimm(host))) {
- printk(KERN_ERR "Detect Local DIMM Fail\n");
+ dev_err(host->dev, "Detect Local DIMM Fail\n");
return 1; /* DIMM error */
}
- VPRINTK("Local DIMM Speed = %d\n", speed);
+ dev_dbg(host->dev, "Local DIMM Speed = %d\n", speed);
/* Programming DIMM0 Module Control Register (index_CID0:80h) */
size = pdc20621_prog_dimm0(host);
- VPRINTK("Local DIMM Size = %dMB\n", size);
+ dev_dbg(host->dev, "Local DIMM Size = %dMB\n", size);
/* Programming DIMM Module Global Control Register (index_CID0:88h) */
if (pdc20621_prog_dimm_global(host)) {
- printk(KERN_ERR "Programming DIMM Module Global Control Register Fail\n");
+ dev_err(host->dev,
+ "Programming DIMM Module Global Control Register Fail\n");
return 1;
}
-#ifdef ATA_VERBOSE_DEBUG
- {
+ if (dimm_test) {
u8 test_parttern1[40] =
{0x55,0xAA,'P','r','o','m','i','s','e',' ',
'N','o','t',' ','Y','e','t',' ',
@@ -1354,31 +1310,33 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
pdc20621_put_to_dimm(host, test_parttern1, 0x10040, 40);
pdc20621_get_from_dimm(host, test_parttern2, 0x40, 40);
- printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0],
+ dev_info(host->dev, "DIMM test pattern 1: %x, %x, %s\n", test_parttern2[0],
test_parttern2[1], &(test_parttern2[2]));
pdc20621_get_from_dimm(host, test_parttern2, 0x10040,
40);
- printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0],
- test_parttern2[1], &(test_parttern2[2]));
+ dev_info(host->dev, "DIMM test pattern 2: %x, %x, %s\n",
+ test_parttern2[0],
+ test_parttern2[1], &(test_parttern2[2]));
pdc20621_put_to_dimm(host, test_parttern1, 0x40, 40);
pdc20621_get_from_dimm(host, test_parttern2, 0x40, 40);
- printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0],
- test_parttern2[1], &(test_parttern2[2]));
+ dev_info(host->dev, "DIMM test pattern 3: %x, %x, %s\n",
+ test_parttern2[0],
+ test_parttern2[1], &(test_parttern2[2]));
}
-#endif
/* ECC initiliazation. */
if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
PDC_DIMM_SPD_TYPE, &spd0)) {
- pr_err("Failed in i2c read: device=%#x, subaddr=%#x\n",
+ dev_err(host->dev,
+ "Failed in i2c read: device=%#x, subaddr=%#x\n",
PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE);
return 1;
}
if (spd0 == 0x02) {
void *buf;
- VPRINTK("Start ECC initialization\n");
+ dev_dbg(host->dev, "Start ECC initialization\n");
addr = 0;
length = size * 1024 * 1024;
buf = kzalloc(ECC_ERASE_BUF_SZ, GFP_KERNEL);
@@ -1390,7 +1348,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
addr += ECC_ERASE_BUF_SZ;
}
kfree(buf);
- VPRINTK("Finish ECC initialization\n");
+ dev_dbg(host->dev, "Finish ECC initialization\n");
}
return 0;
}
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index bc8e8d9f176b..3e726ee91fdc 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -178,7 +178,6 @@ static void ia_hack_tcq(IADEV *dev) {
static u16 get_desc (IADEV *dev, struct ia_vcc *iavcc) {
u_short desc_num, i;
- struct sk_buff *skb;
struct ia_vcc *iavcc_r = NULL;
unsigned long delta;
static unsigned long timer = 0;
@@ -202,8 +201,7 @@ static u16 get_desc (IADEV *dev, struct ia_vcc *iavcc) {
else
dev->ffL.tcq_rd -= 2;
*(u_short *)(dev->seg_ram + dev->ffL.tcq_rd) = i+1;
- if (!(skb = dev->desc_tbl[i].txskb) ||
- !(iavcc_r = dev->desc_tbl[i].iavcc))
+ if (!dev->desc_tbl[i].txskb || !(iavcc_r = dev->desc_tbl[i].iavcc))
printk("Fatal err, desc table vcc or skb is NULL\n");
else
iavcc_r->vc_desc_cnt--;
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index bc1876915457..eaa31e567d1e 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -14,7 +14,6 @@
#include <linux/of.h>
#include <asm/sections.h>
-#include <asm/pgalloc.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@@ -155,66 +154,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
}
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
-{
- int nid = early_cpu_to_node(cpu);
-
- return memblock_alloc_try_nid(size, align,
- __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
-static void __init pcpu_populate_pte(unsigned long addr)
-{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-}
-#endif
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
@@ -229,7 +168,7 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ early_cpu_to_node);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
@@ -239,10 +178,7 @@ void __init setup_per_cpu_areas(void)
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
- rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
- pcpu_fc_alloc,
- pcpu_fc_free,
- pcpu_populate_pte);
+ rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
#endif
if (rc < 0)
panic("Failed to initialize percpu areas (err=%d).", rc);
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 1e2c2d3882e2..f41063ac1aee 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -65,8 +65,15 @@ static struct dentry *public_dev_mount(struct file_system_type *fs_type, int fla
const char *dev_name, void *data)
{
struct super_block *s = mnt->mnt_sb;
+ int err;
+
atomic_inc(&s->s_active);
down_write(&s->s_umount);
+ err = reconfigure_single(s, flags, data);
+ if (err < 0) {
+ deactivate_locked_super(s);
+ return ERR_PTR(err);
+ }
return dget(s->s_root);
}
diff --git a/drivers/base/firmware_loader/builtin/Makefile b/drivers/base/firmware_loader/builtin/Makefile
index eb4be452062a..6c067dedc01e 100644
--- a/drivers/base/firmware_loader/builtin/Makefile
+++ b/drivers/base/firmware_loader/builtin/Makefile
@@ -3,10 +3,10 @@ obj-y += main.o
# Create $(fwdir) from $(CONFIG_EXTRA_FIRMWARE_DIR) -- if it doesn't have a
# leading /, it's relative to $(srctree).
-fwdir := $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE_DIR))
+fwdir := $(CONFIG_EXTRA_FIRMWARE_DIR)
fwdir := $(addprefix $(srctree)/,$(filter-out /%,$(fwdir)))$(filter /%,$(fwdir))
-firmware := $(addsuffix .gen.o, $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE)))
+firmware := $(addsuffix .gen.o, $(CONFIG_EXTRA_FIRMWARE))
obj-y += $(firmware)
FWNAME = $(patsubst $(obj)/%.gen.S,%,$@)
diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index d7d63c1aa993..4afb0e9312c0 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -199,11 +199,16 @@ static struct class firmware_class = {
int register_sysfs_loader(void)
{
- return class_register(&firmware_class);
+ int ret = class_register(&firmware_class);
+
+ if (ret != 0)
+ return ret;
+ return register_firmware_config_sysctl();
}
void unregister_sysfs_loader(void)
{
+ unregister_firmware_config_sysctl();
class_unregister(&firmware_class);
}
diff --git a/drivers/base/firmware_loader/fallback.h b/drivers/base/firmware_loader/fallback.h
index 3af7205b302f..9f3055d3b4ca 100644
--- a/drivers/base/firmware_loader/fallback.h
+++ b/drivers/base/firmware_loader/fallback.h
@@ -42,6 +42,17 @@ void fw_fallback_set_default_timeout(void);
int register_sysfs_loader(void);
void unregister_sysfs_loader(void);
+#ifdef CONFIG_SYSCTL
+extern int register_firmware_config_sysctl(void);
+extern void unregister_firmware_config_sysctl(void);
+#else
+static inline int register_firmware_config_sysctl(void)
+{
+ return 0;
+}
+static inline void unregister_firmware_config_sysctl(void) { }
+#endif /* CONFIG_SYSCTL */
+
#else /* CONFIG_FW_LOADER_USER_HELPER */
static inline int firmware_fallback_sysfs(struct firmware *fw, const char *name,
struct device *device,
diff --git a/drivers/base/firmware_loader/fallback_table.c b/drivers/base/firmware_loader/fallback_table.c
index 46a731dede6f..e5ac098d0742 100644
--- a/drivers/base/firmware_loader/fallback_table.c
+++ b/drivers/base/firmware_loader/fallback_table.c
@@ -4,6 +4,7 @@
#include <linux/kconfig.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/export.h>
#include <linux/security.h>
#include <linux/highmem.h>
#include <linux/umh.h>
@@ -24,7 +25,7 @@ struct firmware_fallback_config fw_fallback_config = {
EXPORT_SYMBOL_NS_GPL(fw_fallback_config, FIRMWARE_LOADER_PRIVATE);
#ifdef CONFIG_SYSCTL
-struct ctl_table firmware_config_table[] = {
+static struct ctl_table firmware_config_table[] = {
{
.procname = "force_sysfs_fallback",
.data = &fw_fallback_config.force_sysfs_fallback,
@@ -45,4 +46,24 @@ struct ctl_table firmware_config_table[] = {
},
{ }
};
-#endif
+
+static struct ctl_table_header *firmware_config_sysct_table_header;
+int register_firmware_config_sysctl(void)
+{
+ firmware_config_sysct_table_header =
+ register_sysctl("kernel/firmware_config",
+ firmware_config_table);
+ if (!firmware_config_sysct_table_header)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(register_firmware_config_sysctl, FIRMWARE_LOADER_PRIVATE);
+
+void unregister_firmware_config_sysctl(void)
+{
+ unregister_sysctl_table(firmware_config_sysct_table_header);
+ firmware_config_sysct_table_header = NULL;
+}
+EXPORT_SYMBOL_NS_GPL(unregister_firmware_config_sysctl, FIRMWARE_LOADER_PRIVATE);
+
+#endif /* CONFIG_SYSCTL */
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 94665037f4a3..72b7a92337b1 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -120,7 +120,11 @@ static unsigned int read_magic_time(void)
struct rtc_time time;
unsigned int val;
- mc146818_get_time(&time);
+ if (mc146818_get_time(&time) < 0) {
+ pr_err("Unable to read current time from RTC\n");
+ return 0;
+ }
+
pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time);
val = time.tm_year; /* 100 years */
if (val > 100)
diff --git a/drivers/base/property.c b/drivers/base/property.c
index a74c21af97c1..e6497f6877ee 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -920,6 +920,22 @@ int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index)
EXPORT_SYMBOL(fwnode_irq_get);
/**
+ * fwnode_iomap - Maps the memory mapped IO for a given fwnode
+ * @fwnode: Pointer to the firmware node
+ * @index: Index of the IO range
+ *
+ * Returns a pointer to the mapped memory.
+ */
+void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index)
+{
+ if (IS_ENABLED(CONFIG_OF_ADDRESS) && is_of_node(fwnode))
+ return of_iomap(to_of_node(fwnode), index);
+
+ return NULL;
+}
+EXPORT_SYMBOL(fwnode_iomap);
+
+/**
* fwnode_graph_get_next_endpoint - Get next endpoint firmware node
* @fwnode: Pointer to the parent firmware node
* @prev: Previous endpoint node or %NULL to get the first
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 588889bea7c3..6af111f568e4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -122,7 +122,7 @@ newtag(struct aoedev *d)
register ulong n;
n = jiffies & 0xffff;
- return n |= (++d->lasttag & 0x7fff) << 16;
+ return n | (++d->lasttag & 0x7fff) << 16;
}
static u32
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 8fe2e4289dae..6e3f2f0d2352 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -362,7 +362,6 @@ __setup("ramdisk_size=", ramdisk_size);
* (should share code eventually).
*/
static LIST_HEAD(brd_devices);
-static DEFINE_MUTEX(brd_devices_mutex);
static struct dentry *brd_debugfs_dir;
static int brd_alloc(int i)
@@ -372,21 +371,14 @@ static int brd_alloc(int i)
char buf[DISK_NAME_LEN];
int err = -ENOMEM;
- mutex_lock(&brd_devices_mutex);
- list_for_each_entry(brd, &brd_devices, brd_list) {
- if (brd->brd_number == i) {
- mutex_unlock(&brd_devices_mutex);
+ list_for_each_entry(brd, &brd_devices, brd_list)
+ if (brd->brd_number == i)
return -EEXIST;
- }
- }
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
- if (!brd) {
- mutex_unlock(&brd_devices_mutex);
+ if (!brd)
return -ENOMEM;
- }
brd->brd_number = i;
list_add_tail(&brd->brd_list, &brd_devices);
- mutex_unlock(&brd_devices_mutex);
spin_lock_init(&brd->brd_lock);
INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
@@ -429,9 +421,7 @@ static int brd_alloc(int i)
out_cleanup_disk:
blk_cleanup_disk(disk);
out_free_dev:
- mutex_lock(&brd_devices_mutex);
list_del(&brd->brd_list);
- mutex_unlock(&brd_devices_mutex);
kfree(brd);
return err;
}
@@ -441,15 +431,19 @@ static void brd_probe(dev_t dev)
brd_alloc(MINOR(dev) / max_part);
}
-static void brd_del_one(struct brd_device *brd)
+static void brd_cleanup(void)
{
- del_gendisk(brd->brd_disk);
- blk_cleanup_disk(brd->brd_disk);
- brd_free_pages(brd);
- mutex_lock(&brd_devices_mutex);
- list_del(&brd->brd_list);
- mutex_unlock(&brd_devices_mutex);
- kfree(brd);
+ struct brd_device *brd, *next;
+
+ debugfs_remove_recursive(brd_debugfs_dir);
+
+ list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
+ del_gendisk(brd->brd_disk);
+ blk_cleanup_disk(brd->brd_disk);
+ brd_free_pages(brd);
+ list_del(&brd->brd_list);
+ kfree(brd);
+ }
}
static inline void brd_check_and_reset_par(void)
@@ -473,9 +467,18 @@ static inline void brd_check_and_reset_par(void)
static int __init brd_init(void)
{
- struct brd_device *brd, *next;
int err, i;
+ brd_check_and_reset_par();
+
+ brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
+
+ for (i = 0; i < rd_nr; i++) {
+ err = brd_alloc(i);
+ if (err)
+ goto out_free;
+ }
+
/*
* brd module now has a feature to instantiate underlying device
* structure on-demand, provided that there is an access dev node.
@@ -491,28 +494,16 @@ static int __init brd_init(void)
* dynamically.
*/
- if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe))
- return -EIO;
-
- brd_check_and_reset_par();
-
- brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
-
- for (i = 0; i < rd_nr; i++) {
- err = brd_alloc(i);
- if (err)
- goto out_free;
+ if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
+ err = -EIO;
+ goto out_free;
}
pr_info("brd: module loaded\n");
return 0;
out_free:
- unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
- debugfs_remove_recursive(brd_debugfs_dir);
-
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
- brd_del_one(brd);
+ brd_cleanup();
pr_info("brd: module NOT loaded !!!\n");
return err;
@@ -520,13 +511,9 @@ out_free:
static void __exit brd_exit(void)
{
- struct brd_device *brd, *next;
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
- debugfs_remove_recursive(brd_debugfs_dir);
-
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
- brd_del_one(brd);
+ brd_cleanup();
pr_info("brd: module unloaded\n");
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b1b05c45c07c..01cbbfc4e9e2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -820,7 +820,7 @@ static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
{
- struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL;
+ struct rb_node **node, *parent = NULL;
struct loop_worker *cur_worker, *worker = NULL;
struct work_struct *work;
struct list_head *cmd_list;
diff --git a/drivers/block/paride/bpck.c b/drivers/block/paride/bpck.c
index f5f63ca2889d..d880a9465e9b 100644
--- a/drivers/block/paride/bpck.c
+++ b/drivers/block/paride/bpck.c
@@ -28,6 +28,7 @@
#undef r2
#undef w2
+#undef PC
#define PC pi->private
#define r2() (PC=(in_p(2) & 0xff))
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8f140da1efe3..4203cdab8abf 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -6189,7 +6189,7 @@ static inline size_t next_token(const char **buf)
* These are the characters that produce nonzero for
* isspace() in the "C" and "POSIX" locales.
*/
- const char *spaces = " \f\n\r\t\v";
+ static const char spaces[] = " \f\n\r\t\v";
*buf += strspn(*buf, spaces); /* Find start of token */
@@ -6495,7 +6495,8 @@ static int rbd_add_parse_args(const char *buf,
pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
pctx.opts->trim = RBD_TRIM_DEFAULT;
- ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
+ ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL,
+ ',');
if (ret)
goto out_err;
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 8f8443ee6fe4..c08971de369f 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -196,7 +196,7 @@ rnbd_get_cpu_qlist(struct rnbd_clt_session *sess, int cpu)
return per_cpu_ptr(sess->cpu_queues, bit);
} else if (cpu != 0) {
/* Search from 0 to cpu */
- bit = find_next_bit(sess->cpu_queues_bm, cpu, 0);
+ bit = find_first_bit(sess->cpu_queues_bm, cpu);
if (bit < cpu)
return per_cpu_ptr(sess->cpu_queues, bit);
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c3dc3cd7a779..c443cd64fc9b 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -976,7 +976,7 @@ static void virtblk_remove(struct virtio_device *vdev)
mutex_lock(&vblk->vdev_mutex);
/* Stop all the virtqueues. */
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
/* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
vblk->vdev = NULL;
@@ -995,7 +995,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
struct virtio_blk *vblk = vdev->priv;
/* Ensure we don't receive any more interrupts */
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
/* Make sure no work handler is accessing the device. */
flush_work(&vblk->config_work);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f6da5293b913..cb253d80d72b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1903,14 +1903,7 @@ static struct attribute *zram_disk_attrs[] = {
NULL,
};
-static const struct attribute_group zram_disk_attr_group = {
- .attrs = zram_disk_attrs,
-};
-
-static const struct attribute_group *zram_disk_attr_groups[] = {
- &zram_disk_attr_group,
- NULL,
-};
+ATTRIBUTE_GROUPS(zram_disk);
/*
* Allocate and initialize new zram device. the function returns
@@ -1983,7 +1976,7 @@ static int zram_add(void)
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
- ret = device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
+ ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
goto out_cleanup_disk;
diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c
index 076e4942a3f0..67c21263f9e0 100644
--- a/drivers/bluetooth/virtio_bt.c
+++ b/drivers/bluetooth/virtio_bt.c
@@ -367,7 +367,7 @@ static void virtbt_remove(struct virtio_device *vdev)
struct hci_dev *hdev = vbt->hdev;
hci_unregister_dev(hdev);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
hci_free_dev(hdev);
vbt->hdev = NULL;
diff --git a/drivers/bus/mhi/core/boot.c b/drivers/bus/mhi/core/boot.c
index 0a972620a403..74295d3cc662 100644
--- a/drivers/bus/mhi/core/boot.c
+++ b/drivers/bus/mhi/core/boot.c
@@ -417,7 +417,7 @@ void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl)
}
/* wait for ready on pass through or any other execution environment */
- if (mhi_cntrl->ee != MHI_EE_EDL && mhi_cntrl->ee != MHI_EE_PBL)
+ if (!MHI_FW_LOAD_CAPABLE(mhi_cntrl->ee))
goto fw_load_ready_state;
fw_name = (mhi_cntrl->ee == MHI_EE_EDL) ?
diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c
index 5aaca6d0f52b..046f407dc5d6 100644
--- a/drivers/bus/mhi/core/init.c
+++ b/drivers/bus/mhi/core/init.c
@@ -79,7 +79,8 @@ static const char * const mhi_pm_state_str[] = {
const char *to_mhi_pm_state_str(enum mhi_pm_state state)
{
- int index = find_last_bit((unsigned long *)&state, 32);
+ unsigned long pm_state = state;
+ int index = find_last_bit(&pm_state, 32);
if (index >= ARRAY_SIZE(mhi_pm_state_str))
return "Invalid State";
@@ -788,6 +789,7 @@ static int parse_ch_cfg(struct mhi_controller *mhi_cntrl,
mhi_chan->offload_ch = ch_cfg->offload_channel;
mhi_chan->db_cfg.reset_req = ch_cfg->doorbell_mode_switch;
mhi_chan->pre_alloc = ch_cfg->auto_queue;
+ mhi_chan->wake_capable = ch_cfg->wake_capable;
/*
* If MHI host allocates buffers, then the channel direction
diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h
index 3a732afaf73e..e2e10474a9d9 100644
--- a/drivers/bus/mhi/core/internal.h
+++ b/drivers/bus/mhi/core/internal.h
@@ -390,7 +390,8 @@ extern const char * const mhi_ee_str[MHI_EE_MAX];
#define MHI_IN_PBL(ee) (ee == MHI_EE_PBL || ee == MHI_EE_PTHRU || \
ee == MHI_EE_EDL)
-
+#define MHI_POWER_UP_CAPABLE(ee) (MHI_IN_PBL(ee) || ee == MHI_EE_AMSS)
+#define MHI_FW_LOAD_CAPABLE(ee) (ee == MHI_EE_PBL || ee == MHI_EE_EDL)
#define MHI_IN_MISSION_MODE(ee) (ee == MHI_EE_AMSS || ee == MHI_EE_WFW || \
ee == MHI_EE_FP)
@@ -681,8 +682,12 @@ void mhi_deinit_free_irq(struct mhi_controller *mhi_cntrl);
void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl,
struct image_info *img_info);
void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl);
+
+/* Automatically allocate and queue inbound buffers */
+#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0)
int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
- struct mhi_chan *mhi_chan);
+ struct mhi_chan *mhi_chan, unsigned int flags);
+
int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
struct mhi_chan *mhi_chan);
void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c
index b15c5bc37dd4..ffde617f93a3 100644
--- a/drivers/bus/mhi/core/main.c
+++ b/drivers/bus/mhi/core/main.c
@@ -1065,7 +1065,7 @@ void mhi_ctrl_ev_task(unsigned long data)
return;
}
- /* Process ctrl events events */
+ /* Process ctrl events */
ret = mhi_event->process_event(mhi_cntrl, mhi_event, U32_MAX);
/*
@@ -1430,7 +1430,7 @@ exit_unprepare_channel:
}
int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
- struct mhi_chan *mhi_chan)
+ struct mhi_chan *mhi_chan, unsigned int flags)
{
int ret = 0;
struct device *dev = &mhi_chan->mhi_dev->dev;
@@ -1455,6 +1455,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
if (ret)
goto error_pm_state;
+ if (mhi_chan->dir == DMA_FROM_DEVICE)
+ mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS);
+
/* Pre-allocate buffer for xfer ring */
if (mhi_chan->pre_alloc) {
int nr_el = get_nr_avail_ring_elements(mhi_cntrl,
@@ -1464,6 +1467,7 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
while (nr_el--) {
void *buf;
struct mhi_buf_info info = { };
+
buf = kmalloc(len, GFP_KERNEL);
if (!buf) {
ret = -ENOMEM;
@@ -1609,8 +1613,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan)
read_unlock_bh(&mhi_cntrl->pm_lock);
}
-/* Move channel to start state */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
+static int __mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags)
{
int ret, dir;
struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
@@ -1621,7 +1624,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
if (!mhi_chan)
continue;
- ret = mhi_prepare_channel(mhi_cntrl, mhi_chan);
+ ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags);
if (ret)
goto error_open_chan;
}
@@ -1639,8 +1642,19 @@ error_open_chan:
return ret;
}
+
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
+{
+ return __mhi_prepare_for_transfer(mhi_dev, 0);
+}
EXPORT_SYMBOL_GPL(mhi_prepare_for_transfer);
+int mhi_prepare_for_transfer_autoqueue(struct mhi_device *mhi_dev)
+{
+ return __mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);
+}
+EXPORT_SYMBOL_GPL(mhi_prepare_for_transfer_autoqueue);
+
void mhi_unprepare_from_transfer(struct mhi_device *mhi_dev)
{
struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
index 547e6e769546..4aae0baea008 100644
--- a/drivers/bus/mhi/core/pm.c
+++ b/drivers/bus/mhi/core/pm.c
@@ -42,7 +42,7 @@
* L3: LD_ERR_FATAL_DETECT <--> LD_ERR_FATAL_DETECT
* LD_ERR_FATAL_DETECT -> DISABLE
*/
-static struct mhi_pm_transitions const dev_state_transitions[] = {
+static const struct mhi_pm_transitions dev_state_transitions[] = {
/* L0 States */
{
MHI_PM_DISABLE,
@@ -1053,7 +1053,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
enum mhi_ee_type current_ee;
enum dev_st_transition next_state;
struct device *dev = &mhi_cntrl->mhi_dev->dev;
- u32 val;
+ u32 interval_us = 25000; /* poll register field every 25 milliseconds */
int ret;
dev_info(dev, "Requested to power ON\n");
@@ -1070,10 +1070,6 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
mutex_lock(&mhi_cntrl->pm_mutex);
mhi_cntrl->pm_state = MHI_PM_DISABLE;
- ret = mhi_init_irq_setup(mhi_cntrl);
- if (ret)
- goto error_setup_irq;
-
/* Setup BHI INTVEC */
write_lock_irq(&mhi_cntrl->pm_lock);
mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
@@ -1083,11 +1079,11 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
write_unlock_irq(&mhi_cntrl->pm_lock);
/* Confirm that the device is in valid exec env */
- if (!MHI_IN_PBL(current_ee) && current_ee != MHI_EE_AMSS) {
+ if (!MHI_POWER_UP_CAPABLE(current_ee)) {
dev_err(dev, "%s is not a valid EE for power on\n",
TO_MHI_EXEC_STR(current_ee));
ret = -EIO;
- goto error_async_power_up;
+ goto error_exit;
}
state = mhi_get_mhi_state(mhi_cntrl);
@@ -1096,20 +1092,12 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
if (state == MHI_STATE_SYS_ERR) {
mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
- ret = wait_event_timeout(mhi_cntrl->state_event,
- MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state) ||
- mhi_read_reg_field(mhi_cntrl,
- mhi_cntrl->regs,
- MHICTRL,
- MHICTRL_RESET_MASK,
- MHICTRL_RESET_SHIFT,
- &val) ||
- !val,
- msecs_to_jiffies(mhi_cntrl->timeout_ms));
- if (!ret) {
- ret = -EIO;
+ ret = mhi_poll_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
+ MHICTRL_RESET_MASK, MHICTRL_RESET_SHIFT, 0,
+ interval_us);
+ if (ret) {
dev_info(dev, "Failed to reset MHI due to syserr state\n");
- goto error_async_power_up;
+ goto error_exit;
}
/*
@@ -1119,6 +1107,10 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
}
+ ret = mhi_init_irq_setup(mhi_cntrl);
+ if (ret)
+ goto error_exit;
+
/* Transition to next state */
next_state = MHI_IN_PBL(current_ee) ?
DEV_ST_TRANSITION_PBL : DEV_ST_TRANSITION_READY;
@@ -1131,10 +1123,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
return 0;
-error_async_power_up:
- mhi_deinit_free_irq(mhi_cntrl);
-
-error_setup_irq:
+error_exit:
mhi_cntrl->pm_state = MHI_PM_DISABLE;
mutex_unlock(&mhi_cntrl->pm_mutex);
diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 4c577a731709..3a258a677df8 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -403,7 +403,50 @@ static const struct mhi_pci_dev_info mhi_mv31_info = {
.dma_data_width = 32,
};
+static const struct mhi_channel_config mhi_sierra_em919x_channels[] = {
+ MHI_CHANNEL_CONFIG_UL_SBL(2, "SAHARA", 32, 0),
+ MHI_CHANNEL_CONFIG_DL_SBL(3, "SAHARA", 256, 0),
+ MHI_CHANNEL_CONFIG_UL(4, "DIAG", 32, 0),
+ MHI_CHANNEL_CONFIG_DL(5, "DIAG", 32, 0),
+ MHI_CHANNEL_CONFIG_UL(12, "MBIM", 128, 0),
+ MHI_CHANNEL_CONFIG_DL(13, "MBIM", 128, 0),
+ MHI_CHANNEL_CONFIG_UL(14, "QMI", 32, 0),
+ MHI_CHANNEL_CONFIG_DL(15, "QMI", 32, 0),
+ MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0),
+ MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0),
+ MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 512, 1),
+ MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0", 512, 2),
+};
+
+static struct mhi_event_config modem_sierra_em919x_mhi_events[] = {
+ /* first ring is control+data and DIAG ring */
+ MHI_EVENT_CONFIG_CTRL(0, 2048),
+ /* Hardware channels request dedicated hardware event rings */
+ MHI_EVENT_CONFIG_HW_DATA(1, 2048, 100),
+ MHI_EVENT_CONFIG_HW_DATA(2, 2048, 101)
+};
+
+static const struct mhi_controller_config modem_sierra_em919x_config = {
+ .max_channels = 128,
+ .timeout_ms = 24000,
+ .num_channels = ARRAY_SIZE(mhi_sierra_em919x_channels),
+ .ch_cfg = mhi_sierra_em919x_channels,
+ .num_events = ARRAY_SIZE(modem_sierra_em919x_mhi_events),
+ .event_cfg = modem_sierra_em919x_mhi_events,
+};
+
+static const struct mhi_pci_dev_info mhi_sierra_em919x_info = {
+ .name = "sierra-em919x",
+ .config = &modem_sierra_em919x_config,
+ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
+ .dma_data_width = 32,
+ .sideband_wake = false,
+};
+
static const struct pci_device_id mhi_pci_id_table[] = {
+ /* EM919x (sdx55), use the same vid:pid as qcom-sdx55m */
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0306, 0x18d7, 0x0200),
+ .driver_data = (kernel_ulong_t) &mhi_sierra_em919x_info },
{ PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0306),
.driver_data = (kernel_ulong_t) &mhi_qcom_sdx55_info },
{ PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0304),
@@ -423,6 +466,9 @@ static const struct pci_device_id mhi_pci_id_table[] = {
/* DW5930e (sdx55), Non-eSIM, It's also T99W175 */
{ PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0b1),
.driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
+ /* T99W175 (sdx55), Based on Qualcomm new baseline */
+ { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0bf),
+ .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
/* MV31-W (Cinterion) */
{ PCI_DEVICE(0x1269, 0x00b3),
.driver_data = (kernel_ulong_t) &mhi_mv31_info },
@@ -529,18 +575,12 @@ static int mhi_pci_claim(struct mhi_controller *mhi_cntrl,
mhi_cntrl->regs = pcim_iomap_table(pdev)[bar_num];
mhi_cntrl->reg_len = pci_resource_len(pdev, bar_num);
- err = pci_set_dma_mask(pdev, dma_mask);
+ err = dma_set_mask_and_coherent(&pdev->dev, dma_mask);
if (err) {
dev_err(&pdev->dev, "Cannot set proper DMA mask\n");
return err;
}
- err = pci_set_consistent_dma_mask(pdev, dma_mask);
- if (err) {
- dev_err(&pdev->dev, "set consistent dma mask failed\n");
- return err;
- }
-
pci_set_master(pdev);
return 0;
@@ -1018,7 +1058,7 @@ static int __maybe_unused mhi_pci_freeze(struct device *dev)
* context.
*/
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
- mhi_power_down(mhi_cntrl, false);
+ mhi_power_down(mhi_cntrl, true);
mhi_unprepare_after_power_down(mhi_cntrl);
}
diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
index ea0424922de7..db612045616f 100644
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -914,6 +914,7 @@ int mvebu_mbus_add_window_remap_by_id(unsigned int target,
return mvebu_mbus_alloc_window(s, base, size, remap, target, attribute);
}
+EXPORT_SYMBOL_GPL(mvebu_mbus_add_window_remap_by_id);
int mvebu_mbus_add_window_by_id(unsigned int target, unsigned int attribute,
phys_addr_t base, size_t size)
@@ -921,6 +922,7 @@ int mvebu_mbus_add_window_by_id(unsigned int target, unsigned int attribute,
return mvebu_mbus_add_window_remap_by_id(target, attribute, base,
size, MVEBU_MBUS_NO_REMAP);
}
+EXPORT_SYMBOL_GPL(mvebu_mbus_add_window_by_id);
int mvebu_mbus_del_window(phys_addr_t base, size_t size)
{
@@ -933,6 +935,7 @@ int mvebu_mbus_del_window(phys_addr_t base, size_t size)
mvebu_mbus_disable_window(&mbus_state, win);
return 0;
}
+EXPORT_SYMBOL_GPL(mvebu_mbus_del_window);
void mvebu_mbus_get_pcie_mem_aperture(struct resource *res)
{
@@ -940,6 +943,7 @@ void mvebu_mbus_get_pcie_mem_aperture(struct resource *res)
return;
*res = mbus_state.pcie_mem_aperture;
}
+EXPORT_SYMBOL_GPL(mvebu_mbus_get_pcie_mem_aperture);
void mvebu_mbus_get_pcie_io_aperture(struct resource *res)
{
@@ -947,6 +951,7 @@ void mvebu_mbus_get_pcie_io_aperture(struct resource *res)
return;
*res = mbus_state.pcie_io_aperture;
}
+EXPORT_SYMBOL_GPL(mvebu_mbus_get_pcie_io_aperture);
int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr)
{
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 9877e413fce3..1b57d4666e43 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3691,27 +3691,6 @@ static struct ctl_table cdrom_table[] = {
},
{ }
};
-
-static struct ctl_table cdrom_cdrom_table[] = {
- {
- .procname = "cdrom",
- .maxlen = 0,
- .mode = 0555,
- .child = cdrom_table,
- },
- { }
-};
-
-/* Make sure that /proc/sys/dev is there */
-static struct ctl_table cdrom_root_table[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = cdrom_cdrom_table,
- },
- { }
-};
static struct ctl_table_header *cdrom_sysctl_header;
static void cdrom_sysctl_register(void)
@@ -3721,7 +3700,7 @@ static void cdrom_sysctl_register(void)
if (!atomic_add_unless(&initialized, 1, 1))
return;
- cdrom_sysctl_header = register_sysctl_table(cdrom_root_table);
+ cdrom_sysctl_header = register_sysctl("dev/cdrom", cdrom_table);
/* set the defaults */
cdrom_sysctl_settings.autoclose = autoclose;
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index b40edae32817..dc78a4fb879e 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -588,20 +588,11 @@ static void agp_amd64_remove(struct pci_dev *pdev)
agp_bridges_found--;
}
-#ifdef CONFIG_PM
+#define agp_amd64_suspend NULL
-static int agp_amd64_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused agp_amd64_resume(struct device *dev)
{
- pci_save_state(pdev);
- pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
- return 0;
-}
-
-static int agp_amd64_resume(struct pci_dev *pdev)
-{
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
+ struct pci_dev *pdev = to_pci_dev(dev);
if (pdev->vendor == PCI_VENDOR_ID_NVIDIA)
nforce3_agp_init(pdev);
@@ -609,8 +600,6 @@ static int agp_amd64_resume(struct pci_dev *pdev)
return amd_8151_configure();
}
-#endif /* CONFIG_PM */
-
static const struct pci_device_id agp_amd64_pci_table[] = {
{
.class = (PCI_CLASS_BRIDGE_HOST << 8),
@@ -738,15 +727,14 @@ static const struct pci_device_id agp_amd64_pci_promisc_table[] = {
{ }
};
+static SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, agp_amd64_suspend, agp_amd64_resume);
+
static struct pci_driver agp_amd64_pci_driver = {
.name = "agpgart-amd64",
.id_table = agp_amd64_pci_table,
.probe = agp_amd64_probe,
.remove = agp_amd64_remove,
-#ifdef CONFIG_PM
- .suspend = agp_amd64_suspend,
- .resume = agp_amd64_resume,
-#endif
+ .driver.pm = &agp_amd64_pm_ops,
};
diff --git a/drivers/char/agp/sis-agp.c b/drivers/char/agp/sis-agp.c
index 14909fc5d767..f8a02f4bef1b 100644
--- a/drivers/char/agp/sis-agp.c
+++ b/drivers/char/agp/sis-agp.c
@@ -217,26 +217,14 @@ static void agp_sis_remove(struct pci_dev *pdev)
agp_put_bridge(bridge);
}
-#ifdef CONFIG_PM
+#define agp_sis_suspend NULL
-static int agp_sis_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused agp_sis_resume(
+ __attribute__((unused)) struct device *dev)
{
- pci_save_state(pdev);
- pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
- return 0;
-}
-
-static int agp_sis_resume(struct pci_dev *pdev)
-{
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
-
return sis_driver.configure();
}
-#endif /* CONFIG_PM */
-
static const struct pci_device_id agp_sis_pci_table[] = {
{
.class = (PCI_CLASS_BRIDGE_HOST << 8),
@@ -419,15 +407,14 @@ static const struct pci_device_id agp_sis_pci_table[] = {
MODULE_DEVICE_TABLE(pci, agp_sis_pci_table);
+static SIMPLE_DEV_PM_OPS(agp_sis_pm_ops, agp_sis_suspend, agp_sis_resume);
+
static struct pci_driver agp_sis_pci_driver = {
.name = "agpgart-sis",
.id_table = agp_sis_pci_table,
.probe = agp_sis_probe,
.remove = agp_sis_remove,
-#ifdef CONFIG_PM
- .suspend = agp_sis_suspend,
- .resume = agp_sis_resume,
-#endif
+ .driver.pm = &agp_sis_pm_ops,
};
static int __init agp_sis_init(void)
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index 87a92a044570..a460ae352772 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -492,22 +492,11 @@ static void agp_via_remove(struct pci_dev *pdev)
agp_put_bridge(bridge);
}
-#ifdef CONFIG_PM
+#define agp_via_suspend NULL
-static int agp_via_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused agp_via_resume(struct device *dev)
{
- pci_save_state (pdev);
- pci_set_power_state (pdev, PCI_D3hot);
-
- return 0;
-}
-
-static int agp_via_resume(struct pci_dev *pdev)
-{
- struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
-
- pci_set_power_state (pdev, PCI_D0);
- pci_restore_state(pdev);
+ struct agp_bridge_data *bridge = dev_get_drvdata(dev);
if (bridge->driver == &via_agp3_driver)
return via_configure_agp3();
@@ -517,8 +506,6 @@ static int agp_via_resume(struct pci_dev *pdev)
return 0;
}
-#endif /* CONFIG_PM */
-
/* must be the same order as name table above */
static const struct pci_device_id agp_via_pci_table[] = {
#define ID(x) \
@@ -567,16 +554,14 @@ static const struct pci_device_id agp_via_pci_table[] = {
MODULE_DEVICE_TABLE(pci, agp_via_pci_table);
+static SIMPLE_DEV_PM_OPS(agp_via_pm_ops, agp_via_suspend, agp_via_resume);
static struct pci_driver agp_via_pci_driver = {
.name = "agpgart-via",
.id_table = agp_via_pci_table,
.probe = agp_via_probe,
.remove = agp_via_remove,
-#ifdef CONFIG_PM
- .suspend = agp_via_suspend,
- .resume = agp_via_resume,
-#endif
+ .driver.pm = &agp_via_pm_ops,
};
diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index deb85a334c93..36203d3fa6ea 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -89,8 +89,8 @@ static struct applicom_board {
spinlock_t mutex;
} apbs[MAX_BOARD];
-static unsigned int irq = 0; /* interrupt number IRQ */
-static unsigned long mem = 0; /* physical segment of board */
+static unsigned int irq; /* interrupt number IRQ */
+static unsigned long mem; /* physical segment of board */
module_param_hw(irq, uint, irq, 0);
MODULE_PARM_DESC(irq, "IRQ of the Applicom board");
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 4e5431f01450..563dfae3b8da 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -746,26 +746,6 @@ static struct ctl_table hpet_table[] = {
{}
};
-static struct ctl_table hpet_root[] = {
- {
- .procname = "hpet",
- .maxlen = 0,
- .mode = 0555,
- .child = hpet_table,
- },
- {}
-};
-
-static struct ctl_table dev_root[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = hpet_root,
- },
- {}
-};
-
static struct ctl_table_header *sysctl_header;
/*
@@ -1061,7 +1041,7 @@ static int __init hpet_init(void)
if (result < 0)
return -ENODEV;
- sysctl_header = register_sysctl_table(dev_root);
+ sysctl_header = register_sysctl("dev/hpet", hpet_table);
result = acpi_bus_register_driver(&hpet_acpi_driver);
if (result < 0) {
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 0a7dde135db1..e856df7e285c 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -179,9 +179,9 @@ static void remove_common(struct virtio_device *vdev)
vi->data_avail = 0;
vi->data_idx = 0;
complete(&vi->have_data);
- vdev->config->reset(vdev);
if (vi->hwrng_register_done)
hwrng_unregister(&vi->hwrng);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
ida_simple_remove(&rng_index_ida, vi->index);
kfree(vi);
diff --git a/drivers/char/mwave/3780i.h b/drivers/char/mwave/3780i.h
index 9ccb6b270b07..95164246afd1 100644
--- a/drivers/char/mwave/3780i.h
+++ b/drivers/char/mwave/3780i.h
@@ -68,7 +68,7 @@ typedef struct {
unsigned char ClockControl:1; /* RW: Clock control: 0=normal, 1=stop 3780i clocks */
unsigned char SoftReset:1; /* RW: Soft reset 0=normal, 1=soft reset active */
unsigned char ConfigMode:1; /* RW: Configuration mode, 0=normal, 1=config mode */
- unsigned char Reserved:5; /* 0: Reserved */
+ unsigned short Reserved:13; /* 0: Reserved */
} DSP_ISA_SLAVE_CONTROL;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 227fb7802738..68613f0b6887 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -101,7 +101,7 @@
* ===============================
*
* There are four exported interfaces; two for use within the kernel,
- * and two or use from userspace.
+ * and two for use from userspace.
*
* Exported interfaces ---- userspace output
* -----------------------------------------
@@ -124,7 +124,7 @@
*
* The primary kernel interface is
*
- * void get_random_bytes(void *buf, int nbytes);
+ * void get_random_bytes(void *buf, int nbytes);
*
* This interface will return the requested number of random bytes,
* and place it in the requested buffer. This is equivalent to a
@@ -132,10 +132,10 @@
*
* For less critical applications, there are the functions:
*
- * u32 get_random_u32()
- * u64 get_random_u64()
- * unsigned int get_random_int()
- * unsigned long get_random_long()
+ * u32 get_random_u32()
+ * u64 get_random_u64()
+ * unsigned int get_random_int()
+ * unsigned long get_random_long()
*
* These are produced by a cryptographic RNG seeded from get_random_bytes,
* and so do not deplete the entropy pool as much. These are recommended
@@ -197,10 +197,10 @@
* from the devices are:
*
* void add_device_randomness(const void *buf, unsigned int size);
- * void add_input_randomness(unsigned int type, unsigned int code,
+ * void add_input_randomness(unsigned int type, unsigned int code,
* unsigned int value);
* void add_interrupt_randomness(int irq);
- * void add_disk_randomness(struct gendisk *disk);
+ * void add_disk_randomness(struct gendisk *disk);
* void add_hwgenerator_randomness(const char *buffer, size_t count,
* size_t entropy);
* void add_bootloader_randomness(const void *buf, unsigned int size);
@@ -296,8 +296,8 @@
* /dev/random and /dev/urandom created already, they can be created
* by using the commands:
*
- * mknod /dev/random c 1 8
- * mknod /dev/urandom c 1 9
+ * mknod /dev/random c 1 8
+ * mknod /dev/urandom c 1 9
*
* Acknowledgements:
* =================
@@ -337,7 +337,6 @@
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
-#include <linux/fips.h>
#include <linux/ptrace.h>
#include <linux/workqueue.h>
#include <linux/irq.h>
@@ -360,30 +359,11 @@
/* #define ADD_INTERRUPT_BENCH */
/*
- * Configuration information
- */
-#define INPUT_POOL_SHIFT 12
-#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5))
-#define OUTPUT_POOL_SHIFT 10
-#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5))
-#define EXTRACT_SIZE (BLAKE2S_HASH_SIZE / 2)
-
-/*
- * To allow fractional bits to be tracked, the entropy_count field is
- * denominated in units of 1/8th bits.
- *
- * 2*(ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in
- * credit_entropy_bits() needs to be 64 bits wide.
- */
-#define ENTROPY_SHIFT 3
-#define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT)
-
-/*
* If the entropy count falls under this number of bits, then we
* should wake up processes which are selecting or polling on write
* access to /dev/random.
*/
-static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
+static int random_write_wakeup_bits = 28 * (1 << 5);
/*
* Originally, we used a primitive polynomial of degree .poolwords
@@ -430,14 +410,27 @@ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
* polynomial which improves the resulting TGFSR polynomial to be
* irreducible, which we have made here.
*/
-static const struct poolinfo {
- int poolbitshift, poolwords, poolbytes, poolfracbits;
-#define S(x) ilog2(x)+5, (x), (x)*4, (x) << (ENTROPY_SHIFT+5)
- int tap1, tap2, tap3, tap4, tap5;
-} poolinfo_table[] = {
- /* was: x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 */
+enum poolinfo {
+ POOL_WORDS = 128,
+ POOL_WORDMASK = POOL_WORDS - 1,
+ POOL_BYTES = POOL_WORDS * sizeof(u32),
+ POOL_BITS = POOL_BYTES * 8,
+ POOL_BITSHIFT = ilog2(POOL_BITS),
+
+ /* To allow fractional bits to be tracked, the entropy_count field is
+ * denominated in units of 1/8th bits. */
+ POOL_ENTROPY_SHIFT = 3,
+#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT)
+ POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT,
+
/* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */
- { S(128), 104, 76, 51, 25, 1 },
+ POOL_TAP1 = 104,
+ POOL_TAP2 = 76,
+ POOL_TAP3 = 51,
+ POOL_TAP4 = 25,
+ POOL_TAP5 = 1,
+
+ EXTRACT_SIZE = BLAKE2S_HASH_SIZE / 2
};
/*
@@ -450,9 +443,9 @@ static DEFINE_SPINLOCK(random_ready_list_lock);
static LIST_HEAD(random_ready_list);
struct crng_state {
- __u32 state[16];
- unsigned long init_time;
- spinlock_t lock;
+ u32 state[16];
+ unsigned long init_time;
+ spinlock_t lock;
};
static struct crng_state primary_crng = {
@@ -476,10 +469,10 @@ static bool crng_need_final_init = false;
#define crng_ready() (likely(crng_init > 1))
static int crng_init_cnt = 0;
static unsigned long crng_global_init_time = 0;
-#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE)
-static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]);
+#define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE)
+static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]);
static void _crng_backtrack_protect(struct crng_state *crng,
- __u8 tmp[CHACHA_BLOCK_SIZE], int used);
+ u8 tmp[CHACHA_BLOCK_SIZE], int used);
static void process_random_ready_list(void);
static void _get_random_bytes(void *buf, int nbytes);
@@ -500,38 +493,23 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
*
**********************************************************************/
-struct entropy_store;
-struct entropy_store {
- /* read-only data: */
- const struct poolinfo *poolinfo;
- __u32 *pool;
- const char *name;
+static u32 input_pool_data[POOL_WORDS] __latent_entropy;
- /* read-write data: */
+static struct {
spinlock_t lock;
- unsigned short add_ptr;
- unsigned short input_rotate;
+ u16 add_ptr;
+ u16 input_rotate;
int entropy_count;
- unsigned int last_data_init:1;
- __u8 last_data[EXTRACT_SIZE];
+} input_pool = {
+ .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
};
-static ssize_t extract_entropy(struct entropy_store *r, void *buf,
- size_t nbytes, int min, int rsvd);
-static ssize_t _extract_entropy(struct entropy_store *r, void *buf,
- size_t nbytes, int fips);
+static ssize_t extract_entropy(void *buf, size_t nbytes, int min);
+static ssize_t _extract_entropy(void *buf, size_t nbytes);
-static void crng_reseed(struct crng_state *crng, struct entropy_store *r);
-static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy;
+static void crng_reseed(struct crng_state *crng, bool use_input_pool);
-static struct entropy_store input_pool = {
- .poolinfo = &poolinfo_table[0],
- .name = "input",
- .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
- .pool = input_pool_data
-};
-
-static __u32 const twist_table[8] = {
+static const u32 twist_table[8] = {
0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
@@ -545,39 +523,31 @@ static __u32 const twist_table[8] = {
* it's cheap to do so and helps slightly in the expected case where
* the entropy is concentrated in the low-order bits.
*/
-static void _mix_pool_bytes(struct entropy_store *r, const void *in,
- int nbytes)
+static void _mix_pool_bytes(const void *in, int nbytes)
{
- unsigned long i, tap1, tap2, tap3, tap4, tap5;
+ unsigned long i;
int input_rotate;
- int wordmask = r->poolinfo->poolwords - 1;
- const unsigned char *bytes = in;
- __u32 w;
-
- tap1 = r->poolinfo->tap1;
- tap2 = r->poolinfo->tap2;
- tap3 = r->poolinfo->tap3;
- tap4 = r->poolinfo->tap4;
- tap5 = r->poolinfo->tap5;
+ const u8 *bytes = in;
+ u32 w;
- input_rotate = r->input_rotate;
- i = r->add_ptr;
+ input_rotate = input_pool.input_rotate;
+ i = input_pool.add_ptr;
/* mix one byte at a time to simplify size handling and churn faster */
while (nbytes--) {
w = rol32(*bytes++, input_rotate);
- i = (i - 1) & wordmask;
+ i = (i - 1) & POOL_WORDMASK;
/* XOR in the various taps */
- w ^= r->pool[i];
- w ^= r->pool[(i + tap1) & wordmask];
- w ^= r->pool[(i + tap2) & wordmask];
- w ^= r->pool[(i + tap3) & wordmask];
- w ^= r->pool[(i + tap4) & wordmask];
- w ^= r->pool[(i + tap5) & wordmask];
+ w ^= input_pool_data[i];
+ w ^= input_pool_data[(i + POOL_TAP1) & POOL_WORDMASK];
+ w ^= input_pool_data[(i + POOL_TAP2) & POOL_WORDMASK];
+ w ^= input_pool_data[(i + POOL_TAP3) & POOL_WORDMASK];
+ w ^= input_pool_data[(i + POOL_TAP4) & POOL_WORDMASK];
+ w ^= input_pool_data[(i + POOL_TAP5) & POOL_WORDMASK];
/* Mix the result back in with a twist */
- r->pool[i] = (w >> 3) ^ twist_table[w & 7];
+ input_pool_data[i] = (w >> 3) ^ twist_table[w & 7];
/*
* Normally, we add 7 bits of rotation to the pool.
@@ -588,33 +558,31 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in,
input_rotate = (input_rotate + (i ? 7 : 14)) & 31;
}
- r->input_rotate = input_rotate;
- r->add_ptr = i;
+ input_pool.input_rotate = input_rotate;
+ input_pool.add_ptr = i;
}
-static void __mix_pool_bytes(struct entropy_store *r, const void *in,
- int nbytes)
+static void __mix_pool_bytes(const void *in, int nbytes)
{
- trace_mix_pool_bytes_nolock(r->name, nbytes, _RET_IP_);
- _mix_pool_bytes(r, in, nbytes);
+ trace_mix_pool_bytes_nolock(nbytes, _RET_IP_);
+ _mix_pool_bytes(in, nbytes);
}
-static void mix_pool_bytes(struct entropy_store *r, const void *in,
- int nbytes)
+static void mix_pool_bytes(const void *in, int nbytes)
{
unsigned long flags;
- trace_mix_pool_bytes(r->name, nbytes, _RET_IP_);
- spin_lock_irqsave(&r->lock, flags);
- _mix_pool_bytes(r, in, nbytes);
- spin_unlock_irqrestore(&r->lock, flags);
+ trace_mix_pool_bytes(nbytes, _RET_IP_);
+ spin_lock_irqsave(&input_pool.lock, flags);
+ _mix_pool_bytes(in, nbytes);
+ spin_unlock_irqrestore(&input_pool.lock, flags);
}
struct fast_pool {
- __u32 pool[4];
- unsigned long last;
- unsigned short reg_idx;
- unsigned char count;
+ u32 pool[4];
+ unsigned long last;
+ u16 reg_idx;
+ u8 count;
};
/*
@@ -624,8 +592,8 @@ struct fast_pool {
*/
static void fast_mix(struct fast_pool *f)
{
- __u32 a = f->pool[0], b = f->pool[1];
- __u32 c = f->pool[2], d = f->pool[3];
+ u32 a = f->pool[0], b = f->pool[1];
+ u32 c = f->pool[2], d = f->pool[3];
a += b; c += d;
b = rol32(b, 6); d = rol32(d, 27);
@@ -669,17 +637,19 @@ static void process_random_ready_list(void)
* Use credit_entropy_bits_safe() if the value comes from userspace
* or otherwise should be checked for extreme values.
*/
-static void credit_entropy_bits(struct entropy_store *r, int nbits)
+static void credit_entropy_bits(int nbits)
{
- int entropy_count, orig;
- const int pool_size = r->poolinfo->poolfracbits;
- int nfrac = nbits << ENTROPY_SHIFT;
+ int entropy_count, entropy_bits, orig;
+ int nfrac = nbits << POOL_ENTROPY_SHIFT;
+
+ /* Ensure that the multiplication can avoid being 64 bits wide. */
+ BUILD_BUG_ON(2 * (POOL_ENTROPY_SHIFT + POOL_BITSHIFT) > 31);
if (!nbits)
return;
retry:
- entropy_count = orig = READ_ONCE(r->entropy_count);
+ entropy_count = orig = READ_ONCE(input_pool.entropy_count);
if (nfrac < 0) {
/* Debit */
entropy_count += nfrac;
@@ -706,50 +676,43 @@ retry:
* turns no matter how large nbits is.
*/
int pnfrac = nfrac;
- const int s = r->poolinfo->poolbitshift + ENTROPY_SHIFT + 2;
+ const int s = POOL_BITSHIFT + POOL_ENTROPY_SHIFT + 2;
/* The +2 corresponds to the /4 in the denominator */
do {
- unsigned int anfrac = min(pnfrac, pool_size/2);
+ unsigned int anfrac = min(pnfrac, POOL_FRACBITS / 2);
unsigned int add =
- ((pool_size - entropy_count)*anfrac*3) >> s;
+ ((POOL_FRACBITS - entropy_count) * anfrac * 3) >> s;
entropy_count += add;
pnfrac -= anfrac;
- } while (unlikely(entropy_count < pool_size-2 && pnfrac));
+ } while (unlikely(entropy_count < POOL_FRACBITS - 2 && pnfrac));
}
if (WARN_ON(entropy_count < 0)) {
- pr_warn("negative entropy/overflow: pool %s count %d\n",
- r->name, entropy_count);
+ pr_warn("negative entropy/overflow: count %d\n", entropy_count);
entropy_count = 0;
- } else if (entropy_count > pool_size)
- entropy_count = pool_size;
- if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+ } else if (entropy_count > POOL_FRACBITS)
+ entropy_count = POOL_FRACBITS;
+ if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig)
goto retry;
- trace_credit_entropy_bits(r->name, nbits,
- entropy_count >> ENTROPY_SHIFT, _RET_IP_);
+ trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_);
- if (r == &input_pool) {
- int entropy_bits = entropy_count >> ENTROPY_SHIFT;
-
- if (crng_init < 2 && entropy_bits >= 128)
- crng_reseed(&primary_crng, r);
- }
+ entropy_bits = entropy_count >> POOL_ENTROPY_SHIFT;
+ if (crng_init < 2 && entropy_bits >= 128)
+ crng_reseed(&primary_crng, true);
}
-static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
+static int credit_entropy_bits_safe(int nbits)
{
- const int nbits_max = r->poolinfo->poolwords * 32;
-
if (nbits < 0)
return -EINVAL;
/* Cap the value to avoid overflows */
- nbits = min(nbits, nbits_max);
+ nbits = min(nbits, POOL_BITS);
- credit_entropy_bits(r, nbits);
+ credit_entropy_bits(nbits);
return 0;
}
@@ -759,7 +722,7 @@ static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
*
*********************************************************************/
-#define CRNG_RESEED_INTERVAL (300*HZ)
+#define CRNG_RESEED_INTERVAL (300 * HZ)
static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
@@ -783,9 +746,9 @@ early_param("random.trust_cpu", parse_trust_cpu);
static bool crng_init_try_arch(struct crng_state *crng)
{
- int i;
- bool arch_init = true;
- unsigned long rv;
+ int i;
+ bool arch_init = true;
+ unsigned long rv;
for (i = 4; i < 16; i++) {
if (!arch_get_random_seed_long(&rv) &&
@@ -801,9 +764,9 @@ static bool crng_init_try_arch(struct crng_state *crng)
static bool __init crng_init_try_arch_early(struct crng_state *crng)
{
- int i;
- bool arch_init = true;
- unsigned long rv;
+ int i;
+ bool arch_init = true;
+ unsigned long rv;
for (i = 4; i < 16; i++) {
if (!arch_get_random_seed_long_early(&rv) &&
@@ -820,14 +783,14 @@ static bool __init crng_init_try_arch_early(struct crng_state *crng)
static void crng_initialize_secondary(struct crng_state *crng)
{
chacha_init_consts(crng->state);
- _get_random_bytes(&crng->state[4], sizeof(__u32) * 12);
+ _get_random_bytes(&crng->state[4], sizeof(u32) * 12);
crng_init_try_arch(crng);
crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
}
static void __init crng_initialize_primary(struct crng_state *crng)
{
- _extract_entropy(&input_pool, &crng->state[4], sizeof(__u32) * 12, 0);
+ _extract_entropy(&crng->state[4], sizeof(u32) * 12);
if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) {
invalidate_batched_entropy();
numa_crng_init();
@@ -873,7 +836,7 @@ static void do_numa_crng_init(struct work_struct *work)
struct crng_state *crng;
struct crng_state **pool;
- pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL);
+ pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL | __GFP_NOFAIL);
for_each_online_node(i) {
crng = kmalloc_node(sizeof(struct crng_state),
GFP_KERNEL | __GFP_NOFAIL, i);
@@ -917,10 +880,10 @@ static struct crng_state *select_crng(void)
* path. So we can't afford to dilly-dally. Returns the number of
* bytes processed from cp.
*/
-static size_t crng_fast_load(const char *cp, size_t len)
+static size_t crng_fast_load(const u8 *cp, size_t len)
{
unsigned long flags;
- char *p;
+ u8 *p;
size_t ret = 0;
if (!spin_trylock_irqsave(&primary_crng.lock, flags))
@@ -929,7 +892,7 @@ static size_t crng_fast_load(const char *cp, size_t len)
spin_unlock_irqrestore(&primary_crng.lock, flags);
return 0;
}
- p = (unsigned char *) &primary_crng.state[4];
+ p = (u8 *)&primary_crng.state[4];
while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp;
cp++; crng_init_cnt++; len--; ret++;
@@ -957,14 +920,14 @@ static size_t crng_fast_load(const char *cp, size_t len)
* like a fixed DMI table (for example), which might very well be
* unique to the machine, but is otherwise unvarying.
*/
-static int crng_slow_load(const char *cp, size_t len)
+static int crng_slow_load(const u8 *cp, size_t len)
{
- unsigned long flags;
- static unsigned char lfsr = 1;
- unsigned char tmp;
- unsigned i, max = CHACHA_KEY_SIZE;
- const char * src_buf = cp;
- char * dest_buf = (char *) &primary_crng.state[4];
+ unsigned long flags;
+ static u8 lfsr = 1;
+ u8 tmp;
+ unsigned int i, max = CHACHA_KEY_SIZE;
+ const u8 *src_buf = cp;
+ u8 *dest_buf = (u8 *)&primary_crng.state[4];
if (!spin_trylock_irqsave(&primary_crng.lock, flags))
return 0;
@@ -975,7 +938,7 @@ static int crng_slow_load(const char *cp, size_t len)
if (len > max)
max = len;
- for (i = 0; i < max ; i++) {
+ for (i = 0; i < max; i++) {
tmp = lfsr;
lfsr >>= 1;
if (tmp & 1)
@@ -988,17 +951,17 @@ static int crng_slow_load(const char *cp, size_t len)
return 1;
}
-static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
+static void crng_reseed(struct crng_state *crng, bool use_input_pool)
{
- unsigned long flags;
- int i, num;
+ unsigned long flags;
+ int i, num;
union {
- __u8 block[CHACHA_BLOCK_SIZE];
- __u32 key[8];
+ u8 block[CHACHA_BLOCK_SIZE];
+ u32 key[8];
} buf;
- if (r) {
- num = extract_entropy(r, &buf, 32, 16, 0);
+ if (use_input_pool) {
+ num = extract_entropy(&buf, 32, 16);
if (num == 0)
return;
} else {
@@ -1008,11 +971,11 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
}
spin_lock_irqsave(&crng->lock, flags);
for (i = 0; i < 8; i++) {
- unsigned long rv;
+ unsigned long rv;
if (!arch_get_random_seed_long(&rv) &&
!arch_get_random_long(&rv))
rv = random_get_entropy();
- crng->state[i+4] ^= buf.key[i] ^ rv;
+ crng->state[i + 4] ^= buf.key[i] ^ rv;
}
memzero_explicit(&buf, sizeof(buf));
WRITE_ONCE(crng->init_time, jiffies);
@@ -1020,8 +983,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
crng_finalize_init(crng);
}
-static void _extract_crng(struct crng_state *crng,
- __u8 out[CHACHA_BLOCK_SIZE])
+static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE])
{
unsigned long flags, init_time;
@@ -1029,8 +991,7 @@ static void _extract_crng(struct crng_state *crng,
init_time = READ_ONCE(crng->init_time);
if (time_after(READ_ONCE(crng_global_init_time), init_time) ||
time_after(jiffies, init_time + CRNG_RESEED_INTERVAL))
- crng_reseed(crng, crng == &primary_crng ?
- &input_pool : NULL);
+ crng_reseed(crng, crng == &primary_crng);
}
spin_lock_irqsave(&crng->lock, flags);
chacha20_block(&crng->state[0], out);
@@ -1039,7 +1000,7 @@ static void _extract_crng(struct crng_state *crng,
spin_unlock_irqrestore(&crng->lock, flags);
}
-static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE])
+static void extract_crng(u8 out[CHACHA_BLOCK_SIZE])
{
_extract_crng(select_crng(), out);
}
@@ -1049,26 +1010,26 @@ static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE])
* enough) to mutate the CRNG key to provide backtracking protection.
*/
static void _crng_backtrack_protect(struct crng_state *crng,
- __u8 tmp[CHACHA_BLOCK_SIZE], int used)
+ u8 tmp[CHACHA_BLOCK_SIZE], int used)
{
- unsigned long flags;
- __u32 *s, *d;
- int i;
+ unsigned long flags;
+ u32 *s, *d;
+ int i;
- used = round_up(used, sizeof(__u32));
+ used = round_up(used, sizeof(u32));
if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) {
extract_crng(tmp);
used = 0;
}
spin_lock_irqsave(&crng->lock, flags);
- s = (__u32 *) &tmp[used];
+ s = (u32 *)&tmp[used];
d = &crng->state[4];
- for (i=0; i < 8; i++)
+ for (i = 0; i < 8; i++)
*d++ ^= *s++;
spin_unlock_irqrestore(&crng->lock, flags);
}
-static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used)
+static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used)
{
_crng_backtrack_protect(select_crng(), tmp, used);
}
@@ -1076,7 +1037,7 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used)
static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
{
ssize_t ret = 0, i = CHACHA_BLOCK_SIZE;
- __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
+ u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
int large_request = (nbytes > 256);
while (nbytes) {
@@ -1108,7 +1069,6 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
return ret;
}
-
/*********************************************************************
*
* Entropy input management
@@ -1141,8 +1101,8 @@ void add_device_randomness(const void *buf, unsigned int size)
trace_add_device_randomness(size, _RET_IP_);
spin_lock_irqsave(&input_pool.lock, flags);
- _mix_pool_bytes(&input_pool, buf, size);
- _mix_pool_bytes(&input_pool, &time, sizeof(time));
+ _mix_pool_bytes(buf, size);
+ _mix_pool_bytes(&time, sizeof(time));
spin_unlock_irqrestore(&input_pool.lock, flags);
}
EXPORT_SYMBOL(add_device_randomness);
@@ -1161,19 +1121,17 @@ static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE;
*/
static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
{
- struct entropy_store *r;
struct {
long jiffies;
- unsigned cycles;
- unsigned num;
+ unsigned int cycles;
+ unsigned int num;
} sample;
long delta, delta2, delta3;
sample.jiffies = jiffies;
sample.cycles = random_get_entropy();
sample.num = num;
- r = &input_pool;
- mix_pool_bytes(r, &sample, sizeof(sample));
+ mix_pool_bytes(&sample, sizeof(sample));
/*
* Calculate number of bits of randomness we probably added.
@@ -1205,11 +1163,11 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
* Round down by 1 bit on general principles,
* and limit entropy estimate to 12 bits.
*/
- credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
+ credit_entropy_bits(min_t(int, fls(delta >> 1), 11));
}
void add_input_randomness(unsigned int type, unsigned int code,
- unsigned int value)
+ unsigned int value)
{
static unsigned char last_value;
@@ -1220,7 +1178,7 @@ void add_input_randomness(unsigned int type, unsigned int code,
last_value = value;
add_timer_randomness(&input_timer_state,
(type << 4) ^ code ^ (code >> 4) ^ value);
- trace_add_input_randomness(ENTROPY_BITS(&input_pool));
+ trace_add_input_randomness(POOL_ENTROPY_BITS());
}
EXPORT_SYMBOL_GPL(add_input_randomness);
@@ -1229,33 +1187,33 @@ static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
#ifdef ADD_INTERRUPT_BENCH
static unsigned long avg_cycles, avg_deviation;
-#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */
-#define FIXED_1_2 (1 << (AVG_SHIFT-1))
+#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */
+#define FIXED_1_2 (1 << (AVG_SHIFT - 1))
static void add_interrupt_bench(cycles_t start)
{
- long delta = random_get_entropy() - start;
+ long delta = random_get_entropy() - start;
- /* Use a weighted moving average */
- delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT);
- avg_cycles += delta;
- /* And average deviation */
- delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT);
- avg_deviation += delta;
+ /* Use a weighted moving average */
+ delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT);
+ avg_cycles += delta;
+ /* And average deviation */
+ delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT);
+ avg_deviation += delta;
}
#else
#define add_interrupt_bench(x)
#endif
-static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
+static u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
{
- __u32 *ptr = (__u32 *) regs;
+ u32 *ptr = (u32 *)regs;
unsigned int idx;
if (regs == NULL)
return 0;
idx = READ_ONCE(f->reg_idx);
- if (idx >= sizeof(struct pt_regs) / sizeof(__u32))
+ if (idx >= sizeof(struct pt_regs) / sizeof(u32))
idx = 0;
ptr += idx++;
WRITE_ONCE(f->reg_idx, idx);
@@ -1264,13 +1222,12 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
void add_interrupt_randomness(int irq)
{
- struct entropy_store *r;
- struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
- struct pt_regs *regs = get_irq_regs();
- unsigned long now = jiffies;
- cycles_t cycles = random_get_entropy();
- __u32 c_high, j_high;
- __u64 ip;
+ struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
+ struct pt_regs *regs = get_irq_regs();
+ unsigned long now = jiffies;
+ cycles_t cycles = random_get_entropy();
+ u32 c_high, j_high;
+ u64 ip;
if (cycles == 0)
cycles = get_reg(fast_pool, regs);
@@ -1280,38 +1237,35 @@ void add_interrupt_randomness(int irq)
fast_pool->pool[1] ^= now ^ c_high;
ip = regs ? instruction_pointer(regs) : _RET_IP_;
fast_pool->pool[2] ^= ip;
- fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
- get_reg(fast_pool, regs);
+ fast_pool->pool[3] ^=
+ (sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs);
fast_mix(fast_pool);
add_interrupt_bench(cycles);
if (unlikely(crng_init == 0)) {
if ((fast_pool->count >= 64) &&
- crng_fast_load((char *) fast_pool->pool,
- sizeof(fast_pool->pool)) > 0) {
+ crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) {
fast_pool->count = 0;
fast_pool->last = now;
}
return;
}
- if ((fast_pool->count < 64) &&
- !time_after(now, fast_pool->last + HZ))
+ if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ))
return;
- r = &input_pool;
- if (!spin_trylock(&r->lock))
+ if (!spin_trylock(&input_pool.lock))
return;
fast_pool->last = now;
- __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool));
- spin_unlock(&r->lock);
+ __mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool));
+ spin_unlock(&input_pool.lock);
fast_pool->count = 0;
/* award one bit for the contents of the fast pool */
- credit_entropy_bits(r, 1);
+ credit_entropy_bits(1);
}
EXPORT_SYMBOL_GPL(add_interrupt_randomness);
@@ -1322,7 +1276,7 @@ void add_disk_randomness(struct gendisk *disk)
return;
/* first major is 1, so we get >= 0x200 here */
add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
- trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool));
+ trace_add_disk_randomness(disk_devt(disk), POOL_ENTROPY_BITS());
}
EXPORT_SYMBOL_GPL(add_disk_randomness);
#endif
@@ -1337,43 +1291,36 @@ EXPORT_SYMBOL_GPL(add_disk_randomness);
* This function decides how many bytes to actually take from the
* given pool, and also debits the entropy count accordingly.
*/
-static size_t account(struct entropy_store *r, size_t nbytes, int min,
- int reserved)
+static size_t account(size_t nbytes, int min)
{
- int entropy_count, orig, have_bytes;
+ int entropy_count, orig;
size_t ibytes, nfrac;
- BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
+ BUG_ON(input_pool.entropy_count > POOL_FRACBITS);
/* Can we pull enough? */
retry:
- entropy_count = orig = READ_ONCE(r->entropy_count);
- ibytes = nbytes;
- /* never pull more than available */
- have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
-
- if ((have_bytes -= reserved) < 0)
- have_bytes = 0;
- ibytes = min_t(size_t, ibytes, have_bytes);
- if (ibytes < min)
- ibytes = 0;
-
+ entropy_count = orig = READ_ONCE(input_pool.entropy_count);
if (WARN_ON(entropy_count < 0)) {
- pr_warn("negative entropy count: pool %s count %d\n",
- r->name, entropy_count);
+ pr_warn("negative entropy count: count %d\n", entropy_count);
entropy_count = 0;
}
- nfrac = ibytes << (ENTROPY_SHIFT + 3);
- if ((size_t) entropy_count > nfrac)
+
+ /* never pull more than available */
+ ibytes = min_t(size_t, nbytes, entropy_count >> (POOL_ENTROPY_SHIFT + 3));
+ if (ibytes < min)
+ ibytes = 0;
+ nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3);
+ if ((size_t)entropy_count > nfrac)
entropy_count -= nfrac;
else
entropy_count = 0;
- if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+ if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig)
goto retry;
- trace_debit_entropy(r->name, 8 * ibytes);
- if (ibytes && ENTROPY_BITS(r) < random_write_wakeup_bits) {
+ trace_debit_entropy(8 * ibytes);
+ if (ibytes && POOL_ENTROPY_BITS() < random_write_wakeup_bits) {
wake_up_interruptible(&random_write_wait);
kill_fasync(&fasync, SIGIO, POLL_OUT);
}
@@ -1386,7 +1333,7 @@ retry:
*
* Note: we assume that .poolwords is a multiple of 16 words.
*/
-static void extract_buf(struct entropy_store *r, __u8 *out)
+static void extract_buf(u8 *out)
{
struct blake2s_state state __aligned(__alignof__(unsigned long));
u8 hash[BLAKE2S_HASH_SIZE];
@@ -1408,9 +1355,8 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
}
/* Generate a hash across the pool */
- spin_lock_irqsave(&r->lock, flags);
- blake2s_update(&state, (const u8 *)r->pool,
- r->poolinfo->poolwords * sizeof(*r->pool));
+ spin_lock_irqsave(&input_pool.lock, flags);
+ blake2s_update(&state, (const u8 *)input_pool_data, POOL_BYTES);
blake2s_final(&state, hash); /* final zeros out state */
/*
@@ -1422,8 +1368,8 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
* brute-forcing the feedback as hard as brute-forcing the
* hash.
*/
- __mix_pool_bytes(r, hash, sizeof(hash));
- spin_unlock_irqrestore(&r->lock, flags);
+ __mix_pool_bytes(hash, sizeof(hash));
+ spin_unlock_irqrestore(&input_pool.lock, flags);
/* Note that EXTRACT_SIZE is half of hash size here, because above
* we've dumped the full length back into mixer. By reducing the
@@ -1433,23 +1379,13 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
memzero_explicit(hash, sizeof(hash));
}
-static ssize_t _extract_entropy(struct entropy_store *r, void *buf,
- size_t nbytes, int fips)
+static ssize_t _extract_entropy(void *buf, size_t nbytes)
{
ssize_t ret = 0, i;
- __u8 tmp[EXTRACT_SIZE];
- unsigned long flags;
+ u8 tmp[EXTRACT_SIZE];
while (nbytes) {
- extract_buf(r, tmp);
-
- if (fips) {
- spin_lock_irqsave(&r->lock, flags);
- if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
- panic("Hardware RNG duplicated output!\n");
- memcpy(r->last_data, tmp, EXTRACT_SIZE);
- spin_unlock_irqrestore(&r->lock, flags);
- }
+ extract_buf(tmp);
i = min_t(int, nbytes, EXTRACT_SIZE);
memcpy(buf, tmp, i);
nbytes -= i;
@@ -1468,42 +1404,19 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf,
* returns it in a buffer.
*
* The min parameter specifies the minimum amount we can pull before
- * failing to avoid races that defeat catastrophic reseeding while the
- * reserved parameter indicates how much entropy we must leave in the
- * pool after each pull to avoid starving other readers.
+ * failing to avoid races that defeat catastrophic reseeding.
*/
-static ssize_t extract_entropy(struct entropy_store *r, void *buf,
- size_t nbytes, int min, int reserved)
+static ssize_t extract_entropy(void *buf, size_t nbytes, int min)
{
- __u8 tmp[EXTRACT_SIZE];
- unsigned long flags;
-
- /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
- if (fips_enabled) {
- spin_lock_irqsave(&r->lock, flags);
- if (!r->last_data_init) {
- r->last_data_init = 1;
- spin_unlock_irqrestore(&r->lock, flags);
- trace_extract_entropy(r->name, EXTRACT_SIZE,
- ENTROPY_BITS(r), _RET_IP_);
- extract_buf(r, tmp);
- spin_lock_irqsave(&r->lock, flags);
- memcpy(r->last_data, tmp, EXTRACT_SIZE);
- }
- spin_unlock_irqrestore(&r->lock, flags);
- }
-
- trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_);
- nbytes = account(r, nbytes, min, reserved);
-
- return _extract_entropy(r, buf, nbytes, fips_enabled);
+ trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_);
+ nbytes = account(nbytes, min);
+ return _extract_entropy(buf, nbytes);
}
#define warn_unseeded_randomness(previous) \
- _warn_unseeded_randomness(__func__, (void *) _RET_IP_, (previous))
+ _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
-static void _warn_unseeded_randomness(const char *func_name, void *caller,
- void **previous)
+static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous)
{
#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
const bool print_once = false;
@@ -1511,8 +1424,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
static bool print_once __read_mostly;
#endif
- if (print_once ||
- crng_ready() ||
+ if (print_once || crng_ready() ||
(previous && (caller == READ_ONCE(*previous))))
return;
WRITE_ONCE(*previous, caller);
@@ -1520,9 +1432,8 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
print_once = true;
#endif
if (__ratelimit(&unseeded_warning))
- printk_deferred(KERN_NOTICE "random: %s called from %pS "
- "with crng_init=%d\n", func_name, caller,
- crng_init);
+ printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n",
+ func_name, caller, crng_init);
}
/*
@@ -1537,7 +1448,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
*/
static void _get_random_bytes(void *buf, int nbytes)
{
- __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
+ u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
trace_get_random_bytes(nbytes, _RET_IP_);
@@ -1565,7 +1476,6 @@ void get_random_bytes(void *buf, int nbytes)
}
EXPORT_SYMBOL(get_random_bytes);
-
/*
* Each time the timer fires, we expect that we got an unpredictable
* jump in the cycle counter. Even if the timer is running on another
@@ -1581,7 +1491,7 @@ EXPORT_SYMBOL(get_random_bytes);
*/
static void entropy_timer(struct timer_list *t)
{
- credit_entropy_bits(&input_pool, 1);
+ credit_entropy_bits(1);
}
/*
@@ -1604,15 +1514,15 @@ static void try_to_generate_entropy(void)
timer_setup_on_stack(&stack.timer, entropy_timer, 0);
while (!crng_ready()) {
if (!timer_pending(&stack.timer))
- mod_timer(&stack.timer, jiffies+1);
- mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now));
+ mod_timer(&stack.timer, jiffies + 1);
+ mix_pool_bytes(&stack.now, sizeof(stack.now));
schedule();
stack.now = random_get_entropy();
}
del_timer_sync(&stack.timer);
destroy_timer_on_stack(&stack.timer);
- mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now));
+ mix_pool_bytes(&stack.now, sizeof(stack.now));
}
/*
@@ -1731,7 +1641,7 @@ EXPORT_SYMBOL(del_random_ready_callback);
int __must_check get_random_bytes_arch(void *buf, int nbytes)
{
int left = nbytes;
- char *p = buf;
+ u8 *p = buf;
trace_get_random_bytes_arch(left, _RET_IP_);
while (left) {
@@ -1753,26 +1663,24 @@ EXPORT_SYMBOL(get_random_bytes_arch);
/*
* init_std_data - initialize pool with system data
*
- * @r: pool to initialize
- *
* This function clears the pool's entropy count and mixes some system
* data into the pool to prepare it for use. The pool is not cleared
* as that can only decrease the entropy in the pool.
*/
-static void __init init_std_data(struct entropy_store *r)
+static void __init init_std_data(void)
{
int i;
ktime_t now = ktime_get_real();
unsigned long rv;
- mix_pool_bytes(r, &now, sizeof(now));
- for (i = r->poolinfo->poolbytes; i > 0; i -= sizeof(rv)) {
+ mix_pool_bytes(&now, sizeof(now));
+ for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) {
if (!arch_get_random_seed_long(&rv) &&
!arch_get_random_long(&rv))
rv = random_get_entropy();
- mix_pool_bytes(r, &rv, sizeof(rv));
+ mix_pool_bytes(&rv, sizeof(rv));
}
- mix_pool_bytes(r, utsname(), sizeof(*(utsname())));
+ mix_pool_bytes(utsname(), sizeof(*(utsname())));
}
/*
@@ -1787,7 +1695,7 @@ static void __init init_std_data(struct entropy_store *r)
*/
int __init rand_initialize(void)
{
- init_std_data(&input_pool);
+ init_std_data();
if (crng_need_final_init)
crng_finalize_init(&primary_crng);
crng_initialize_primary(&primary_crng);
@@ -1816,20 +1724,19 @@ void rand_initialize_disk(struct gendisk *disk)
}
#endif
-static ssize_t
-urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes,
- loff_t *ppos)
+static ssize_t urandom_read_nowarn(struct file *file, char __user *buf,
+ size_t nbytes, loff_t *ppos)
{
int ret;
- nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3));
+ nbytes = min_t(size_t, nbytes, INT_MAX >> (POOL_ENTROPY_SHIFT + 3));
ret = extract_crng_user(buf, nbytes);
- trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool));
+ trace_urandom_read(8 * nbytes, 0, POOL_ENTROPY_BITS());
return ret;
}
-static ssize_t
-urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos)
{
static int maxwarn = 10;
@@ -1843,8 +1750,8 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
return urandom_read_nowarn(file, buf, nbytes, ppos);
}
-static ssize_t
-random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos)
{
int ret;
@@ -1854,8 +1761,7 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
return urandom_read_nowarn(file, buf, nbytes, ppos);
}
-static __poll_t
-random_poll(struct file *file, poll_table * wait)
+static __poll_t random_poll(struct file *file, poll_table *wait)
{
__poll_t mask;
@@ -1864,16 +1770,15 @@ random_poll(struct file *file, poll_table * wait)
mask = 0;
if (crng_ready())
mask |= EPOLLIN | EPOLLRDNORM;
- if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
+ if (POOL_ENTROPY_BITS() < random_write_wakeup_bits)
mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
-static int
-write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
+static int write_pool(const char __user *buffer, size_t count)
{
size_t bytes;
- __u32 t, buf[16];
+ u32 t, buf[16];
const char __user *p = buffer;
while (count > 0) {
@@ -1883,7 +1788,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
if (copy_from_user(&buf, p, bytes))
return -EFAULT;
- for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) {
+ for (b = bytes; b > 0; b -= sizeof(u32), i++) {
if (!arch_get_random_int(&t))
break;
buf[i] ^= t;
@@ -1892,7 +1797,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
count -= bytes;
p += bytes;
- mix_pool_bytes(r, buf, bytes);
+ mix_pool_bytes(buf, bytes);
cond_resched();
}
@@ -1904,7 +1809,7 @@ static ssize_t random_write(struct file *file, const char __user *buffer,
{
size_t ret;
- ret = write_pool(&input_pool, buffer, count);
+ ret = write_pool(buffer, count);
if (ret)
return ret;
@@ -1920,7 +1825,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
switch (cmd) {
case RNDGETENTCNT:
/* inherently racy, no point locking */
- ent_count = ENTROPY_BITS(&input_pool);
+ ent_count = POOL_ENTROPY_BITS();
if (put_user(ent_count, p))
return -EFAULT;
return 0;
@@ -1929,7 +1834,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -EPERM;
if (get_user(ent_count, p))
return -EFAULT;
- return credit_entropy_bits_safe(&input_pool, ent_count);
+ return credit_entropy_bits_safe(ent_count);
case RNDADDENTROPY:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1939,11 +1844,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -EINVAL;
if (get_user(size, p++))
return -EFAULT;
- retval = write_pool(&input_pool, (const char __user *)p,
- size);
+ retval = write_pool((const char __user *)p, size);
if (retval < 0)
return retval;
- return credit_entropy_bits_safe(&input_pool, ent_count);
+ return credit_entropy_bits_safe(ent_count);
case RNDZAPENTCNT:
case RNDCLEARPOOL:
/*
@@ -1959,7 +1863,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -EPERM;
if (crng_init < 2)
return -ENODATA;
- crng_reseed(&primary_crng, &input_pool);
+ crng_reseed(&primary_crng, true);
WRITE_ONCE(crng_global_init_time, jiffies - 1);
return 0;
default:
@@ -1973,9 +1877,9 @@ static int random_fasync(int fd, struct file *filp, int on)
}
const struct file_operations random_fops = {
- .read = random_read,
+ .read = random_read,
.write = random_write,
- .poll = random_poll,
+ .poll = random_poll,
.unlocked_ioctl = random_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.fasync = random_fasync,
@@ -1983,7 +1887,7 @@ const struct file_operations random_fops = {
};
const struct file_operations urandom_fops = {
- .read = urandom_read,
+ .read = urandom_read,
.write = random_write,
.unlocked_ioctl = random_ioctl,
.compat_ioctl = compat_ptr_ioctl,
@@ -1991,19 +1895,19 @@ const struct file_operations urandom_fops = {
.llseek = noop_llseek,
};
-SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
- unsigned int, flags)
+SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
+ flags)
{
int ret;
- if (flags & ~(GRND_NONBLOCK|GRND_RANDOM|GRND_INSECURE))
+ if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
return -EINVAL;
/*
* Requesting insecure and blocking randomness at the same time makes
* no sense.
*/
- if ((flags & (GRND_INSECURE|GRND_RANDOM)) == (GRND_INSECURE|GRND_RANDOM))
+ if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
return -EINVAL;
if (count > INT_MAX)
@@ -2030,7 +1934,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
#include <linux/sysctl.h>
static int min_write_thresh;
-static int max_write_thresh = INPUT_POOL_WORDS * 32;
+static int max_write_thresh = POOL_BITS;
static int random_min_urandom_seed = 60;
static char sysctl_bootid[16];
@@ -2043,8 +1947,8 @@ static char sysctl_bootid[16];
* returned as an ASCII string in the standard UUID format; if via the
* sysctl system call, as 16 bytes of binary data.
*/
-static int proc_do_uuid(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
struct ctl_table fake_table;
unsigned char buf[64], tmp_uuid[16], *uuid;
@@ -2073,13 +1977,13 @@ static int proc_do_uuid(struct ctl_table *table, int write,
/*
* Return entropy available scaled to integral bits
*/
-static int proc_do_entropy(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_do_entropy(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
struct ctl_table fake_table;
int entropy_count;
- entropy_count = *(int *)table->data >> ENTROPY_SHIFT;
+ entropy_count = *(int *)table->data >> POOL_ENTROPY_SHIFT;
fake_table.data = &entropy_count;
fake_table.maxlen = sizeof(entropy_count);
@@ -2087,9 +1991,8 @@ static int proc_do_entropy(struct ctl_table *table, int write,
return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
}
-static int sysctl_poolsize = INPUT_POOL_WORDS * 32;
-extern struct ctl_table random_table[];
-struct ctl_table random_table[] = {
+static int sysctl_poolsize = POOL_BITS;
+static struct ctl_table random_table[] = {
{
.procname = "poolsize",
.data = &sysctl_poolsize,
@@ -2151,7 +2054,18 @@ struct ctl_table random_table[] = {
#endif
{ }
};
-#endif /* CONFIG_SYSCTL */
+
+/*
+ * rand_initialize() is called before sysctl_init(),
+ * so we cannot call register_sysctl_init() in rand_initialize()
+ */
+static int __init random_sysctls_init(void)
+{
+ register_sysctl_init("kernel/random", random_table);
+ return 0;
+}
+device_initcall(random_sysctls_init);
+#endif /* CONFIG_SYSCTL */
struct batched_entropy {
union {
@@ -2171,7 +2085,7 @@ struct batched_entropy {
* point prior.
*/
static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
- .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock),
+ .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock),
};
u64 get_random_u64(void)
@@ -2196,7 +2110,7 @@ u64 get_random_u64(void)
EXPORT_SYMBOL(get_random_u64);
static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
- .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock),
+ .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock),
};
u32 get_random_u32(void)
{
@@ -2228,7 +2142,7 @@ static void invalidate_batched_entropy(void)
int cpu;
unsigned long flags;
- for_each_possible_cpu (cpu) {
+ for_each_possible_cpu(cpu) {
struct batched_entropy *batched_entropy;
batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu);
@@ -2257,8 +2171,7 @@ static void invalidate_batched_entropy(void)
* Return: A page aligned address within [start, start + range). On error,
* @start is returned.
*/
-unsigned long
-randomize_page(unsigned long start, unsigned long range)
+unsigned long randomize_page(unsigned long start, unsigned long range)
{
if (!PAGE_ALIGNED(start)) {
range -= PAGE_ALIGN(start) - start;
@@ -2283,11 +2196,9 @@ randomize_page(unsigned long start, unsigned long range)
void add_hwgenerator_randomness(const char *buffer, size_t count,
size_t entropy)
{
- struct entropy_store *poolp = &input_pool;
-
if (unlikely(crng_init == 0)) {
size_t ret = crng_fast_load(buffer, count);
- mix_pool_bytes(poolp, buffer, ret);
+ mix_pool_bytes(buffer, ret);
count -= ret;
buffer += ret;
if (!count || crng_init == 0)
@@ -2300,9 +2211,9 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
*/
wait_event_interruptible(random_write_wait,
!system_wq || kthread_should_stop() ||
- ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
- mix_pool_bytes(poolp, buffer, count);
- credit_entropy_bits(poolp, entropy);
+ POOL_ENTROPY_BITS() <= random_write_wakeup_bits);
+ mix_pool_bytes(buffer, count);
+ credit_entropy_bits(entropy);
}
EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 660c5c388c29..2359889a35a0 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1958,7 +1958,7 @@ static void virtcons_remove(struct virtio_device *vdev)
spin_unlock_irq(&pdrvdata_lock);
/* Disable interrupts for vqs */
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
/* Finish up work that's lined up */
if (use_multiport(portdev))
cancel_work_sync(&portdev->control_work);
@@ -2148,7 +2148,7 @@ static int virtcons_freeze(struct virtio_device *vdev)
portdev = vdev->priv;
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
if (use_multiport(portdev))
virtqueue_disable_cb(portdev->c_ivq);
diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c
index 57ae183982d8..f7b41366666e 100644
--- a/drivers/clk/clk-si5341.c
+++ b/drivers/clk/clk-si5341.c
@@ -1740,7 +1740,7 @@ static int si5341_probe(struct i2c_client *client,
clk_prepare(data->clk[i].hw.clk);
}
- err = of_clk_add_hw_provider(client->dev.of_node, of_clk_si5341_get,
+ err = devm_of_clk_add_hw_provider(&client->dev, of_clk_si5341_get,
data);
if (err) {
dev_err(&client->dev, "unable to add clk provider\n");
diff --git a/drivers/clk/mediatek/clk-mt7986-apmixed.c b/drivers/clk/mediatek/clk-mt7986-apmixed.c
index 76c8ebdeae96..98ec3887585f 100644
--- a/drivers/clk/mediatek/clk-mt7986-apmixed.c
+++ b/drivers/clk/mediatek/clk-mt7986-apmixed.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-1.0
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2021 MediaTek Inc.
* Author: Sam Shih <sam.shih@mediatek.com>
diff --git a/drivers/clk/mediatek/clk-mt7986-infracfg.c b/drivers/clk/mediatek/clk-mt7986-infracfg.c
index 3be168c34fc0..f209c559fbc3 100644
--- a/drivers/clk/mediatek/clk-mt7986-infracfg.c
+++ b/drivers/clk/mediatek/clk-mt7986-infracfg.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-1.0
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2021 MediaTek Inc.
* Author: Sam Shih <sam.shih@mediatek.com>
diff --git a/drivers/clk/mediatek/clk-mt7986-topckgen.c b/drivers/clk/mediatek/clk-mt7986-topckgen.c
index 8550e2be7773..8f6f79b6e31e 100644
--- a/drivers/clk/mediatek/clk-mt7986-topckgen.c
+++ b/drivers/clk/mediatek/clk-mt7986-topckgen.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-1.0
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2021 MediaTek Inc.
* Author: Sam Shih <sam.shih@mediatek.com>
diff --git a/drivers/clk/visconti/pll.c b/drivers/clk/visconti/pll.c
index a2398bc6c6e4..a484cb945d67 100644
--- a/drivers/clk/visconti/pll.c
+++ b/drivers/clk/visconti/pll.c
@@ -246,7 +246,6 @@ static struct clk_hw *visconti_register_pll(struct visconti_pll_provider *ctx,
{
struct clk_init_data init;
struct visconti_pll *pll;
- struct clk *pll_clk;
struct clk_hw *pll_hw_clk;
size_t len;
int ret;
@@ -277,7 +276,7 @@ static struct clk_hw *visconti_register_pll(struct visconti_pll_provider *ctx,
pll_hw_clk = &pll->hw;
ret = clk_hw_register(NULL, &pll->hw);
if (ret) {
- pr_err("failed to register pll clock %s : %ld\n", name, PTR_ERR(pll_clk));
+ pr_err("failed to register pll clock %s : %d\n", name, ret);
kfree(pll);
pll_hw_clk = ERR_PTR(ret);
}
diff --git a/drivers/comedi/comedi_buf.c b/drivers/comedi/comedi_buf.c
index 06bfc859ab31..393966c09740 100644
--- a/drivers/comedi/comedi_buf.c
+++ b/drivers/comedi/comedi_buf.c
@@ -9,8 +9,7 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
-
-#include "comedidev.h"
+#include <linux/comedi/comedidev.h>
#include "comedi_internal.h"
#ifdef PAGE_KERNEL_NOCACHE
diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c
index 763cea8418f8..55a0cae04b8d 100644
--- a/drivers/comedi/comedi_fops.c
+++ b/drivers/comedi/comedi_fops.c
@@ -23,7 +23,7 @@
#include <linux/poll.h>
#include <linux/device.h>
#include <linux/fs.h>
-#include "comedidev.h"
+#include <linux/comedi/comedidev.h>
#include <linux/cdev.h>
#include <linux/io.h>
diff --git a/drivers/comedi/comedi_pci.c b/drivers/comedi/comedi_pci.c
index 54739af7eb71..cc2581902195 100644
--- a/drivers/comedi/comedi_pci.c
+++ b/drivers/comedi/comedi_pci.c
@@ -9,8 +9,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/**
* comedi_to_pci_dev() - Return PCI device attached to COMEDI device
diff --git a/drivers/comedi/comedi_pcmcia.c b/drivers/comedi/comedi_pcmcia.c
index bb273bb202e6..c53aad0fc2ce 100644
--- a/drivers/comedi/comedi_pcmcia.c
+++ b/drivers/comedi/comedi_pcmcia.c
@@ -9,8 +9,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
-
-#include "comedi_pcmcia.h"
+#include <linux/comedi/comedi_pcmcia.h>
/**
* comedi_to_pcmcia_dev() - Return PCMCIA device attached to COMEDI device
diff --git a/drivers/comedi/comedi_usb.c b/drivers/comedi/comedi_usb.c
index eea8ebf32ed0..d11ea148ebf8 100644
--- a/drivers/comedi/comedi_usb.c
+++ b/drivers/comedi/comedi_usb.c
@@ -8,8 +8,7 @@
*/
#include <linux/module.h>
-
-#include "comedi_usb.h"
+#include <linux/comedi/comedi_usb.h>
/**
* comedi_to_usb_interface() - Return USB interface attached to COMEDI device
diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c
index 750a6ff3c03c..8eb1f699a857 100644
--- a/drivers/comedi/drivers.c
+++ b/drivers/comedi/drivers.c
@@ -17,8 +17,7 @@
#include <linux/dma-direction.h>
#include <linux/interrupt.h>
#include <linux/firmware.h>
-
-#include "comedidev.h"
+#include <linux/comedi/comedidev.h>
#include "comedi_internal.h"
struct comedi_driver *comedi_drivers;
diff --git a/drivers/comedi/drivers/8255.c b/drivers/comedi/drivers/8255.c
index e23335c75867..ced8ea09d4fa 100644
--- a/drivers/comedi/drivers/8255.c
+++ b/drivers/comedi/drivers/8255.c
@@ -40,9 +40,8 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
-
-#include "8255.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
static int dev_8255_attach(struct comedi_device *dev,
struct comedi_devconfig *it)
diff --git a/drivers/comedi/drivers/8255_pci.c b/drivers/comedi/drivers/8255_pci.c
index 5a810f0e532a..0fec048e3a53 100644
--- a/drivers/comedi/drivers/8255_pci.c
+++ b/drivers/comedi/drivers/8255_pci.c
@@ -53,10 +53,8 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
-
-#include "8255.h"
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8255.h>
enum pci_8255_boardid {
BOARD_ADLINK_PCI7224,
diff --git a/drivers/comedi/drivers/addi_apci_1032.c b/drivers/comedi/drivers/addi_apci_1032.c
index 81a246fbcc01..8eec6d9402de 100644
--- a/drivers/comedi/drivers/addi_apci_1032.c
+++ b/drivers/comedi/drivers/addi_apci_1032.c
@@ -63,8 +63,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
-#include "../comedi_pci.h"
#include "amcc_s5933.h"
/*
diff --git a/drivers/comedi/drivers/addi_apci_1500.c b/drivers/comedi/drivers/addi_apci_1500.c
index b04c15dcfb57..c94c78588889 100644
--- a/drivers/comedi/drivers/addi_apci_1500.c
+++ b/drivers/comedi/drivers/addi_apci_1500.c
@@ -14,8 +14,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
-#include "../comedi_pci.h"
#include "amcc_s5933.h"
#include "z8536.h"
diff --git a/drivers/comedi/drivers/addi_apci_1516.c b/drivers/comedi/drivers/addi_apci_1516.c
index 274ec9fb030c..3c48b72dad9d 100644
--- a/drivers/comedi/drivers/addi_apci_1516.c
+++ b/drivers/comedi/drivers/addi_apci_1516.c
@@ -14,8 +14,8 @@
*/
#include <linux/module.h>
+#include <linux/comedi/comedi_pci.h>
-#include "../comedi_pci.h"
#include "addi_watchdog.h"
/*
diff --git a/drivers/comedi/drivers/addi_apci_1564.c b/drivers/comedi/drivers/addi_apci_1564.c
index 06fc7ed96200..0cd40948bee7 100644
--- a/drivers/comedi/drivers/addi_apci_1564.c
+++ b/drivers/comedi/drivers/addi_apci_1564.c
@@ -68,8 +68,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
-#include "../comedi_pci.h"
#include "addi_tcw.h"
#include "addi_watchdog.h"
diff --git a/drivers/comedi/drivers/addi_apci_16xx.c b/drivers/comedi/drivers/addi_apci_16xx.c
index c306aa41df97..ec2c321d2431 100644
--- a/drivers/comedi/drivers/addi_apci_16xx.c
+++ b/drivers/comedi/drivers/addi_apci_16xx.c
@@ -14,8 +14,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/addi_apci_2032.c b/drivers/comedi/drivers/addi_apci_2032.c
index e9a2b37a4ae0..e048dfc3ec77 100644
--- a/drivers/comedi/drivers/addi_apci_2032.c
+++ b/drivers/comedi/drivers/addi_apci_2032.c
@@ -16,8 +16,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
+#include <linux/comedi/comedi_pci.h>
-#include "../comedi_pci.h"
#include "addi_watchdog.h"
/*
diff --git a/drivers/comedi/drivers/addi_apci_2200.c b/drivers/comedi/drivers/addi_apci_2200.c
index 4c5aee784bd9..00378c9dddc8 100644
--- a/drivers/comedi/drivers/addi_apci_2200.c
+++ b/drivers/comedi/drivers/addi_apci_2200.c
@@ -14,8 +14,8 @@
*/
#include <linux/module.h>
+#include <linux/comedi/comedi_pci.h>
-#include "../comedi_pci.h"
#include "addi_watchdog.h"
/*
diff --git a/drivers/comedi/drivers/addi_apci_3120.c b/drivers/comedi/drivers/addi_apci_3120.c
index 1ed3b33d1a30..28a242e69721 100644
--- a/drivers/comedi/drivers/addi_apci_3120.c
+++ b/drivers/comedi/drivers/addi_apci_3120.c
@@ -14,8 +14,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
-#include "../comedi_pci.h"
#include "amcc_s5933.h"
/*
diff --git a/drivers/comedi/drivers/addi_apci_3501.c b/drivers/comedi/drivers/addi_apci_3501.c
index f0c9642f3f1a..ecb5552f1785 100644
--- a/drivers/comedi/drivers/addi_apci_3501.c
+++ b/drivers/comedi/drivers/addi_apci_3501.c
@@ -41,8 +41,8 @@
*/
#include <linux/module.h>
+#include <linux/comedi/comedi_pci.h>
-#include "../comedi_pci.h"
#include "amcc_s5933.h"
/*
diff --git a/drivers/comedi/drivers/addi_apci_3xxx.c b/drivers/comedi/drivers/addi_apci_3xxx.c
index a90d59377e18..bc72273e6a29 100644
--- a/drivers/comedi/drivers/addi_apci_3xxx.c
+++ b/drivers/comedi/drivers/addi_apci_3xxx.c
@@ -15,8 +15,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#define CONV_UNIT_NS BIT(0)
#define CONV_UNIT_US BIT(1)
diff --git a/drivers/comedi/drivers/addi_watchdog.c b/drivers/comedi/drivers/addi_watchdog.c
index 69b323fb869f..ed87ab432020 100644
--- a/drivers/comedi/drivers/addi_watchdog.c
+++ b/drivers/comedi/drivers/addi_watchdog.c
@@ -10,7 +10,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#include "addi_tcw.h"
#include "addi_watchdog.h"
diff --git a/drivers/comedi/drivers/adl_pci6208.c b/drivers/comedi/drivers/adl_pci6208.c
index 9ae4cc523dd4..b27354a51f5c 100644
--- a/drivers/comedi/drivers/adl_pci6208.c
+++ b/drivers/comedi/drivers/adl_pci6208.c
@@ -24,8 +24,7 @@
#include <linux/module.h>
#include <linux/delay.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* PCI-6208/6216-GL register map
diff --git a/drivers/comedi/drivers/adl_pci7x3x.c b/drivers/comedi/drivers/adl_pci7x3x.c
index 8fc45638ff59..e9f22de9b6f1 100644
--- a/drivers/comedi/drivers/adl_pci7x3x.c
+++ b/drivers/comedi/drivers/adl_pci7x3x.c
@@ -46,8 +46,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "plx9052.h"
diff --git a/drivers/comedi/drivers/adl_pci8164.c b/drivers/comedi/drivers/adl_pci8164.c
index d5e1bda81557..0c513a67a264 100644
--- a/drivers/comedi/drivers/adl_pci8164.c
+++ b/drivers/comedi/drivers/adl_pci8164.c
@@ -19,8 +19,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#define PCI8164_AXIS(x) ((x) * 0x08)
#define PCI8164_CMD_MSTS_REG 0x00
diff --git a/drivers/comedi/drivers/adl_pci9111.c b/drivers/comedi/drivers/adl_pci9111.c
index a062c5ab20e9..c50f94272a74 100644
--- a/drivers/comedi/drivers/adl_pci9111.c
+++ b/drivers/comedi/drivers/adl_pci9111.c
@@ -42,11 +42,10 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8254.h>
#include "plx9052.h"
-#include "comedi_8254.h"
#define PCI9111_FIFO_HALF_SIZE 512
diff --git a/drivers/comedi/drivers/adl_pci9118.c b/drivers/comedi/drivers/adl_pci9118.c
index cda3a4267dca..9a816c718303 100644
--- a/drivers/comedi/drivers/adl_pci9118.c
+++ b/drivers/comedi/drivers/adl_pci9118.c
@@ -78,11 +78,10 @@
#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8254.h>
#include "amcc_s5933.h"
-#include "comedi_8254.h"
/*
* PCI BAR2 Register map (dev->iobase)
diff --git a/drivers/comedi/drivers/adq12b.c b/drivers/comedi/drivers/adq12b.c
index d719f76709ef..19d765182006 100644
--- a/drivers/comedi/drivers/adq12b.c
+++ b/drivers/comedi/drivers/adq12b.c
@@ -48,8 +48,7 @@
#include <linux/module.h>
#include <linux/delay.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/* address scheme (page 2.17 of the manual) */
#define ADQ12B_CTREG 0x00
diff --git a/drivers/comedi/drivers/adv_pci1710.c b/drivers/comedi/drivers/adv_pci1710.c
index 090607760be6..4f2639968260 100644
--- a/drivers/comedi/drivers/adv_pci1710.c
+++ b/drivers/comedi/drivers/adv_pci1710.c
@@ -30,10 +30,9 @@
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8254.h>
-#include "../comedi_pci.h"
-
-#include "comedi_8254.h"
#include "amcc_s5933.h"
/*
diff --git a/drivers/comedi/drivers/adv_pci1720.c b/drivers/comedi/drivers/adv_pci1720.c
index 2fcd7e8e7d85..2619591ba301 100644
--- a/drivers/comedi/drivers/adv_pci1720.c
+++ b/drivers/comedi/drivers/adv_pci1720.c
@@ -42,8 +42,7 @@
#include <linux/module.h>
#include <linux/delay.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* PCI BAR2 Register map (dev->iobase)
diff --git a/drivers/comedi/drivers/adv_pci1723.c b/drivers/comedi/drivers/adv_pci1723.c
index 23660a9fdb9c..e2aedb152068 100644
--- a/drivers/comedi/drivers/adv_pci1723.c
+++ b/drivers/comedi/drivers/adv_pci1723.c
@@ -32,8 +32,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* PCI Bar 2 I/O Register map (dev->iobase)
diff --git a/drivers/comedi/drivers/adv_pci1724.c b/drivers/comedi/drivers/adv_pci1724.c
index e8ab573c839f..bb43b7deeb56 100644
--- a/drivers/comedi/drivers/adv_pci1724.c
+++ b/drivers/comedi/drivers/adv_pci1724.c
@@ -38,8 +38,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* PCI bar 2 Register I/O map (dev->iobase)
diff --git a/drivers/comedi/drivers/adv_pci1760.c b/drivers/comedi/drivers/adv_pci1760.c
index 6de8ab97d346..fcfc2e299110 100644
--- a/drivers/comedi/drivers/adv_pci1760.c
+++ b/drivers/comedi/drivers/adv_pci1760.c
@@ -22,8 +22,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* PCI-1760 Register Map
diff --git a/drivers/comedi/drivers/adv_pci_dio.c b/drivers/comedi/drivers/adv_pci_dio.c
index 54c7419c8ca6..efa3e46b554b 100644
--- a/drivers/comedi/drivers/adv_pci_dio.c
+++ b/drivers/comedi/drivers/adv_pci_dio.c
@@ -23,11 +23,9 @@
#include <linux/module.h>
#include <linux/delay.h>
-
-#include "../comedi_pci.h"
-
-#include "8255.h"
-#include "comedi_8254.h"
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8255.h>
+#include <linux/comedi/comedi_8254.h>
/*
* Register offset definitions
diff --git a/drivers/comedi/drivers/aio_aio12_8.c b/drivers/comedi/drivers/aio_aio12_8.c
index 4829115921a3..30b8a32204d8 100644
--- a/drivers/comedi/drivers/aio_aio12_8.c
+++ b/drivers/comedi/drivers/aio_aio12_8.c
@@ -22,10 +22,9 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
-
-#include "comedi_8254.h"
-#include "8255.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
+#include <linux/comedi/comedi_8254.h>
/*
* Register map
diff --git a/drivers/comedi/drivers/aio_iiro_16.c b/drivers/comedi/drivers/aio_iiro_16.c
index fe3876235075..b00fab0b89d4 100644
--- a/drivers/comedi/drivers/aio_iiro_16.c
+++ b/drivers/comedi/drivers/aio_iiro_16.c
@@ -30,8 +30,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#define AIO_IIRO_16_RELAY_0_7 0x00
#define AIO_IIRO_16_INPUT_0_7 0x01
diff --git a/drivers/comedi/drivers/amplc_dio200.c b/drivers/comedi/drivers/amplc_dio200.c
index fa19c9e7c56b..4544bcdd8a70 100644
--- a/drivers/comedi/drivers/amplc_dio200.c
+++ b/drivers/comedi/drivers/amplc_dio200.c
@@ -185,7 +185,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#include "amplc_dio200.h"
diff --git a/drivers/comedi/drivers/amplc_dio200_common.c b/drivers/comedi/drivers/amplc_dio200_common.c
index a3454130d5f8..ff651f2eb86c 100644
--- a/drivers/comedi/drivers/amplc_dio200_common.c
+++ b/drivers/comedi/drivers/amplc_dio200_common.c
@@ -12,12 +12,11 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h> /* only for register defines */
+#include <linux/comedi/comedi_8254.h>
#include "amplc_dio200.h"
-#include "comedi_8254.h"
-#include "8255.h" /* only for register defines */
/* 200 series registers */
#define DIO200_IO_SIZE 0x20
diff --git a/drivers/comedi/drivers/amplc_dio200_pci.c b/drivers/comedi/drivers/amplc_dio200_pci.c
index 1bd7a42c8464..527994d82a1f 100644
--- a/drivers/comedi/drivers/amplc_dio200_pci.c
+++ b/drivers/comedi/drivers/amplc_dio200_pci.c
@@ -214,8 +214,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "amplc_dio200.h"
diff --git a/drivers/comedi/drivers/amplc_pc236.c b/drivers/comedi/drivers/amplc_pc236.c
index c377af1d5246..b21e0c906aab 100644
--- a/drivers/comedi/drivers/amplc_pc236.c
+++ b/drivers/comedi/drivers/amplc_pc236.c
@@ -32,8 +32,7 @@
*/
#include <linux/module.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#include "amplc_pc236.h"
diff --git a/drivers/comedi/drivers/amplc_pc236_common.c b/drivers/comedi/drivers/amplc_pc236_common.c
index 981d281e87a1..9f4f89b1ef23 100644
--- a/drivers/comedi/drivers/amplc_pc236_common.c
+++ b/drivers/comedi/drivers/amplc_pc236_common.c
@@ -11,11 +11,10 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
#include "amplc_pc236.h"
-#include "8255.h"
static void pc236_intr_update(struct comedi_device *dev, bool enable)
{
diff --git a/drivers/comedi/drivers/amplc_pc263.c b/drivers/comedi/drivers/amplc_pc263.c
index 68da6098ee84..d7f088a8a5e3 100644
--- a/drivers/comedi/drivers/amplc_pc263.c
+++ b/drivers/comedi/drivers/amplc_pc263.c
@@ -25,7 +25,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/* PC263 registers */
#define PC263_DO_0_7_REG 0x00
diff --git a/drivers/comedi/drivers/amplc_pci224.c b/drivers/comedi/drivers/amplc_pci224.c
index bcf6d61af863..5a04e55daeea 100644
--- a/drivers/comedi/drivers/amplc_pci224.c
+++ b/drivers/comedi/drivers/amplc_pci224.c
@@ -96,10 +96,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
-
-#include "../comedi_pci.h"
-
-#include "comedi_8254.h"
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8254.h>
/*
* PCI224/234 i/o space 1 (PCIBAR2) registers.
diff --git a/drivers/comedi/drivers/amplc_pci230.c b/drivers/comedi/drivers/amplc_pci230.c
index 8911dc2bd2c6..92ba8b8c0172 100644
--- a/drivers/comedi/drivers/amplc_pci230.c
+++ b/drivers/comedi/drivers/amplc_pci230.c
@@ -174,11 +174,9 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
-
-#include "comedi_8254.h"
-#include "8255.h"
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8255.h>
+#include <linux/comedi/comedi_8254.h>
/*
* PCI230 PCI configuration register information
diff --git a/drivers/comedi/drivers/amplc_pci236.c b/drivers/comedi/drivers/amplc_pci236.c
index e7f6fa4d101a..482eb261c333 100644
--- a/drivers/comedi/drivers/amplc_pci236.c
+++ b/drivers/comedi/drivers/amplc_pci236.c
@@ -34,8 +34,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "amplc_pc236.h"
#include "plx9052.h"
diff --git a/drivers/comedi/drivers/amplc_pci263.c b/drivers/comedi/drivers/amplc_pci263.c
index 9217973f1141..1609665c4b18 100644
--- a/drivers/comedi/drivers/amplc_pci263.c
+++ b/drivers/comedi/drivers/amplc_pci263.c
@@ -24,8 +24,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/* PCI263 registers */
#define PCI263_DO_0_7_REG 0x00
diff --git a/drivers/comedi/drivers/c6xdigio.c b/drivers/comedi/drivers/c6xdigio.c
index 786fd15698df..14b90d1c64dc 100644
--- a/drivers/comedi/drivers/c6xdigio.c
+++ b/drivers/comedi/drivers/c6xdigio.c
@@ -30,8 +30,7 @@
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/pnp.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/cb_das16_cs.c b/drivers/comedi/drivers/cb_das16_cs.c
index a5d171e71c33..8e0d2fa5f95d 100644
--- a/drivers/comedi/drivers/cb_das16_cs.c
+++ b/drivers/comedi/drivers/cb_das16_cs.c
@@ -27,10 +27,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-
-#include "../comedi_pcmcia.h"
-
-#include "comedi_8254.h"
+#include <linux/comedi/comedi_pcmcia.h>
+#include <linux/comedi/comedi_8254.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/cb_pcidas.c b/drivers/comedi/drivers/cb_pcidas.c
index 2f20bd56ec6c..0c7576b967fc 100644
--- a/drivers/comedi/drivers/cb_pcidas.c
+++ b/drivers/comedi/drivers/cb_pcidas.c
@@ -54,11 +54,10 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8255.h>
+#include <linux/comedi/comedi_8254.h>
-#include "../comedi_pci.h"
-
-#include "comedi_8254.h"
-#include "8255.h"
#include "amcc_s5933.h"
#define AI_BUFFER_SIZE 1024 /* max ai fifo size */
diff --git a/drivers/comedi/drivers/cb_pcidas64.c b/drivers/comedi/drivers/cb_pcidas64.c
index 41a8fea7f48a..ca6038a25f26 100644
--- a/drivers/comedi/drivers/cb_pcidas64.c
+++ b/drivers/comedi/drivers/cb_pcidas64.c
@@ -73,10 +73,9 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8255.h>
-#include "../comedi_pci.h"
-
-#include "8255.h"
#include "plx9080.h"
#define TIMER_BASE 25 /* 40MHz master clock */
diff --git a/drivers/comedi/drivers/cb_pcidda.c b/drivers/comedi/drivers/cb_pcidda.c
index 78cf1603638c..c52204a6bda4 100644
--- a/drivers/comedi/drivers/cb_pcidda.c
+++ b/drivers/comedi/drivers/cb_pcidda.c
@@ -27,10 +27,8 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
-
-#include "8255.h"
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8255.h>
#define EEPROM_SIZE 128 /* number of entries in eeprom */
/* maximum number of ao channels for supported boards */
diff --git a/drivers/comedi/drivers/cb_pcimdas.c b/drivers/comedi/drivers/cb_pcimdas.c
index 2292f69da4f4..8bdb00774f11 100644
--- a/drivers/comedi/drivers/cb_pcimdas.c
+++ b/drivers/comedi/drivers/cb_pcimdas.c
@@ -34,12 +34,11 @@
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8255.h>
+#include <linux/comedi/comedi_8254.h>
-#include "../comedi_pci.h"
-
-#include "comedi_8254.h"
#include "plx9052.h"
-#include "8255.h"
/*
* PCI Bar 1 Register map
diff --git a/drivers/comedi/drivers/cb_pcimdda.c b/drivers/comedi/drivers/cb_pcimdda.c
index 21fc7b3c5f60..bf8093a10315 100644
--- a/drivers/comedi/drivers/cb_pcimdda.c
+++ b/drivers/comedi/drivers/cb_pcimdda.c
@@ -67,10 +67,8 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
-
-#include "8255.h"
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8255.h>
/* device ids of the cards we support -- currently only 1 card supported */
#define PCI_ID_PCIM_DDA06_16 0x0053
diff --git a/drivers/comedi/drivers/comedi_8254.c b/drivers/comedi/drivers/comedi_8254.c
index 4bf5daa9e885..b4185c1b2695 100644
--- a/drivers/comedi/drivers/comedi_8254.c
+++ b/drivers/comedi/drivers/comedi_8254.c
@@ -116,10 +116,8 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/io.h>
-
-#include "../comedidev.h"
-
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
static unsigned int __i8254_read(struct comedi_8254 *i8254, unsigned int reg)
{
diff --git a/drivers/comedi/drivers/comedi_8255.c b/drivers/comedi/drivers/comedi_8255.c
index b7ca465933ee..5562b9cd0a17 100644
--- a/drivers/comedi/drivers/comedi_8255.c
+++ b/drivers/comedi/drivers/comedi_8255.c
@@ -29,9 +29,8 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
-
-#include "8255.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
struct subdev_8255_private {
unsigned long regbase;
diff --git a/drivers/comedi/drivers/comedi_bond.c b/drivers/comedi/drivers/comedi_bond.c
index 4392b5927a99..78c39fa84177 100644
--- a/drivers/comedi/drivers/comedi_bond.c
+++ b/drivers/comedi/drivers/comedi_bond.c
@@ -40,9 +40,9 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/slab.h>
-#include "../comedi.h"
-#include "../comedilib.h"
-#include "../comedidev.h"
+#include <linux/comedi.h>
+#include <linux/comedi/comedilib.h>
+#include <linux/comedi/comedidev.h>
struct bonded_device {
struct comedi_device *dev;
diff --git a/drivers/comedi/drivers/comedi_isadma.c b/drivers/comedi/drivers/comedi_isadma.c
index 479b58e209ba..700982464c53 100644
--- a/drivers/comedi/drivers/comedi_isadma.c
+++ b/drivers/comedi/drivers/comedi_isadma.c
@@ -9,10 +9,8 @@
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <asm/dma.h>
-
-#include "../comedidev.h"
-
-#include "comedi_isadma.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_isadma.h>
/**
* comedi_isadma_program - program and enable an ISA DMA transfer
diff --git a/drivers/comedi/drivers/comedi_parport.c b/drivers/comedi/drivers/comedi_parport.c
index 5338b5eea440..098738a688fe 100644
--- a/drivers/comedi/drivers/comedi_parport.c
+++ b/drivers/comedi/drivers/comedi_parport.c
@@ -57,8 +57,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register map
diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c
index cbc225eb1991..0b5c0af1cebf 100644
--- a/drivers/comedi/drivers/comedi_test.c
+++ b/drivers/comedi/drivers/comedi_test.c
@@ -45,10 +45,8 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
-
+#include <linux/comedi/comedidev.h>
#include <asm/div64.h>
-
#include <linux/timer.h>
#include <linux/ktime.h>
#include <linux/jiffies.h>
diff --git a/drivers/comedi/drivers/contec_pci_dio.c b/drivers/comedi/drivers/contec_pci_dio.c
index b8fdd9c1f166..41d42ff14144 100644
--- a/drivers/comedi/drivers/contec_pci_dio.c
+++ b/drivers/comedi/drivers/contec_pci_dio.c
@@ -18,8 +18,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* Register map
diff --git a/drivers/comedi/drivers/dac02.c b/drivers/comedi/drivers/dac02.c
index 5ef8114c2c85..4b011d66d7b0 100644
--- a/drivers/comedi/drivers/dac02.c
+++ b/drivers/comedi/drivers/dac02.c
@@ -25,8 +25,7 @@
*/
#include <linux/module.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* The output range is selected by jumpering pins on the I/O connector.
diff --git a/drivers/comedi/drivers/daqboard2000.c b/drivers/comedi/drivers/daqboard2000.c
index f64e747078bd..c0a4e1b06fb3 100644
--- a/drivers/comedi/drivers/daqboard2000.c
+++ b/drivers/comedi/drivers/daqboard2000.c
@@ -96,10 +96,9 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8255.h>
-#include "../comedi_pci.h"
-
-#include "8255.h"
#include "plx9080.h"
#define DB2K_FIRMWARE "daqboard2000_firmware.bin"
diff --git a/drivers/comedi/drivers/das08.c b/drivers/comedi/drivers/das08.c
index b50743c5b822..f8ab3af2e391 100644
--- a/drivers/comedi/drivers/das08.c
+++ b/drivers/comedi/drivers/das08.c
@@ -10,11 +10,10 @@
*/
#include <linux/module.h>
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
+#include <linux/comedi/comedi_8254.h>
-#include "../comedidev.h"
-
-#include "8255.h"
-#include "comedi_8254.h"
#include "das08.h"
/*
diff --git a/drivers/comedi/drivers/das08_cs.c b/drivers/comedi/drivers/das08_cs.c
index 223479f9ea3c..6075efcf10d6 100644
--- a/drivers/comedi/drivers/das08_cs.c
+++ b/drivers/comedi/drivers/das08_cs.c
@@ -30,8 +30,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pcmcia.h"
+#include <linux/comedi/comedi_pcmcia.h>
#include "das08.h"
diff --git a/drivers/comedi/drivers/das08_isa.c b/drivers/comedi/drivers/das08_isa.c
index 8c4cfa821423..3d43b77cc9f4 100644
--- a/drivers/comedi/drivers/das08_isa.c
+++ b/drivers/comedi/drivers/das08_isa.c
@@ -29,7 +29,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#include "das08.h"
diff --git a/drivers/comedi/drivers/das08_pci.c b/drivers/comedi/drivers/das08_pci.c
index 1cd903336a4c..982f3ab0ccbd 100644
--- a/drivers/comedi/drivers/das08_pci.c
+++ b/drivers/comedi/drivers/das08_pci.c
@@ -23,8 +23,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "das08.h"
diff --git a/drivers/comedi/drivers/das16.c b/drivers/comedi/drivers/das16.c
index 4ac2622b0fac..937a69ce0977 100644
--- a/drivers/comedi/drivers/das16.c
+++ b/drivers/comedi/drivers/das16.c
@@ -63,12 +63,10 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
-
-#include "comedi_isadma.h"
-#include "comedi_8254.h"
-#include "8255.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
+#include <linux/comedi/comedi_8254.h>
+#include <linux/comedi/comedi_isadma.h>
#define DAS16_DMA_SIZE 0xff00 /* size in bytes of allocated dma buffer */
diff --git a/drivers/comedi/drivers/das16m1.c b/drivers/comedi/drivers/das16m1.c
index 75f3dbbe97ac..275effb77746 100644
--- a/drivers/comedi/drivers/das16m1.c
+++ b/drivers/comedi/drivers/das16m1.c
@@ -42,10 +42,9 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
-#include "../comedidev.h"
-
-#include "8255.h"
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
+#include <linux/comedi/comedi_8254.h>
/*
* Register map (dev->iobase)
diff --git a/drivers/comedi/drivers/das1800.c b/drivers/comedi/drivers/das1800.c
index f50891a6ee7d..f09608c0f4ff 100644
--- a/drivers/comedi/drivers/das1800.c
+++ b/drivers/comedi/drivers/das1800.c
@@ -73,11 +73,9 @@
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/io.h>
-
-#include "../comedidev.h"
-
-#include "comedi_isadma.h"
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
+#include <linux/comedi/comedi_isadma.h>
/* misc. defines */
#define DAS1800_SIZE 16 /* uses 16 io addresses */
diff --git a/drivers/comedi/drivers/das6402.c b/drivers/comedi/drivers/das6402.c
index 96f4107b8054..1af394591e74 100644
--- a/drivers/comedi/drivers/das6402.c
+++ b/drivers/comedi/drivers/das6402.c
@@ -24,10 +24,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
-
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/das800.c b/drivers/comedi/drivers/das800.c
index bc08324f422f..4ca33f46eaa7 100644
--- a/drivers/comedi/drivers/das800.c
+++ b/drivers/comedi/drivers/das800.c
@@ -46,10 +46,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-
-#include "../comedidev.h"
-
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
#define N_CHAN_AI 8 /* number of analog input channels */
diff --git a/drivers/comedi/drivers/dmm32at.c b/drivers/comedi/drivers/dmm32at.c
index 56682f01242f..fe023c722aa3 100644
--- a/drivers/comedi/drivers/dmm32at.c
+++ b/drivers/comedi/drivers/dmm32at.c
@@ -29,9 +29,8 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-#include "../comedidev.h"
-
-#include "8255.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
/* Board register addresses */
#define DMM32AT_AI_START_CONV_REG 0x00
diff --git a/drivers/comedi/drivers/dt2801.c b/drivers/comedi/drivers/dt2801.c
index 0d571d817b4e..230d25010f58 100644
--- a/drivers/comedi/drivers/dt2801.c
+++ b/drivers/comedi/drivers/dt2801.c
@@ -31,7 +31,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#include <linux/delay.h>
#define DT2801_TIMEOUT 1000
diff --git a/drivers/comedi/drivers/dt2811.c b/drivers/comedi/drivers/dt2811.c
index 0eb5e6ba6916..dbb9f38da289 100644
--- a/drivers/comedi/drivers/dt2811.c
+++ b/drivers/comedi/drivers/dt2811.c
@@ -40,8 +40,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/dt2814.c b/drivers/comedi/drivers/dt2814.c
index ed44ce0d151b..c98a5a4a7aec 100644
--- a/drivers/comedi/drivers/dt2814.c
+++ b/drivers/comedi/drivers/dt2814.c
@@ -27,8 +27,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-#include "../comedidev.h"
-
+#include <linux/comedi/comedidev.h>
#include <linux/delay.h>
#define DT2814_CSR 0
diff --git a/drivers/comedi/drivers/dt2815.c b/drivers/comedi/drivers/dt2815.c
index 5906f32aa01f..03ba2fd18a21 100644
--- a/drivers/comedi/drivers/dt2815.c
+++ b/drivers/comedi/drivers/dt2815.c
@@ -43,8 +43,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
-
+#include <linux/comedi/comedidev.h>
#include <linux/delay.h>
#define DT2815_DATA 0
diff --git a/drivers/comedi/drivers/dt2817.c b/drivers/comedi/drivers/dt2817.c
index 7c1463e835d3..6738045c7531 100644
--- a/drivers/comedi/drivers/dt2817.c
+++ b/drivers/comedi/drivers/dt2817.c
@@ -25,7 +25,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#define DT2817_CR 0
#define DT2817_DATA 1
diff --git a/drivers/comedi/drivers/dt282x.c b/drivers/comedi/drivers/dt282x.c
index 2656b4b0e3d0..4ae80e6c7266 100644
--- a/drivers/comedi/drivers/dt282x.c
+++ b/drivers/comedi/drivers/dt282x.c
@@ -51,10 +51,8 @@
#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-
-#include "../comedidev.h"
-
-#include "comedi_isadma.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_isadma.h>
/*
* Register map
diff --git a/drivers/comedi/drivers/dt3000.c b/drivers/comedi/drivers/dt3000.c
index ec27aa4730d4..fc6e9c30e522 100644
--- a/drivers/comedi/drivers/dt3000.c
+++ b/drivers/comedi/drivers/dt3000.c
@@ -43,8 +43,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* PCI BAR0 - dual-ported RAM location definitions (dev->mmio)
diff --git a/drivers/comedi/drivers/dt9812.c b/drivers/comedi/drivers/dt9812.c
index 704b04d2980d..b37b9d8eca0d 100644
--- a/drivers/comedi/drivers/dt9812.c
+++ b/drivers/comedi/drivers/dt9812.c
@@ -34,8 +34,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
-
-#include "../comedi_usb.h"
+#include <linux/comedi/comedi_usb.h>
#define DT9812_DIAGS_BOARD_INFO_ADDR 0xFBFF
#define DT9812_MAX_WRITE_CMD_PIPE_SIZE 32
diff --git a/drivers/comedi/drivers/dyna_pci10xx.c b/drivers/comedi/drivers/dyna_pci10xx.c
index c224422bb126..407a038fb3e0 100644
--- a/drivers/comedi/drivers/dyna_pci10xx.c
+++ b/drivers/comedi/drivers/dyna_pci10xx.c
@@ -26,8 +26,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/mutex.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#define READ_TIMEOUT 50
diff --git a/drivers/comedi/drivers/fl512.c b/drivers/comedi/drivers/fl512.c
index b715f30659fa..139e801fc358 100644
--- a/drivers/comedi/drivers/fl512.c
+++ b/drivers/comedi/drivers/fl512.c
@@ -21,8 +21,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
-
+#include <linux/comedi/comedidev.h>
#include <linux/delay.h>
/*
diff --git a/drivers/comedi/drivers/gsc_hpdi.c b/drivers/comedi/drivers/gsc_hpdi.c
index e35e4a743714..c09d135df38d 100644
--- a/drivers/comedi/drivers/gsc_hpdi.c
+++ b/drivers/comedi/drivers/gsc_hpdi.c
@@ -34,8 +34,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "plx9080.h"
diff --git a/drivers/comedi/drivers/icp_multi.c b/drivers/comedi/drivers/icp_multi.c
index 16d2b78de83c..ac4b11dbd741 100644
--- a/drivers/comedi/drivers/icp_multi.c
+++ b/drivers/comedi/drivers/icp_multi.c
@@ -36,8 +36,7 @@
#include <linux/module.h>
#include <linux/delay.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#define ICP_MULTI_ADC_CSR 0x00 /* R/W: ADC command/status register */
#define ICP_MULTI_ADC_CSR_ST BIT(0) /* Start ADC */
diff --git a/drivers/comedi/drivers/ii_pci20kc.c b/drivers/comedi/drivers/ii_pci20kc.c
index 399255dbe388..4a19bf8462be 100644
--- a/drivers/comedi/drivers/ii_pci20kc.c
+++ b/drivers/comedi/drivers/ii_pci20kc.c
@@ -30,7 +30,7 @@
#include <linux/module.h>
#include <linux/io.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/jr3_pci.c b/drivers/comedi/drivers/jr3_pci.c
index f963080dd61f..951c23fa0369 100644
--- a/drivers/comedi/drivers/jr3_pci.c
+++ b/drivers/comedi/drivers/jr3_pci.c
@@ -35,8 +35,7 @@
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/timer.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "jr3_pci.h"
diff --git a/drivers/comedi/drivers/ke_counter.c b/drivers/comedi/drivers/ke_counter.c
index bef1b20c1c8d..b825cf60e1e0 100644
--- a/drivers/comedi/drivers/ke_counter.c
+++ b/drivers/comedi/drivers/ke_counter.c
@@ -19,8 +19,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* PCI BAR 0 Register I/O map
diff --git a/drivers/comedi/drivers/me4000.c b/drivers/comedi/drivers/me4000.c
index 0d3d4cafce2e..9aea02b86ed9 100644
--- a/drivers/comedi/drivers/me4000.c
+++ b/drivers/comedi/drivers/me4000.c
@@ -32,10 +32,9 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8254.h>
-#include "../comedi_pci.h"
-
-#include "comedi_8254.h"
#include "plx9052.h"
#define ME4000_FIRMWARE "me4000_firmware.bin"
diff --git a/drivers/comedi/drivers/me_daq.c b/drivers/comedi/drivers/me_daq.c
index ef18e387471b..076b15097afd 100644
--- a/drivers/comedi/drivers/me_daq.c
+++ b/drivers/comedi/drivers/me_daq.c
@@ -23,8 +23,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "plx9052.h"
diff --git a/drivers/comedi/drivers/mf6x4.c b/drivers/comedi/drivers/mf6x4.c
index 9da8dd748078..14f1d5e9cd59 100644
--- a/drivers/comedi/drivers/mf6x4.c
+++ b/drivers/comedi/drivers/mf6x4.c
@@ -18,8 +18,7 @@
#include <linux/module.h>
#include <linux/delay.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/* Registers present in BAR0 memory region */
#define MF624_GPIOC_REG 0x54
diff --git a/drivers/comedi/drivers/mite.c b/drivers/comedi/drivers/mite.c
index 70960e3ba878..88f3cd6f54f1 100644
--- a/drivers/comedi/drivers/mite.c
+++ b/drivers/comedi/drivers/mite.c
@@ -38,8 +38,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/log2.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "mite.h"
diff --git a/drivers/comedi/drivers/mpc624.c b/drivers/comedi/drivers/mpc624.c
index 646f4c086204..9e51ff528ed1 100644
--- a/drivers/comedi/drivers/mpc624.c
+++ b/drivers/comedi/drivers/mpc624.c
@@ -44,8 +44,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
-
+#include <linux/comedi/comedidev.h>
#include <linux/delay.h>
/* Offsets of different ports */
diff --git a/drivers/comedi/drivers/multiq3.c b/drivers/comedi/drivers/multiq3.c
index c1897aee9a9a..07ff5383da99 100644
--- a/drivers/comedi/drivers/multiq3.c
+++ b/drivers/comedi/drivers/multiq3.c
@@ -26,8 +26,7 @@
*/
#include <linux/module.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register map
diff --git a/drivers/comedi/drivers/ni_6527.c b/drivers/comedi/drivers/ni_6527.c
index f1a45cf7342a..ac5820085231 100644
--- a/drivers/comedi/drivers/ni_6527.c
+++ b/drivers/comedi/drivers/ni_6527.c
@@ -20,8 +20,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* PCI BAR1 - Register memory map
diff --git a/drivers/comedi/drivers/ni_65xx.c b/drivers/comedi/drivers/ni_65xx.c
index 7cd8497420f2..58334de3b253 100644
--- a/drivers/comedi/drivers/ni_65xx.c
+++ b/drivers/comedi/drivers/ni_65xx.c
@@ -49,8 +49,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
/*
* PCI BAR1 Register Map
diff --git a/drivers/comedi/drivers/ni_660x.c b/drivers/comedi/drivers/ni_660x.c
index e60d0125bcb2..0679bc39e0bc 100644
--- a/drivers/comedi/drivers/ni_660x.c
+++ b/drivers/comedi/drivers/ni_660x.c
@@ -26,8 +26,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "mite.h"
#include "ni_tio.h"
diff --git a/drivers/comedi/drivers/ni_670x.c b/drivers/comedi/drivers/ni_670x.c
index c197e47486be..c875d251c230 100644
--- a/drivers/comedi/drivers/ni_670x.c
+++ b/drivers/comedi/drivers/ni_670x.c
@@ -24,8 +24,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#define AO_VALUE_OFFSET 0x00
#define AO_CHAN_OFFSET 0x0c
diff --git a/drivers/comedi/drivers/ni_at_a2150.c b/drivers/comedi/drivers/ni_at_a2150.c
index 10ad7b88713e..df8d219e6723 100644
--- a/drivers/comedi/drivers/ni_at_a2150.c
+++ b/drivers/comedi/drivers/ni_at_a2150.c
@@ -39,11 +39,9 @@
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/io.h>
-
-#include "../comedidev.h"
-
-#include "comedi_isadma.h"
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
+#include <linux/comedi/comedi_isadma.h>
#define A2150_DMA_BUFFER_SIZE 0xff00 /* size in bytes of dma buffer */
diff --git a/drivers/comedi/drivers/ni_at_ao.c b/drivers/comedi/drivers/ni_at_ao.c
index 2a0fb4d460db..9f3147b72aa8 100644
--- a/drivers/comedi/drivers/ni_at_ao.c
+++ b/drivers/comedi/drivers/ni_at_ao.c
@@ -25,10 +25,8 @@
*/
#include <linux/module.h>
-
-#include "../comedidev.h"
-
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
/*
* Register map
diff --git a/drivers/comedi/drivers/ni_atmio.c b/drivers/comedi/drivers/ni_atmio.c
index 56c78da475e7..8876a1d24c56 100644
--- a/drivers/comedi/drivers/ni_atmio.c
+++ b/drivers/comedi/drivers/ni_atmio.c
@@ -73,12 +73,11 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-#include "../comedidev.h"
-
+#include <linux/comedi/comedidev.h>
#include <linux/isapnp.h>
+#include <linux/comedi/comedi_8255.h>
#include "ni_stc.h"
-#include "8255.h"
/* AT specific setup */
static const struct ni_board_struct ni_boards[] = {
diff --git a/drivers/comedi/drivers/ni_atmio16d.c b/drivers/comedi/drivers/ni_atmio16d.c
index dffce1aa3e69..9fa902529a8e 100644
--- a/drivers/comedi/drivers/ni_atmio16d.c
+++ b/drivers/comedi/drivers/ni_atmio16d.c
@@ -39,9 +39,8 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-#include "../comedidev.h"
-
-#include "8255.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
/* Configuration and Status Registers */
#define COM_REG_1 0x00 /* wo 16 */
diff --git a/drivers/comedi/drivers/ni_daq_700.c b/drivers/comedi/drivers/ni_daq_700.c
index d40fc89f9cef..0ef20e9a8bc4 100644
--- a/drivers/comedi/drivers/ni_daq_700.c
+++ b/drivers/comedi/drivers/ni_daq_700.c
@@ -41,8 +41,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pcmcia.h"
+#include <linux/comedi/comedi_pcmcia.h>
/* daqcard700 registers */
#define DIO_W 0x04 /* WO 8bit */
diff --git a/drivers/comedi/drivers/ni_daq_dio24.c b/drivers/comedi/drivers/ni_daq_dio24.c
index 44fb65afc218..487733111023 100644
--- a/drivers/comedi/drivers/ni_daq_dio24.c
+++ b/drivers/comedi/drivers/ni_daq_dio24.c
@@ -23,9 +23,8 @@
*/
#include <linux/module.h>
-#include "../comedi_pcmcia.h"
-
-#include "8255.h"
+#include <linux/comedi/comedi_pcmcia.h>
+#include <linux/comedi/comedi_8255.h>
static int dio24_auto_attach(struct comedi_device *dev,
unsigned long context)
diff --git a/drivers/comedi/drivers/ni_labpc.c b/drivers/comedi/drivers/ni_labpc.c
index 1f4a07bd1d26..b25a8e117072 100644
--- a/drivers/comedi/drivers/ni_labpc.c
+++ b/drivers/comedi/drivers/ni_labpc.c
@@ -48,8 +48,7 @@
*/
#include <linux/module.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#include "ni_labpc.h"
#include "ni_labpc_isadma.h"
diff --git a/drivers/comedi/drivers/ni_labpc_common.c b/drivers/comedi/drivers/ni_labpc_common.c
index dd97946eacaf..763249653228 100644
--- a/drivers/comedi/drivers/ni_labpc_common.c
+++ b/drivers/comedi/drivers/ni_labpc_common.c
@@ -12,11 +12,10 @@
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
+#include <linux/comedi/comedi_8254.h>
-#include "../comedidev.h"
-
-#include "comedi_8254.h"
-#include "8255.h"
#include "ni_labpc.h"
#include "ni_labpc_regs.h"
#include "ni_labpc_isadma.h"
diff --git a/drivers/comedi/drivers/ni_labpc_cs.c b/drivers/comedi/drivers/ni_labpc_cs.c
index 4f7e2fe21254..62fecb50ec6e 100644
--- a/drivers/comedi/drivers/ni_labpc_cs.c
+++ b/drivers/comedi/drivers/ni_labpc_cs.c
@@ -38,8 +38,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pcmcia.h"
+#include <linux/comedi/comedi_pcmcia.h>
#include "ni_labpc.h"
diff --git a/drivers/comedi/drivers/ni_labpc_isadma.c b/drivers/comedi/drivers/ni_labpc_isadma.c
index a551aca6e615..0652ca8345b6 100644
--- a/drivers/comedi/drivers/ni_labpc_isadma.c
+++ b/drivers/comedi/drivers/ni_labpc_isadma.c
@@ -10,10 +10,9 @@
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_isadma.h>
-#include "../comedidev.h"
-
-#include "comedi_isadma.h"
#include "ni_labpc.h"
#include "ni_labpc_regs.h"
#include "ni_labpc_isadma.h"
diff --git a/drivers/comedi/drivers/ni_labpc_pci.c b/drivers/comedi/drivers/ni_labpc_pci.c
index ec180b0fedf7..e2a44bbd9fa6 100644
--- a/drivers/comedi/drivers/ni_labpc_pci.c
+++ b/drivers/comedi/drivers/ni_labpc_pci.c
@@ -22,8 +22,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "ni_labpc.h"
diff --git a/drivers/comedi/drivers/ni_mio_common.c b/drivers/comedi/drivers/ni_mio_common.c
index 4f80a4991f95..d39998565808 100644
--- a/drivers/comedi/drivers/ni_mio_common.c
+++ b/drivers/comedi/drivers/ni_mio_common.c
@@ -43,7 +43,7 @@
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/delay.h>
-#include "8255.h"
+#include <linux/comedi/comedi_8255.h>
#include "mite.h"
/* A timeout count */
diff --git a/drivers/comedi/drivers/ni_mio_cs.c b/drivers/comedi/drivers/ni_mio_cs.c
index 4f37b4e58f09..796f0b743772 100644
--- a/drivers/comedi/drivers/ni_mio_cs.c
+++ b/drivers/comedi/drivers/ni_mio_cs.c
@@ -28,10 +28,10 @@
#include <linux/module.h>
#include <linux/delay.h>
+#include <linux/comedi/comedi_pcmcia.h>
+#include <linux/comedi/comedi_8255.h>
-#include "../comedi_pcmcia.h"
#include "ni_stc.h"
-#include "8255.h"
/*
* AT specific setup
diff --git a/drivers/comedi/drivers/ni_pcidio.c b/drivers/comedi/drivers/ni_pcidio.c
index 623f8d08d13a..2d58e83420e8 100644
--- a/drivers/comedi/drivers/ni_pcidio.c
+++ b/drivers/comedi/drivers/ni_pcidio.c
@@ -42,8 +42,7 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "mite.h"
diff --git a/drivers/comedi/drivers/ni_pcimio.c b/drivers/comedi/drivers/ni_pcimio.c
index 6c813a490ba5..0b055321023d 100644
--- a/drivers/comedi/drivers/ni_pcimio.c
+++ b/drivers/comedi/drivers/ni_pcimio.c
@@ -94,9 +94,7 @@
#include <linux/module.h>
#include <linux/delay.h>
-
-#include "../comedi_pci.h"
-
+#include <linux/comedi/comedi_pci.h>
#include <asm/byteorder.h>
#include "ni_stc.h"
diff --git a/drivers/comedi/drivers/ni_routes.c b/drivers/comedi/drivers/ni_routes.c
index f0f8cd424b30..f24eeb464eba 100644
--- a/drivers/comedi/drivers/ni_routes.c
+++ b/drivers/comedi/drivers/ni_routes.c
@@ -21,8 +21,7 @@
#include <linux/slab.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
-
-#include "../comedi.h"
+#include <linux/comedi.h>
#include "ni_routes.h"
#include "ni_routing/ni_route_values.h"
diff --git a/drivers/comedi/drivers/ni_routes.h b/drivers/comedi/drivers/ni_routes.h
index 036982315584..cff8a463a03f 100644
--- a/drivers/comedi/drivers/ni_routes.h
+++ b/drivers/comedi/drivers/ni_routes.h
@@ -27,7 +27,7 @@
#include <linux/bitops.h>
#endif
-#include "../comedi.h"
+#include <linux/comedi.h>
/**
* struct ni_route_set - Set of destinations with a common source.
diff --git a/drivers/comedi/drivers/ni_routing/ni_route_values.h b/drivers/comedi/drivers/ni_routing/ni_route_values.h
index 6e358efa6f7f..80880083ea41 100644
--- a/drivers/comedi/drivers/ni_routing/ni_route_values.h
+++ b/drivers/comedi/drivers/ni_routing/ni_route_values.h
@@ -20,7 +20,7 @@
#ifndef _COMEDI_DRIVERS_NI_ROUTINT_NI_ROUTE_VALUES_H
#define _COMEDI_DRIVERS_NI_ROUTINT_NI_ROUTE_VALUES_H
-#include "../../comedi.h"
+#include <linux/comedi.h>
#include <linux/types.h>
/*
diff --git a/drivers/comedi/drivers/ni_routing/tools/.gitignore b/drivers/comedi/drivers/ni_routing/tools/.gitignore
index e3ebffcd900e..c12f825db266 100644
--- a/drivers/comedi/drivers/ni_routing/tools/.gitignore
+++ b/drivers/comedi/drivers/ni_routing/tools/.gitignore
@@ -5,4 +5,5 @@ ni_values.py
convert_c_to_py
c/
csv/
+linux/
all_cfiles.c
diff --git a/drivers/comedi/drivers/ni_routing/tools/Makefile b/drivers/comedi/drivers/ni_routing/tools/Makefile
index 6e92a06a44cb..31212101b3bc 100644
--- a/drivers/comedi/drivers/ni_routing/tools/Makefile
+++ b/drivers/comedi/drivers/ni_routing/tools/Makefile
@@ -3,7 +3,7 @@
# ni_route_values.h
# ni_device_routes.h
# in order to do this, we are also generating a python representation (using
-# ctypesgen) of ../../comedi.h.
+# ctypesgen) of ../../../../../include/uapi/linux/comedi.h.
# This allows us to sort NI signal/terminal names numerically to use a binary
# search through the device_routes tables to find valid routes.
@@ -30,13 +30,21 @@ ALL:
everything : csv-files c-files csv-blank
-CPPFLAGS=-D"BIT(x)=(1UL<<(x))" -D__user=
+CPPFLAGS = -D__user=
+INC_UAPI = ../../../../../include/uapi
-comedi_h.py : ../../../comedi.h
+comedi_h.py: $(INC_UAPI)/linux/comedi.h
ctypesgen $< --include "sys/ioctl.h" --cpp 'gcc -E $(CPPFLAGS)' -o $@
-convert_c_to_py: all_cfiles.c
- gcc -g convert_c_to_py.c -o convert_c_to_py -std=c99
+convert_c_to_py: all_cfiles.c linux/comedi.h
+ gcc -g -I. convert_c_to_py.c -o convert_c_to_py -std=c99
+
+# Create a local 'linux/comedi.h' for use when compiling 'convert_c_to_py.c'
+# with the '-I.' option. (Cannot specify '-I../../../../../include/uapi'
+# because that interferes with inclusion of other system headers.)
+linux/comedi.h: $(INC_UAPI)/linux/comedi.h
+ mkdir -p linux
+ ln -snf ../$< $@
ni_values.py: convert_c_to_py
./convert_c_to_py
@@ -44,7 +52,7 @@ ni_values.py: convert_c_to_py
csv-files : ni_values.py comedi_h.py
./convert_py_to_csv.py
-csv-blank :
+csv-blank : comedi_h.py
./make_blank_csv.py
@echo New blank csv signal table in csv/blank_route_table.csv
@@ -62,17 +70,16 @@ clean-partial :
$(RM) -rf comedi_h.py ni_values.py convert_c_to_py all_cfiles.c *.pyc \
__pycache__/
-clean : partial_clean
- $(RM) -rf c/ csv/
+clean : clean-partial
+ $(RM) -rf c/ csv/ linux/
# Note: One could also use ctypeslib in order to generate these files. The
# caveat is that ctypeslib does not do a great job at handling macro functions.
# The make rules are as follows:
-# comedi.h.xml : ../../comedi.h
+# comedi.h.xml : $(INC_UAPI)/linux/comedi.h
# # note that we have to use PWD here to avoid h2xml finding a system
# # installed version of the comedilib/comedi.h file
-# h2xml ${PWD}/../../comedi.h -c -D__user="" -D"BIT(x)=(1<<(x))" \
-# -o comedi.h.xml
+# h2xml ${PWD}/$(INC_UAPI)/linux/comedi.h -c D__user="" -o comedi.h.xml
#
# comedi_h.py : comedi.h.xml
# xml2py ./comedi.h.xml -o comedi_h.py
diff --git a/drivers/comedi/drivers/ni_tio.h b/drivers/comedi/drivers/ni_tio.h
index e7b05718df9b..9ae2221c3c18 100644
--- a/drivers/comedi/drivers/ni_tio.h
+++ b/drivers/comedi/drivers/ni_tio.h
@@ -8,7 +8,7 @@
#ifndef _COMEDI_NI_TIO_H
#define _COMEDI_NI_TIO_H
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
enum ni_gpct_register {
NITIO_G0_AUTO_INC,
diff --git a/drivers/comedi/drivers/ni_usb6501.c b/drivers/comedi/drivers/ni_usb6501.c
index c42987b74b1d..0dd9edf7bced 100644
--- a/drivers/comedi/drivers/ni_usb6501.c
+++ b/drivers/comedi/drivers/ni_usb6501.c
@@ -87,8 +87,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
-
-#include "../comedi_usb.h"
+#include <linux/comedi/comedi_usb.h>
#define NI6501_TIMEOUT 1000
diff --git a/drivers/comedi/drivers/pcl711.c b/drivers/comedi/drivers/pcl711.c
index bd6f42fe9e3c..05172c553c8a 100644
--- a/drivers/comedi/drivers/pcl711.c
+++ b/drivers/comedi/drivers/pcl711.c
@@ -29,10 +29,8 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
-
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
/*
* I/O port register map
diff --git a/drivers/comedi/drivers/pcl724.c b/drivers/comedi/drivers/pcl724.c
index 1a5799278a7a..948a0576c9ef 100644
--- a/drivers/comedi/drivers/pcl724.c
+++ b/drivers/comedi/drivers/pcl724.c
@@ -25,9 +25,8 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
-
-#include "8255.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
struct pcl724_board {
const char *name;
diff --git a/drivers/comedi/drivers/pcl726.c b/drivers/comedi/drivers/pcl726.c
index 88f25d7e76f7..0430630e6ebb 100644
--- a/drivers/comedi/drivers/pcl726.c
+++ b/drivers/comedi/drivers/pcl726.c
@@ -50,8 +50,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#define PCL726_AO_MSB_REG(x) (0x00 + ((x) * 2))
#define PCL726_AO_LSB_REG(x) (0x01 + ((x) * 2))
diff --git a/drivers/comedi/drivers/pcl730.c b/drivers/comedi/drivers/pcl730.c
index 32a29129e6e8..d2733cd5383d 100644
--- a/drivers/comedi/drivers/pcl730.c
+++ b/drivers/comedi/drivers/pcl730.c
@@ -25,7 +25,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register map
diff --git a/drivers/comedi/drivers/pcl812.c b/drivers/comedi/drivers/pcl812.c
index b87ab3840eee..70dbc129fcf5 100644
--- a/drivers/comedi/drivers/pcl812.c
+++ b/drivers/comedi/drivers/pcl812.c
@@ -114,11 +114,9 @@
#include <linux/gfp.h>
#include <linux/delay.h>
#include <linux/io.h>
-
-#include "../comedidev.h"
-
-#include "comedi_isadma.h"
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
+#include <linux/comedi/comedi_isadma.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/pcl816.c b/drivers/comedi/drivers/pcl816.c
index c368a337a0ae..a5e5320be648 100644
--- a/drivers/comedi/drivers/pcl816.c
+++ b/drivers/comedi/drivers/pcl816.c
@@ -35,11 +35,9 @@
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
-
-#include "comedi_isadma.h"
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
+#include <linux/comedi/comedi_isadma.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/pcl818.c b/drivers/comedi/drivers/pcl818.c
index f4b4a686c710..29e503de8267 100644
--- a/drivers/comedi/drivers/pcl818.c
+++ b/drivers/comedi/drivers/pcl818.c
@@ -97,11 +97,9 @@
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
-
-#include "comedi_isadma.h"
-#include "comedi_8254.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8254.h>
+#include <linux/comedi/comedi_isadma.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/pcm3724.c b/drivers/comedi/drivers/pcm3724.c
index 0cb1ad060402..e4103f9eeced 100644
--- a/drivers/comedi/drivers/pcm3724.c
+++ b/drivers/comedi/drivers/pcm3724.c
@@ -24,9 +24,8 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
-
-#include "8255.h"
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedi_8255.h>
/*
* Register I/O Map
diff --git a/drivers/comedi/drivers/pcmad.c b/drivers/comedi/drivers/pcmad.c
index eec89a0afb2f..976eda43881b 100644
--- a/drivers/comedi/drivers/pcmad.c
+++ b/drivers/comedi/drivers/pcmad.c
@@ -29,7 +29,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
#define PCMAD_STATUS 0
#define PCMAD_LSB 1
diff --git a/drivers/comedi/drivers/pcmda12.c b/drivers/comedi/drivers/pcmda12.c
index 14ab1f0d1e9f..611f13bedca0 100644
--- a/drivers/comedi/drivers/pcmda12.c
+++ b/drivers/comedi/drivers/pcmda12.c
@@ -40,7 +40,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/* AI range is not configurable, it's set by jumpers on the board */
static const struct comedi_lrange pcmda12_ranges = {
diff --git a/drivers/comedi/drivers/pcmmio.c b/drivers/comedi/drivers/pcmmio.c
index 24a9568d3378..c2402239d551 100644
--- a/drivers/comedi/drivers/pcmmio.c
+++ b/drivers/comedi/drivers/pcmmio.c
@@ -66,8 +66,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/pcmuio.c b/drivers/comedi/drivers/pcmuio.c
index b299d648a0eb..33b24dbbb919 100644
--- a/drivers/comedi/drivers/pcmuio.c
+++ b/drivers/comedi/drivers/pcmuio.c
@@ -65,8 +65,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/quatech_daqp_cs.c b/drivers/comedi/drivers/quatech_daqp_cs.c
index fe4408ebf6b3..2a76c75c513b 100644
--- a/drivers/comedi/drivers/quatech_daqp_cs.c
+++ b/drivers/comedi/drivers/quatech_daqp_cs.c
@@ -41,8 +41,7 @@
*/
#include <linux/module.h>
-
-#include "../comedi_pcmcia.h"
+#include <linux/comedi/comedi_pcmcia.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/rtd520.c b/drivers/comedi/drivers/rtd520.c
index 2d99a648b054..7e0ec1a2a2ca 100644
--- a/drivers/comedi/drivers/rtd520.c
+++ b/drivers/comedi/drivers/rtd520.c
@@ -85,10 +85,9 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/comedi/comedi_pci.h>
+#include <linux/comedi/comedi_8254.h>
-#include "../comedi_pci.h"
-
-#include "comedi_8254.h"
#include "plx9080.h"
/*
diff --git a/drivers/comedi/drivers/rti800.c b/drivers/comedi/drivers/rti800.c
index 327fd93b8b12..1b02e47bdb4c 100644
--- a/drivers/comedi/drivers/rti800.c
+++ b/drivers/comedi/drivers/rti800.c
@@ -42,7 +42,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register map
diff --git a/drivers/comedi/drivers/rti802.c b/drivers/comedi/drivers/rti802.c
index 195e2b1ac4c1..d66762a22258 100644
--- a/drivers/comedi/drivers/rti802.c
+++ b/drivers/comedi/drivers/rti802.c
@@ -22,7 +22,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/s526.c b/drivers/comedi/drivers/s526.c
index 085cf5b449e5..9245c679a3c4 100644
--- a/drivers/comedi/drivers/s526.c
+++ b/drivers/comedi/drivers/s526.c
@@ -27,7 +27,7 @@
*/
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* Register I/O map
diff --git a/drivers/comedi/drivers/s626.c b/drivers/comedi/drivers/s626.c
index e7aba937d896..0e5f9a9a7fd3 100644
--- a/drivers/comedi/drivers/s626.c
+++ b/drivers/comedi/drivers/s626.c
@@ -55,8 +55,7 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
-
-#include "../comedi_pci.h"
+#include <linux/comedi/comedi_pci.h>
#include "s626.h"
diff --git a/drivers/comedi/drivers/ssv_dnp.c b/drivers/comedi/drivers/ssv_dnp.c
index 016d315aa584..813bd0853b0b 100644
--- a/drivers/comedi/drivers/ssv_dnp.c
+++ b/drivers/comedi/drivers/ssv_dnp.c
@@ -19,7 +19,7 @@
/* include files ----------------------------------------------------------- */
#include <linux/module.h>
-#include "../comedidev.h"
+#include <linux/comedi/comedidev.h>
/* Some global definitions: the registers of the DNP ----------------------- */
/* */
diff --git a/drivers/comedi/drivers/usbdux.c b/drivers/comedi/drivers/usbdux.c
index 0350f303d557..92d514b3c1c3 100644
--- a/drivers/comedi/drivers/usbdux.c
+++ b/drivers/comedi/drivers/usbdux.c
@@ -73,8 +73,7 @@
#include <linux/input.h>
#include <linux/fcntl.h>
#include <linux/compiler.h>
-
-#include "../comedi_usb.h"
+#include <linux/comedi/comedi_usb.h>
/* constants for firmware upload and download */
#define USBDUX_FIRMWARE "usbdux_firmware.bin"
diff --git a/drivers/comedi/drivers/usbduxfast.c b/drivers/comedi/drivers/usbduxfast.c
index 4af012968cb6..39faae0ecb19 100644
--- a/drivers/comedi/drivers/usbduxfast.c
+++ b/drivers/comedi/drivers/usbduxfast.c
@@ -40,7 +40,7 @@
#include <linux/input.h>
#include <linux/fcntl.h>
#include <linux/compiler.h>
-#include "../comedi_usb.h"
+#include <linux/comedi/comedi_usb.h>
/*
* timeout for the USB-transfer
diff --git a/drivers/comedi/drivers/usbduxsigma.c b/drivers/comedi/drivers/usbduxsigma.c
index 54d7605e909f..2aaeaf44fbe5 100644
--- a/drivers/comedi/drivers/usbduxsigma.c
+++ b/drivers/comedi/drivers/usbduxsigma.c
@@ -40,8 +40,7 @@
#include <linux/fcntl.h>
#include <linux/compiler.h>
#include <asm/unaligned.h>
-
-#include "../comedi_usb.h"
+#include <linux/comedi/comedi_usb.h>
/* timeout for the USB-transfer in ms*/
#define BULK_TIMEOUT 1000
diff --git a/drivers/comedi/drivers/vmk80xx.c b/drivers/comedi/drivers/vmk80xx.c
index 4b00a9ea611a..46023adc5395 100644
--- a/drivers/comedi/drivers/vmk80xx.c
+++ b/drivers/comedi/drivers/vmk80xx.c
@@ -35,8 +35,7 @@
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/uaccess.h>
-
-#include "../comedi_usb.h"
+#include <linux/comedi/comedi_usb.h>
enum {
DEVICE_VMK8055,
diff --git a/drivers/comedi/kcomedilib/kcomedilib_main.c b/drivers/comedi/kcomedilib/kcomedilib_main.c
index df9bba1b69ed..43fbe1a63b14 100644
--- a/drivers/comedi/kcomedilib/kcomedilib_main.c
+++ b/drivers/comedi/kcomedilib/kcomedilib_main.c
@@ -16,9 +16,9 @@
#include <linux/mm.h>
#include <linux/io.h>
-#include "../comedi.h"
-#include "../comedilib.h"
-#include "../comedidev.h"
+#include <linux/comedi.h>
+#include <linux/comedi/comedidev.h>
+#include <linux/comedi/comedilib.h>
MODULE_AUTHOR("David Schleef <ds@schleef.org>");
MODULE_DESCRIPTION("Comedi kernel library");
diff --git a/drivers/comedi/proc.c b/drivers/comedi/proc.c
index 8bc8e42beb90..2e4496633d3d 100644
--- a/drivers/comedi/proc.c
+++ b/drivers/comedi/proc.c
@@ -13,7 +13,7 @@
* was cool.
*/
-#include "comedidev.h"
+#include <linux/comedi/comedidev.h>
#include "comedi_internal.h"
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
diff --git a/drivers/comedi/range.c b/drivers/comedi/range.c
index a4e6fe0fb729..8f43cf88d784 100644
--- a/drivers/comedi/range.c
+++ b/drivers/comedi/range.c
@@ -8,7 +8,7 @@
*/
#include <linux/uaccess.h>
-#include "comedidev.h"
+#include <linux/comedi/comedidev.h>
#include "comedi_internal.h"
const struct comedi_lrange range_bipolar10 = { 1, {BIP_RANGE(10)} };
diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c
index 1cbd60aaed69..a17e51d65aca 100644
--- a/drivers/counter/104-quad-8.c
+++ b/drivers/counter/104-quad-8.c
@@ -14,6 +14,7 @@
#include <linux/interrupt.h>
#include <linux/isa.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
@@ -44,7 +45,6 @@ MODULE_PARM_DESC(irq, "ACCES 104-QUAD-8 interrupt line numbers");
* @ab_enable: array of A and B inputs enable configurations
* @preset_enable: array of set_to_preset_on_index attribute configurations
* @irq_trigger: array of current IRQ trigger function configurations
- * @next_irq_trigger: array of next IRQ trigger function configurations
* @synchronous_mode: array of index function synchronous mode configurations
* @index_polarity: array of index function polarity configurations
* @cable_fault_enable: differential encoder cable status enable configurations
@@ -52,7 +52,6 @@ MODULE_PARM_DESC(irq, "ACCES 104-QUAD-8 interrupt line numbers");
*/
struct quad8 {
spinlock_t lock;
- struct counter_device counter;
unsigned int fck_prescaler[QUAD8_NUM_COUNTERS];
unsigned int preset[QUAD8_NUM_COUNTERS];
unsigned int count_mode[QUAD8_NUM_COUNTERS];
@@ -61,7 +60,6 @@ struct quad8 {
unsigned int ab_enable[QUAD8_NUM_COUNTERS];
unsigned int preset_enable[QUAD8_NUM_COUNTERS];
unsigned int irq_trigger[QUAD8_NUM_COUNTERS];
- unsigned int next_irq_trigger[QUAD8_NUM_COUNTERS];
unsigned int synchronous_mode[QUAD8_NUM_COUNTERS];
unsigned int index_polarity[QUAD8_NUM_COUNTERS];
unsigned int cable_fault_enable;
@@ -113,7 +111,7 @@ static int quad8_signal_read(struct counter_device *counter,
struct counter_signal *signal,
enum counter_signal_level *level)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
unsigned int state;
/* Only Index signal levels can be read */
@@ -131,7 +129,7 @@ static int quad8_signal_read(struct counter_device *counter,
static int quad8_count_read(struct counter_device *counter,
struct counter_count *count, u64 *val)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const int base_offset = priv->base + 2 * count->id;
unsigned int flags;
unsigned int borrow;
@@ -163,7 +161,7 @@ static int quad8_count_read(struct counter_device *counter,
static int quad8_count_write(struct counter_device *counter,
struct counter_count *count, u64 val)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const int base_offset = priv->base + 2 * count->id;
unsigned long irqflags;
int i;
@@ -213,7 +211,7 @@ static int quad8_function_read(struct counter_device *counter,
struct counter_count *count,
enum counter_function *function)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const int id = count->id;
unsigned long irqflags;
@@ -243,7 +241,7 @@ static int quad8_function_write(struct counter_device *counter,
struct counter_count *count,
enum counter_function function)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const int id = count->id;
unsigned int *const quadrature_mode = priv->quadrature_mode + id;
unsigned int *const scale = priv->quadrature_scale + id;
@@ -305,7 +303,7 @@ static int quad8_direction_read(struct counter_device *counter,
struct counter_count *count,
enum counter_count_direction *direction)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
unsigned int ud_flag;
const unsigned int flag_addr = priv->base + 2 * count->id + 1;
@@ -335,7 +333,7 @@ static int quad8_action_read(struct counter_device *counter,
struct counter_synapse *synapse,
enum counter_synapse_action *action)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
int err;
enum counter_function function;
const size_t signal_a_id = count->synapses[0].signal->id;
@@ -390,7 +388,6 @@ static int quad8_action_read(struct counter_device *counter,
}
enum {
- QUAD8_EVENT_NONE = -1,
QUAD8_EVENT_CARRY = 0,
QUAD8_EVENT_COMPARE = 1,
QUAD8_EVENT_CARRY_BORROW = 2,
@@ -399,37 +396,52 @@ enum {
static int quad8_events_configure(struct counter_device *counter)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
unsigned long irq_enabled = 0;
unsigned long irqflags;
- size_t channel;
+ struct counter_event_node *event_node;
+ unsigned int next_irq_trigger;
unsigned long ior_cfg;
unsigned long base_offset;
spin_lock_irqsave(&priv->lock, irqflags);
- /* Enable interrupts for the requested channels, disable for the rest */
- for (channel = 0; channel < QUAD8_NUM_COUNTERS; channel++) {
- if (priv->next_irq_trigger[channel] == QUAD8_EVENT_NONE)
+ list_for_each_entry(event_node, &counter->events_list, l) {
+ switch (event_node->event) {
+ case COUNTER_EVENT_OVERFLOW:
+ next_irq_trigger = QUAD8_EVENT_CARRY;
+ break;
+ case COUNTER_EVENT_THRESHOLD:
+ next_irq_trigger = QUAD8_EVENT_COMPARE;
+ break;
+ case COUNTER_EVENT_OVERFLOW_UNDERFLOW:
+ next_irq_trigger = QUAD8_EVENT_CARRY_BORROW;
+ break;
+ case COUNTER_EVENT_INDEX:
+ next_irq_trigger = QUAD8_EVENT_INDEX;
+ break;
+ default:
+ /* should never reach this path */
+ spin_unlock_irqrestore(&priv->lock, irqflags);
+ return -EINVAL;
+ }
+
+ /* Skip configuration if it is the same as previously set */
+ if (priv->irq_trigger[event_node->channel] == next_irq_trigger)
continue;
- if (priv->irq_trigger[channel] != priv->next_irq_trigger[channel]) {
- /* Save new IRQ function configuration */
- priv->irq_trigger[channel] = priv->next_irq_trigger[channel];
+ /* Save new IRQ function configuration */
+ priv->irq_trigger[event_node->channel] = next_irq_trigger;
- /* Load configuration to I/O Control Register */
- ior_cfg = priv->ab_enable[channel] |
- priv->preset_enable[channel] << 1 |
- priv->irq_trigger[channel] << 3;
- base_offset = priv->base + 2 * channel + 1;
- outb(QUAD8_CTR_IOR | ior_cfg, base_offset);
- }
-
- /* Reset next IRQ trigger function configuration */
- priv->next_irq_trigger[channel] = QUAD8_EVENT_NONE;
+ /* Load configuration to I/O Control Register */
+ ior_cfg = priv->ab_enable[event_node->channel] |
+ priv->preset_enable[event_node->channel] << 1 |
+ priv->irq_trigger[event_node->channel] << 3;
+ base_offset = priv->base + 2 * event_node->channel + 1;
+ outb(QUAD8_CTR_IOR | ior_cfg, base_offset);
/* Enable IRQ line */
- irq_enabled |= BIT(channel);
+ irq_enabled |= BIT(event_node->channel);
}
outb(irq_enabled, priv->base + QUAD8_REG_INDEX_INTERRUPT);
@@ -442,35 +454,20 @@ static int quad8_events_configure(struct counter_device *counter)
static int quad8_watch_validate(struct counter_device *counter,
const struct counter_watch *watch)
{
- struct quad8 *const priv = counter->priv;
+ struct counter_event_node *event_node;
if (watch->channel > QUAD8_NUM_COUNTERS - 1)
return -EINVAL;
switch (watch->event) {
case COUNTER_EVENT_OVERFLOW:
- if (priv->next_irq_trigger[watch->channel] == QUAD8_EVENT_NONE)
- priv->next_irq_trigger[watch->channel] = QUAD8_EVENT_CARRY;
- else if (priv->next_irq_trigger[watch->channel] != QUAD8_EVENT_CARRY)
- return -EINVAL;
- return 0;
case COUNTER_EVENT_THRESHOLD:
- if (priv->next_irq_trigger[watch->channel] == QUAD8_EVENT_NONE)
- priv->next_irq_trigger[watch->channel] = QUAD8_EVENT_COMPARE;
- else if (priv->next_irq_trigger[watch->channel] != QUAD8_EVENT_COMPARE)
- return -EINVAL;
- return 0;
case COUNTER_EVENT_OVERFLOW_UNDERFLOW:
- if (priv->next_irq_trigger[watch->channel] == QUAD8_EVENT_NONE)
- priv->next_irq_trigger[watch->channel] = QUAD8_EVENT_CARRY_BORROW;
- else if (priv->next_irq_trigger[watch->channel] != QUAD8_EVENT_CARRY_BORROW)
- return -EINVAL;
- return 0;
case COUNTER_EVENT_INDEX:
- if (priv->next_irq_trigger[watch->channel] == QUAD8_EVENT_NONE)
- priv->next_irq_trigger[watch->channel] = QUAD8_EVENT_INDEX;
- else if (priv->next_irq_trigger[watch->channel] != QUAD8_EVENT_INDEX)
- return -EINVAL;
+ list_for_each_entry(event_node, &counter->next_events_list, l)
+ if (watch->channel == event_node->channel &&
+ watch->event != event_node->event)
+ return -EINVAL;
return 0;
default:
return -EINVAL;
@@ -497,7 +494,7 @@ static int quad8_index_polarity_get(struct counter_device *counter,
struct counter_signal *signal,
u32 *index_polarity)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
const size_t channel_id = signal->id - 16;
*index_polarity = priv->index_polarity[channel_id];
@@ -509,7 +506,7 @@ static int quad8_index_polarity_set(struct counter_device *counter,
struct counter_signal *signal,
u32 index_polarity)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const size_t channel_id = signal->id - 16;
const int base_offset = priv->base + 2 * channel_id + 1;
unsigned long irqflags;
@@ -538,7 +535,7 @@ static int quad8_synchronous_mode_get(struct counter_device *counter,
struct counter_signal *signal,
u32 *synchronous_mode)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
const size_t channel_id = signal->id - 16;
*synchronous_mode = priv->synchronous_mode[channel_id];
@@ -550,7 +547,7 @@ static int quad8_synchronous_mode_set(struct counter_device *counter,
struct counter_signal *signal,
u32 synchronous_mode)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const size_t channel_id = signal->id - 16;
const int base_offset = priv->base + 2 * channel_id + 1;
unsigned long irqflags;
@@ -589,7 +586,7 @@ static int quad8_count_mode_read(struct counter_device *counter,
struct counter_count *count,
enum counter_count_mode *cnt_mode)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
/* Map 104-QUAD-8 count mode to Generic Counter count mode */
switch (priv->count_mode[count->id]) {
@@ -614,7 +611,7 @@ static int quad8_count_mode_write(struct counter_device *counter,
struct counter_count *count,
enum counter_count_mode cnt_mode)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
unsigned int count_mode;
unsigned int mode_cfg;
const int base_offset = priv->base + 2 * count->id + 1;
@@ -661,7 +658,7 @@ static int quad8_count_mode_write(struct counter_device *counter,
static int quad8_count_enable_read(struct counter_device *counter,
struct counter_count *count, u8 *enable)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
*enable = priv->ab_enable[count->id];
@@ -671,7 +668,7 @@ static int quad8_count_enable_read(struct counter_device *counter,
static int quad8_count_enable_write(struct counter_device *counter,
struct counter_count *count, u8 enable)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const int base_offset = priv->base + 2 * count->id;
unsigned long irqflags;
unsigned int ior_cfg;
@@ -699,7 +696,7 @@ static const char *const quad8_noise_error_states[] = {
static int quad8_error_noise_get(struct counter_device *counter,
struct counter_count *count, u32 *noise_error)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
const int base_offset = priv->base + 2 * count->id + 1;
*noise_error = !!(inb(base_offset) & QUAD8_FLAG_E);
@@ -710,7 +707,7 @@ static int quad8_error_noise_get(struct counter_device *counter,
static int quad8_count_preset_read(struct counter_device *counter,
struct counter_count *count, u64 *preset)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
*preset = priv->preset[count->id];
@@ -736,7 +733,7 @@ static void quad8_preset_register_set(struct quad8 *const priv, const int id,
static int quad8_count_preset_write(struct counter_device *counter,
struct counter_count *count, u64 preset)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
unsigned long irqflags;
/* Only 24-bit values are supported */
@@ -755,7 +752,7 @@ static int quad8_count_preset_write(struct counter_device *counter,
static int quad8_count_ceiling_read(struct counter_device *counter,
struct counter_count *count, u64 *ceiling)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
unsigned long irqflags;
spin_lock_irqsave(&priv->lock, irqflags);
@@ -780,7 +777,7 @@ static int quad8_count_ceiling_read(struct counter_device *counter,
static int quad8_count_ceiling_write(struct counter_device *counter,
struct counter_count *count, u64 ceiling)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
unsigned long irqflags;
/* Only 24-bit values are supported */
@@ -807,7 +804,7 @@ static int quad8_count_preset_enable_read(struct counter_device *counter,
struct counter_count *count,
u8 *preset_enable)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
*preset_enable = !priv->preset_enable[count->id];
@@ -818,7 +815,7 @@ static int quad8_count_preset_enable_write(struct counter_device *counter,
struct counter_count *count,
u8 preset_enable)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const int base_offset = priv->base + 2 * count->id + 1;
unsigned long irqflags;
unsigned int ior_cfg;
@@ -845,7 +842,7 @@ static int quad8_signal_cable_fault_read(struct counter_device *counter,
struct counter_signal *signal,
u8 *cable_fault)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const size_t channel_id = signal->id / 2;
unsigned long irqflags;
bool disabled;
@@ -875,7 +872,7 @@ static int quad8_signal_cable_fault_enable_read(struct counter_device *counter,
struct counter_signal *signal,
u8 *enable)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
const size_t channel_id = signal->id / 2;
*enable = !!(priv->cable_fault_enable & BIT(channel_id));
@@ -887,7 +884,7 @@ static int quad8_signal_cable_fault_enable_write(struct counter_device *counter,
struct counter_signal *signal,
u8 enable)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const size_t channel_id = signal->id / 2;
unsigned long irqflags;
unsigned int cable_fault_enable;
@@ -913,7 +910,7 @@ static int quad8_signal_fck_prescaler_read(struct counter_device *counter,
struct counter_signal *signal,
u8 *prescaler)
{
- const struct quad8 *const priv = counter->priv;
+ const struct quad8 *const priv = counter_priv(counter);
*prescaler = priv->fck_prescaler[signal->id / 2];
@@ -924,7 +921,7 @@ static int quad8_signal_fck_prescaler_write(struct counter_device *counter,
struct counter_signal *signal,
u8 prescaler)
{
- struct quad8 *const priv = counter->priv;
+ struct quad8 *const priv = counter_priv(counter);
const size_t channel_id = signal->id / 2;
const int base_offset = priv->base + 2 * channel_id;
unsigned long irqflags;
@@ -1085,7 +1082,8 @@ static struct counter_count quad8_counts[] = {
static irqreturn_t quad8_irq_handler(int irq, void *private)
{
- struct quad8 *const priv = private;
+ struct counter_device *counter = private;
+ struct quad8 *const priv = counter_priv(counter);
const unsigned long base = priv->base;
unsigned long irq_status;
unsigned long channel;
@@ -1116,7 +1114,7 @@ static irqreturn_t quad8_irq_handler(int irq, void *private)
continue;
}
- counter_push_event(&priv->counter, event, channel);
+ counter_push_event(counter, event, channel);
}
/* Clear pending interrupts on device */
@@ -1127,6 +1125,7 @@ static irqreturn_t quad8_irq_handler(int irq, void *private)
static int quad8_probe(struct device *dev, unsigned int id)
{
+ struct counter_device *counter;
struct quad8 *priv;
int i, j;
unsigned int base_offset;
@@ -1138,19 +1137,19 @@ static int quad8_probe(struct device *dev, unsigned int id)
return -EBUSY;
}
- priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
+ counter = devm_counter_alloc(dev, sizeof(*priv));
+ if (!counter)
return -ENOMEM;
+ priv = counter_priv(counter);
/* Initialize Counter device and driver data */
- priv->counter.name = dev_name(dev);
- priv->counter.parent = dev;
- priv->counter.ops = &quad8_ops;
- priv->counter.counts = quad8_counts;
- priv->counter.num_counts = ARRAY_SIZE(quad8_counts);
- priv->counter.signals = quad8_signals;
- priv->counter.num_signals = ARRAY_SIZE(quad8_signals);
- priv->counter.priv = priv;
+ counter->name = dev_name(dev);
+ counter->parent = dev;
+ counter->ops = &quad8_ops;
+ counter->counts = quad8_counts;
+ counter->num_counts = ARRAY_SIZE(quad8_counts);
+ counter->signals = quad8_signals;
+ counter->num_signals = ARRAY_SIZE(quad8_signals);
priv->base = base[id];
spin_lock_init(&priv->lock);
@@ -1183,20 +1182,22 @@ static int quad8_probe(struct device *dev, unsigned int id)
outb(QUAD8_CTR_IOR, base_offset + 1);
/* Disable index function; negative index polarity */
outb(QUAD8_CTR_IDR, base_offset + 1);
- /* Initialize next IRQ trigger function configuration */
- priv->next_irq_trigger[i] = QUAD8_EVENT_NONE;
}
/* Disable Differential Encoder Cable Status for all channels */
outb(0xFF, base[id] + QUAD8_DIFF_ENCODER_CABLE_STATUS);
/* Enable all counters and enable interrupt function */
outb(QUAD8_CHAN_OP_ENABLE_INTERRUPT_FUNC, base[id] + QUAD8_REG_CHAN_OP);
- err = devm_request_irq(dev, irq[id], quad8_irq_handler, IRQF_SHARED,
- priv->counter.name, priv);
+ err = devm_request_irq(&counter->dev, irq[id], quad8_irq_handler,
+ IRQF_SHARED, counter->name, counter);
if (err)
return err;
- return devm_counter_register(dev, &priv->counter);
+ err = devm_counter_add(dev, counter);
+ if (err < 0)
+ return dev_err_probe(dev, err, "Failed to add counter\n");
+
+ return 0;
}
static struct isa_driver quad8_driver = {
diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c
index 5acc54539623..7e0957eea094 100644
--- a/drivers/counter/counter-core.c
+++ b/drivers/counter/counter-core.c
@@ -15,6 +15,7 @@
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/slab.h>
#include <linux/types.h>
#include <linux/wait.h>
@@ -24,12 +25,25 @@
/* Provides a unique ID for each counter device */
static DEFINE_IDA(counter_ida);
+struct counter_device_allochelper {
+ struct counter_device counter;
+
+ /*
+ * This is cache line aligned to ensure private data behaves like if it
+ * were kmalloced separately.
+ */
+ unsigned long privdata[] ____cacheline_aligned;
+};
+
static void counter_device_release(struct device *dev)
{
- struct counter_device *const counter = dev_get_drvdata(dev);
+ struct counter_device *const counter =
+ container_of(dev, struct counter_device, dev);
counter_chrdev_remove(counter);
ida_free(&counter_ida, dev->id);
+
+ kfree(container_of(counter, struct counter_device_allochelper, counter));
}
static struct device_type counter_device_type = {
@@ -45,62 +59,108 @@ static struct bus_type counter_bus_type = {
static dev_t counter_devt;
/**
- * counter_register - register Counter to the system
- * @counter: pointer to Counter to register
+ * counter_priv - access counter device private data
+ * @counter: counter device
*
- * This function registers a Counter to the system. A sysfs "counter" directory
- * will be created and populated with sysfs attributes correlating with the
- * Counter Signals, Synapses, and Counts respectively.
+ * Get the counter device private data
+ */
+void *counter_priv(const struct counter_device *const counter)
+{
+ struct counter_device_allochelper *ch =
+ container_of(counter, struct counter_device_allochelper, counter);
+
+ return &ch->privdata;
+}
+EXPORT_SYMBOL_GPL(counter_priv);
+
+/**
+ * counter_alloc - allocate a counter_device
+ * @sizeof_priv: size of the driver private data
+ *
+ * This is part one of counter registration. The structure is allocated
+ * dynamically to ensure the right lifetime for the embedded struct device.
*
- * RETURNS:
- * 0 on success, negative error number on failure.
+ * If this succeeds, call counter_put() to get rid of the counter_device again.
*/
-int counter_register(struct counter_device *const counter)
+struct counter_device *counter_alloc(size_t sizeof_priv)
{
- struct device *const dev = &counter->dev;
- int id;
+ struct counter_device_allochelper *ch;
+ struct counter_device *counter;
+ struct device *dev;
int err;
+ ch = kzalloc(sizeof(*ch) + sizeof_priv, GFP_KERNEL);
+ if (!ch) {
+ err = -ENOMEM;
+ goto err_alloc_ch;
+ }
+
+ counter = &ch->counter;
+ dev = &counter->dev;
+
/* Acquire unique ID */
- id = ida_alloc(&counter_ida, GFP_KERNEL);
- if (id < 0)
- return id;
+ err = ida_alloc(&counter_ida, GFP_KERNEL);
+ if (err < 0)
+ goto err_ida_alloc;
+ dev->id = err;
mutex_init(&counter->ops_exist_lock);
-
- /* Configure device structure for Counter */
- dev->id = id;
dev->type = &counter_device_type;
dev->bus = &counter_bus_type;
- dev->devt = MKDEV(MAJOR(counter_devt), id);
+ dev->devt = MKDEV(MAJOR(counter_devt), dev->id);
+
+ err = counter_chrdev_add(counter);
+ if (err < 0)
+ goto err_chrdev_add;
+
+ device_initialize(dev);
+
+ return counter;
+
+err_chrdev_add:
+
+ ida_free(&counter_ida, dev->id);
+err_ida_alloc:
+
+ kfree(ch);
+err_alloc_ch:
+
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(counter_alloc);
+
+void counter_put(struct counter_device *counter)
+{
+ put_device(&counter->dev);
+}
+EXPORT_SYMBOL_GPL(counter_put);
+
+/**
+ * counter_add - complete registration of a counter
+ * @counter: the counter to add
+ *
+ * This is part two of counter registration.
+ *
+ * If this succeeds, call counter_unregister() to get rid of the counter_device again.
+ */
+int counter_add(struct counter_device *counter)
+{
+ int err;
+ struct device *dev = &counter->dev;
+
if (counter->parent) {
dev->parent = counter->parent;
dev->of_node = counter->parent->of_node;
}
- device_initialize(dev);
- dev_set_drvdata(dev, counter);
err = counter_sysfs_add(counter);
if (err < 0)
- goto err_free_id;
-
- err = counter_chrdev_add(counter);
- if (err < 0)
- goto err_free_id;
-
- err = cdev_device_add(&counter->chrdev, dev);
- if (err < 0)
- goto err_remove_chrdev;
-
- return 0;
+ return err;
-err_remove_chrdev:
- counter_chrdev_remove(counter);
-err_free_id:
- put_device(dev);
- return err;
+ /* implies device_add(dev) */
+ return cdev_device_add(&counter->chrdev, dev);
}
-EXPORT_SYMBOL_GPL(counter_register);
+EXPORT_SYMBOL_GPL(counter_add);
/**
* counter_unregister - unregister Counter from the system
@@ -121,8 +181,6 @@ void counter_unregister(struct counter_device *const counter)
wake_up(&counter->events_wait);
mutex_unlock(&counter->ops_exist_lock);
-
- put_device(&counter->dev);
}
EXPORT_SYMBOL_GPL(counter_unregister);
@@ -131,30 +189,56 @@ static void devm_counter_release(void *counter)
counter_unregister(counter);
}
+static void devm_counter_put(void *counter)
+{
+ counter_put(counter);
+}
+
/**
- * devm_counter_register - Resource-managed counter_register
- * @dev: device to allocate counter_device for
- * @counter: pointer to Counter to register
+ * devm_counter_alloc - allocate a counter_device
+ * @dev: the device to register the release callback for
+ * @sizeof_priv: size of the driver private data
*
- * Managed counter_register. The Counter registered with this function is
- * automatically unregistered on driver detach. This function calls
- * counter_register internally. Refer to that function for more information.
+ * This is the device managed version of counter_add(). It registers a cleanup
+ * callback to care for calling counter_put().
+ */
+struct counter_device *devm_counter_alloc(struct device *dev, size_t sizeof_priv)
+{
+ struct counter_device *counter;
+ int err;
+
+ counter = counter_alloc(sizeof_priv);
+ if (IS_ERR(counter))
+ return counter;
+
+ err = devm_add_action_or_reset(dev, devm_counter_put, counter);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ return counter;
+}
+EXPORT_SYMBOL_GPL(devm_counter_alloc);
+
+/**
+ * devm_counter_add - complete registration of a counter
+ * @dev: the device to register the release callback for
+ * @counter: the counter to add
*
- * RETURNS:
- * 0 on success, negative error number on failure.
+ * This is the device managed version of counter_add(). It registers a cleanup
+ * callback to care for calling counter_unregister().
*/
-int devm_counter_register(struct device *dev,
- struct counter_device *const counter)
+int devm_counter_add(struct device *dev,
+ struct counter_device *const counter)
{
int err;
- err = counter_register(counter);
+ err = counter_add(counter);
if (err < 0)
return err;
return devm_add_action_or_reset(dev, devm_counter_release, counter);
}
-EXPORT_SYMBOL_GPL(devm_counter_register);
+EXPORT_SYMBOL_GPL(devm_counter_add);
#define COUNTER_DEV_MAX 256
diff --git a/drivers/counter/ftm-quaddec.c b/drivers/counter/ftm-quaddec.c
index 5ef0478709cd..2a58582a9df4 100644
--- a/drivers/counter/ftm-quaddec.c
+++ b/drivers/counter/ftm-quaddec.c
@@ -26,7 +26,6 @@
})
struct ftm_quaddec {
- struct counter_device counter;
struct platform_device *pdev;
void __iomem *ftm_base;
bool big_endian;
@@ -118,7 +117,7 @@ static void ftm_quaddec_disable(void *ftm)
static int ftm_quaddec_get_prescaler(struct counter_device *counter,
struct counter_count *count, u32 *cnt_mode)
{
- struct ftm_quaddec *ftm = counter->priv;
+ struct ftm_quaddec *ftm = counter_priv(counter);
uint32_t scflags;
ftm_read(ftm, FTM_SC, &scflags);
@@ -131,7 +130,7 @@ static int ftm_quaddec_get_prescaler(struct counter_device *counter,
static int ftm_quaddec_set_prescaler(struct counter_device *counter,
struct counter_count *count, u32 cnt_mode)
{
- struct ftm_quaddec *ftm = counter->priv;
+ struct ftm_quaddec *ftm = counter_priv(counter);
mutex_lock(&ftm->ftm_quaddec_mutex);
@@ -162,7 +161,7 @@ static int ftm_quaddec_count_read(struct counter_device *counter,
struct counter_count *count,
u64 *val)
{
- struct ftm_quaddec *const ftm = counter->priv;
+ struct ftm_quaddec *const ftm = counter_priv(counter);
uint32_t cntval;
ftm_read(ftm, FTM_CNT, &cntval);
@@ -176,7 +175,7 @@ static int ftm_quaddec_count_write(struct counter_device *counter,
struct counter_count *count,
const u64 val)
{
- struct ftm_quaddec *const ftm = counter->priv;
+ struct ftm_quaddec *const ftm = counter_priv(counter);
if (val != 0) {
dev_warn(&ftm->pdev->dev, "Can only accept '0' as new counter value\n");
@@ -259,17 +258,17 @@ static struct counter_count ftm_quaddec_counts = {
static int ftm_quaddec_probe(struct platform_device *pdev)
{
+ struct counter_device *counter;
struct ftm_quaddec *ftm;
struct device_node *node = pdev->dev.of_node;
struct resource *io;
int ret;
- ftm = devm_kzalloc(&pdev->dev, sizeof(*ftm), GFP_KERNEL);
- if (!ftm)
+ counter = devm_counter_alloc(&pdev->dev, sizeof(*ftm));
+ if (!counter)
return -ENOMEM;
-
- platform_set_drvdata(pdev, ftm);
+ ftm = counter_priv(counter);
io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!io) {
@@ -285,14 +284,13 @@ static int ftm_quaddec_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "Failed to map memory region\n");
return -EINVAL;
}
- ftm->counter.name = dev_name(&pdev->dev);
- ftm->counter.parent = &pdev->dev;
- ftm->counter.ops = &ftm_quaddec_cnt_ops;
- ftm->counter.counts = &ftm_quaddec_counts;
- ftm->counter.num_counts = 1;
- ftm->counter.signals = ftm_quaddec_signals;
- ftm->counter.num_signals = ARRAY_SIZE(ftm_quaddec_signals);
- ftm->counter.priv = ftm;
+ counter->name = dev_name(&pdev->dev);
+ counter->parent = &pdev->dev;
+ counter->ops = &ftm_quaddec_cnt_ops;
+ counter->counts = &ftm_quaddec_counts;
+ counter->num_counts = 1;
+ counter->signals = ftm_quaddec_signals;
+ counter->num_signals = ARRAY_SIZE(ftm_quaddec_signals);
mutex_init(&ftm->ftm_quaddec_mutex);
@@ -302,9 +300,9 @@ static int ftm_quaddec_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = devm_counter_register(&pdev->dev, &ftm->counter);
+ ret = devm_counter_add(&pdev->dev, counter);
if (ret)
- return ret;
+ return dev_err_probe(&pdev->dev, ret, "Failed to add counter\n");
return 0;
}
diff --git a/drivers/counter/intel-qep.c b/drivers/counter/intel-qep.c
index 0924d16de6e2..47a6a9dfc9e8 100644
--- a/drivers/counter/intel-qep.c
+++ b/drivers/counter/intel-qep.c
@@ -63,7 +63,6 @@
#define INTEL_QEP_CLK_PERIOD_NS 10
struct intel_qep {
- struct counter_device counter;
struct mutex lock;
struct device *dev;
void __iomem *regs;
@@ -109,7 +108,7 @@ static void intel_qep_init(struct intel_qep *qep)
static int intel_qep_count_read(struct counter_device *counter,
struct counter_count *count, u64 *val)
{
- struct intel_qep *const qep = counter->priv;
+ struct intel_qep *const qep = counter_priv(counter);
pm_runtime_get_sync(qep->dev);
*val = intel_qep_readl(qep, INTEL_QEPCOUNT);
@@ -176,7 +175,7 @@ static struct counter_synapse intel_qep_count_synapses[] = {
static int intel_qep_ceiling_read(struct counter_device *counter,
struct counter_count *count, u64 *ceiling)
{
- struct intel_qep *qep = counter->priv;
+ struct intel_qep *qep = counter_priv(counter);
pm_runtime_get_sync(qep->dev);
*ceiling = intel_qep_readl(qep, INTEL_QEPMAX);
@@ -188,7 +187,7 @@ static int intel_qep_ceiling_read(struct counter_device *counter,
static int intel_qep_ceiling_write(struct counter_device *counter,
struct counter_count *count, u64 max)
{
- struct intel_qep *qep = counter->priv;
+ struct intel_qep *qep = counter_priv(counter);
int ret = 0;
/* Intel QEP ceiling configuration only supports 32-bit values */
@@ -213,7 +212,7 @@ out:
static int intel_qep_enable_read(struct counter_device *counter,
struct counter_count *count, u8 *enable)
{
- struct intel_qep *qep = counter->priv;
+ struct intel_qep *qep = counter_priv(counter);
*enable = qep->enabled;
@@ -223,7 +222,7 @@ static int intel_qep_enable_read(struct counter_device *counter,
static int intel_qep_enable_write(struct counter_device *counter,
struct counter_count *count, u8 val)
{
- struct intel_qep *qep = counter->priv;
+ struct intel_qep *qep = counter_priv(counter);
u32 reg;
bool changed;
@@ -256,7 +255,7 @@ static int intel_qep_spike_filter_ns_read(struct counter_device *counter,
struct counter_count *count,
u64 *length)
{
- struct intel_qep *qep = counter->priv;
+ struct intel_qep *qep = counter_priv(counter);
u32 reg;
pm_runtime_get_sync(qep->dev);
@@ -277,7 +276,7 @@ static int intel_qep_spike_filter_ns_write(struct counter_device *counter,
struct counter_count *count,
u64 length)
{
- struct intel_qep *qep = counter->priv;
+ struct intel_qep *qep = counter_priv(counter);
u32 reg;
bool enable;
int ret = 0;
@@ -326,7 +325,7 @@ static int intel_qep_preset_enable_read(struct counter_device *counter,
struct counter_count *count,
u8 *preset_enable)
{
- struct intel_qep *qep = counter->priv;
+ struct intel_qep *qep = counter_priv(counter);
u32 reg;
pm_runtime_get_sync(qep->dev);
@@ -341,7 +340,7 @@ static int intel_qep_preset_enable_read(struct counter_device *counter,
static int intel_qep_preset_enable_write(struct counter_device *counter,
struct counter_count *count, u8 val)
{
- struct intel_qep *qep = counter->priv;
+ struct intel_qep *qep = counter_priv(counter);
u32 reg;
int ret = 0;
@@ -392,14 +391,16 @@ static struct counter_count intel_qep_counter_count[] = {
static int intel_qep_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
+ struct counter_device *counter;
struct intel_qep *qep;
struct device *dev = &pci->dev;
void __iomem *regs;
int ret;
- qep = devm_kzalloc(dev, sizeof(*qep), GFP_KERNEL);
- if (!qep)
+ counter = devm_counter_alloc(dev, sizeof(*qep));
+ if (!counter)
return -ENOMEM;
+ qep = counter_priv(counter);
ret = pcim_enable_device(pci);
if (ret)
@@ -422,20 +423,23 @@ static int intel_qep_probe(struct pci_dev *pci, const struct pci_device_id *id)
intel_qep_init(qep);
pci_set_drvdata(pci, qep);
- qep->counter.name = pci_name(pci);
- qep->counter.parent = dev;
- qep->counter.ops = &intel_qep_counter_ops;
- qep->counter.counts = intel_qep_counter_count;
- qep->counter.num_counts = ARRAY_SIZE(intel_qep_counter_count);
- qep->counter.signals = intel_qep_signals;
- qep->counter.num_signals = ARRAY_SIZE(intel_qep_signals);
- qep->counter.priv = qep;
+ counter->name = pci_name(pci);
+ counter->parent = dev;
+ counter->ops = &intel_qep_counter_ops;
+ counter->counts = intel_qep_counter_count;
+ counter->num_counts = ARRAY_SIZE(intel_qep_counter_count);
+ counter->signals = intel_qep_signals;
+ counter->num_signals = ARRAY_SIZE(intel_qep_signals);
qep->enabled = false;
pm_runtime_put(dev);
pm_runtime_allow(dev);
- return devm_counter_register(&pci->dev, &qep->counter);
+ ret = devm_counter_add(&pci->dev, counter);
+ if (ret < 0)
+ return dev_err_probe(&pci->dev, ret, "Failed to add counter\n");
+
+ return 0;
}
static void intel_qep_remove(struct pci_dev *pci)
diff --git a/drivers/counter/interrupt-cnt.c b/drivers/counter/interrupt-cnt.c
index 8514a87fcbee..9e99702470c2 100644
--- a/drivers/counter/interrupt-cnt.c
+++ b/drivers/counter/interrupt-cnt.c
@@ -16,7 +16,6 @@
struct interrupt_cnt_priv {
atomic_t count;
- struct counter_device counter;
struct gpio_desc *gpio;
int irq;
bool enabled;
@@ -37,7 +36,7 @@ static irqreturn_t interrupt_cnt_isr(int irq, void *dev_id)
static int interrupt_cnt_enable_read(struct counter_device *counter,
struct counter_count *count, u8 *enable)
{
- struct interrupt_cnt_priv *priv = counter->priv;
+ struct interrupt_cnt_priv *priv = counter_priv(counter);
*enable = priv->enabled;
@@ -47,7 +46,7 @@ static int interrupt_cnt_enable_read(struct counter_device *counter,
static int interrupt_cnt_enable_write(struct counter_device *counter,
struct counter_count *count, u8 enable)
{
- struct interrupt_cnt_priv *priv = counter->priv;
+ struct interrupt_cnt_priv *priv = counter_priv(counter);
if (priv->enabled == enable)
return 0;
@@ -85,7 +84,7 @@ static int interrupt_cnt_action_read(struct counter_device *counter,
static int interrupt_cnt_read(struct counter_device *counter,
struct counter_count *count, u64 *val)
{
- struct interrupt_cnt_priv *priv = counter->priv;
+ struct interrupt_cnt_priv *priv = counter_priv(counter);
*val = atomic_read(&priv->count);
@@ -95,7 +94,7 @@ static int interrupt_cnt_read(struct counter_device *counter,
static int interrupt_cnt_write(struct counter_device *counter,
struct counter_count *count, const u64 val)
{
- struct interrupt_cnt_priv *priv = counter->priv;
+ struct interrupt_cnt_priv *priv = counter_priv(counter);
if (val != (typeof(priv->count.counter))val)
return -ERANGE;
@@ -122,7 +121,7 @@ static int interrupt_cnt_signal_read(struct counter_device *counter,
struct counter_signal *signal,
enum counter_signal_level *level)
{
- struct interrupt_cnt_priv *priv = counter->priv;
+ struct interrupt_cnt_priv *priv = counter_priv(counter);
int ret;
if (!priv->gpio)
@@ -148,12 +147,14 @@ static const struct counter_ops interrupt_cnt_ops = {
static int interrupt_cnt_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ struct counter_device *counter;
struct interrupt_cnt_priv *priv;
int ret;
- priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
+ counter = devm_counter_alloc(dev, sizeof(*priv));
+ if (!counter)
return -ENOMEM;
+ priv = counter_priv(counter);
priv->irq = platform_get_irq_optional(pdev, 0);
if (priv->irq == -ENXIO)
@@ -184,8 +185,8 @@ static int interrupt_cnt_probe(struct platform_device *pdev)
if (!priv->signals.name)
return -ENOMEM;
- priv->counter.signals = &priv->signals;
- priv->counter.num_signals = 1;
+ counter->signals = &priv->signals;
+ counter->num_signals = 1;
priv->synapses.actions_list = interrupt_cnt_synapse_actions;
priv->synapses.num_actions = ARRAY_SIZE(interrupt_cnt_synapse_actions);
@@ -199,12 +200,11 @@ static int interrupt_cnt_probe(struct platform_device *pdev)
priv->cnts.ext = interrupt_cnt_ext;
priv->cnts.num_ext = ARRAY_SIZE(interrupt_cnt_ext);
- priv->counter.priv = priv;
- priv->counter.name = dev_name(dev);
- priv->counter.parent = dev;
- priv->counter.ops = &interrupt_cnt_ops;
- priv->counter.counts = &priv->cnts;
- priv->counter.num_counts = 1;
+ counter->name = dev_name(dev);
+ counter->parent = dev;
+ counter->ops = &interrupt_cnt_ops;
+ counter->counts = &priv->cnts;
+ counter->num_counts = 1;
irq_set_status_flags(priv->irq, IRQ_NOAUTOEN);
ret = devm_request_irq(dev, priv->irq, interrupt_cnt_isr,
@@ -213,7 +213,11 @@ static int interrupt_cnt_probe(struct platform_device *pdev)
if (ret)
return ret;
- return devm_counter_register(dev, &priv->counter);
+ ret = devm_counter_add(dev, counter);
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "Failed to add counter\n");
+
+ return 0;
}
static const struct of_device_id interrupt_cnt_of_match[] = {
diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c
index 0ab1b2716784..00844445143b 100644
--- a/drivers/counter/microchip-tcb-capture.c
+++ b/drivers/counter/microchip-tcb-capture.c
@@ -24,7 +24,6 @@
struct mchp_tc_data {
const struct atmel_tcb_config *tc_cfg;
- struct counter_device counter;
struct regmap *regmap;
int qdec_mode;
int num_channels;
@@ -72,7 +71,7 @@ static int mchp_tc_count_function_read(struct counter_device *counter,
struct counter_count *count,
enum counter_function *function)
{
- struct mchp_tc_data *const priv = counter->priv;
+ struct mchp_tc_data *const priv = counter_priv(counter);
if (priv->qdec_mode)
*function = COUNTER_FUNCTION_QUADRATURE_X4;
@@ -86,7 +85,7 @@ static int mchp_tc_count_function_write(struct counter_device *counter,
struct counter_count *count,
enum counter_function function)
{
- struct mchp_tc_data *const priv = counter->priv;
+ struct mchp_tc_data *const priv = counter_priv(counter);
u32 bmr, cmr;
regmap_read(priv->regmap, ATMEL_TC_BMR, &bmr);
@@ -148,7 +147,7 @@ static int mchp_tc_count_signal_read(struct counter_device *counter,
struct counter_signal *signal,
enum counter_signal_level *lvl)
{
- struct mchp_tc_data *const priv = counter->priv;
+ struct mchp_tc_data *const priv = counter_priv(counter);
bool sigstatus;
u32 sr;
@@ -169,7 +168,7 @@ static int mchp_tc_count_action_read(struct counter_device *counter,
struct counter_synapse *synapse,
enum counter_synapse_action *action)
{
- struct mchp_tc_data *const priv = counter->priv;
+ struct mchp_tc_data *const priv = counter_priv(counter);
u32 cmr;
regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], CMR), &cmr);
@@ -197,7 +196,7 @@ static int mchp_tc_count_action_write(struct counter_device *counter,
struct counter_synapse *synapse,
enum counter_synapse_action action)
{
- struct mchp_tc_data *const priv = counter->priv;
+ struct mchp_tc_data *const priv = counter_priv(counter);
u32 edge = ATMEL_TC_ETRGEDG_NONE;
/* QDEC mode is rising edge only */
@@ -230,7 +229,7 @@ static int mchp_tc_count_action_write(struct counter_device *counter,
static int mchp_tc_count_read(struct counter_device *counter,
struct counter_count *count, u64 *val)
{
- struct mchp_tc_data *const priv = counter->priv;
+ struct mchp_tc_data *const priv = counter_priv(counter);
u32 cnt;
regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], CV), &cnt);
@@ -296,6 +295,7 @@ static int mchp_tc_probe(struct platform_device *pdev)
struct device_node *np = pdev->dev.of_node;
const struct atmel_tcb_config *tcb_config;
const struct of_device_id *match;
+ struct counter_device *counter;
struct mchp_tc_data *priv;
char clk_name[7];
struct regmap *regmap;
@@ -303,11 +303,10 @@ static int mchp_tc_probe(struct platform_device *pdev)
int channel;
int ret, i;
- priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
+ counter = devm_counter_alloc(&pdev->dev, sizeof(*priv));
+ if (!counter)
return -ENOMEM;
-
- platform_set_drvdata(pdev, priv);
+ priv = counter_priv(counter);
match = of_match_node(atmel_tc_of_match, np->parent);
tcb_config = match->data;
@@ -362,16 +361,19 @@ static int mchp_tc_probe(struct platform_device *pdev)
priv->tc_cfg = tcb_config;
priv->regmap = regmap;
- priv->counter.name = dev_name(&pdev->dev);
- priv->counter.parent = &pdev->dev;
- priv->counter.ops = &mchp_tc_ops;
- priv->counter.num_counts = ARRAY_SIZE(mchp_tc_counts);
- priv->counter.counts = mchp_tc_counts;
- priv->counter.num_signals = ARRAY_SIZE(mchp_tc_count_signals);
- priv->counter.signals = mchp_tc_count_signals;
- priv->counter.priv = priv;
-
- return devm_counter_register(&pdev->dev, &priv->counter);
+ counter->name = dev_name(&pdev->dev);
+ counter->parent = &pdev->dev;
+ counter->ops = &mchp_tc_ops;
+ counter->num_counts = ARRAY_SIZE(mchp_tc_counts);
+ counter->counts = mchp_tc_counts;
+ counter->num_signals = ARRAY_SIZE(mchp_tc_count_signals);
+ counter->signals = mchp_tc_count_signals;
+
+ ret = devm_counter_add(&pdev->dev, counter);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "Failed to add counter\n");
+
+ return 0;
}
static const struct of_device_id mchp_tc_dt_ids[] = {
diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c
index 5168833b1fdf..68031d93ce89 100644
--- a/drivers/counter/stm32-lptimer-cnt.c
+++ b/drivers/counter/stm32-lptimer-cnt.c
@@ -20,7 +20,6 @@
#include <linux/types.h>
struct stm32_lptim_cnt {
- struct counter_device counter;
struct device *dev;
struct regmap *regmap;
struct clk *clk;
@@ -141,7 +140,7 @@ static const enum counter_synapse_action stm32_lptim_cnt_synapse_actions[] = {
static int stm32_lptim_cnt_read(struct counter_device *counter,
struct counter_count *count, u64 *val)
{
- struct stm32_lptim_cnt *const priv = counter->priv;
+ struct stm32_lptim_cnt *const priv = counter_priv(counter);
u32 cnt;
int ret;
@@ -158,7 +157,7 @@ static int stm32_lptim_cnt_function_read(struct counter_device *counter,
struct counter_count *count,
enum counter_function *function)
{
- struct stm32_lptim_cnt *const priv = counter->priv;
+ struct stm32_lptim_cnt *const priv = counter_priv(counter);
if (!priv->quadrature_mode) {
*function = COUNTER_FUNCTION_INCREASE;
@@ -177,7 +176,7 @@ static int stm32_lptim_cnt_function_write(struct counter_device *counter,
struct counter_count *count,
enum counter_function function)
{
- struct stm32_lptim_cnt *const priv = counter->priv;
+ struct stm32_lptim_cnt *const priv = counter_priv(counter);
if (stm32_lptim_is_enabled(priv))
return -EBUSY;
@@ -200,7 +199,7 @@ static int stm32_lptim_cnt_enable_read(struct counter_device *counter,
struct counter_count *count,
u8 *enable)
{
- struct stm32_lptim_cnt *const priv = counter->priv;
+ struct stm32_lptim_cnt *const priv = counter_priv(counter);
int ret;
ret = stm32_lptim_is_enabled(priv);
@@ -216,7 +215,7 @@ static int stm32_lptim_cnt_enable_write(struct counter_device *counter,
struct counter_count *count,
u8 enable)
{
- struct stm32_lptim_cnt *const priv = counter->priv;
+ struct stm32_lptim_cnt *const priv = counter_priv(counter);
int ret;
/* Check nobody uses the timer, or already disabled/enabled */
@@ -241,7 +240,7 @@ static int stm32_lptim_cnt_ceiling_read(struct counter_device *counter,
struct counter_count *count,
u64 *ceiling)
{
- struct stm32_lptim_cnt *const priv = counter->priv;
+ struct stm32_lptim_cnt *const priv = counter_priv(counter);
*ceiling = priv->ceiling;
@@ -252,7 +251,7 @@ static int stm32_lptim_cnt_ceiling_write(struct counter_device *counter,
struct counter_count *count,
u64 ceiling)
{
- struct stm32_lptim_cnt *const priv = counter->priv;
+ struct stm32_lptim_cnt *const priv = counter_priv(counter);
if (stm32_lptim_is_enabled(priv))
return -EBUSY;
@@ -277,7 +276,7 @@ static int stm32_lptim_cnt_action_read(struct counter_device *counter,
struct counter_synapse *synapse,
enum counter_synapse_action *action)
{
- struct stm32_lptim_cnt *const priv = counter->priv;
+ struct stm32_lptim_cnt *const priv = counter_priv(counter);
enum counter_function function;
int err;
@@ -321,7 +320,7 @@ static int stm32_lptim_cnt_action_write(struct counter_device *counter,
struct counter_synapse *synapse,
enum counter_synapse_action action)
{
- struct stm32_lptim_cnt *const priv = counter->priv;
+ struct stm32_lptim_cnt *const priv = counter_priv(counter);
enum counter_function function;
int err;
@@ -411,14 +410,17 @@ static struct counter_count stm32_lptim_in1_counts = {
static int stm32_lptim_cnt_probe(struct platform_device *pdev)
{
struct stm32_lptimer *ddata = dev_get_drvdata(pdev->dev.parent);
+ struct counter_device *counter;
struct stm32_lptim_cnt *priv;
+ int ret;
if (IS_ERR_OR_NULL(ddata))
return -EINVAL;
- priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
+ counter = devm_counter_alloc(&pdev->dev, sizeof(*priv));
+ if (!counter)
return -ENOMEM;
+ priv = counter_priv(counter);
priv->dev = &pdev->dev;
priv->regmap = ddata->regmap;
@@ -426,23 +428,26 @@ static int stm32_lptim_cnt_probe(struct platform_device *pdev)
priv->ceiling = STM32_LPTIM_MAX_ARR;
/* Initialize Counter device */
- priv->counter.name = dev_name(&pdev->dev);
- priv->counter.parent = &pdev->dev;
- priv->counter.ops = &stm32_lptim_cnt_ops;
+ counter->name = dev_name(&pdev->dev);
+ counter->parent = &pdev->dev;
+ counter->ops = &stm32_lptim_cnt_ops;
if (ddata->has_encoder) {
- priv->counter.counts = &stm32_lptim_enc_counts;
- priv->counter.num_signals = ARRAY_SIZE(stm32_lptim_cnt_signals);
+ counter->counts = &stm32_lptim_enc_counts;
+ counter->num_signals = ARRAY_SIZE(stm32_lptim_cnt_signals);
} else {
- priv->counter.counts = &stm32_lptim_in1_counts;
- priv->counter.num_signals = 1;
+ counter->counts = &stm32_lptim_in1_counts;
+ counter->num_signals = 1;
}
- priv->counter.num_counts = 1;
- priv->counter.signals = stm32_lptim_cnt_signals;
- priv->counter.priv = priv;
+ counter->num_counts = 1;
+ counter->signals = stm32_lptim_cnt_signals;
platform_set_drvdata(pdev, priv);
- return devm_counter_register(&pdev->dev, &priv->counter);
+ ret = devm_counter_add(&pdev->dev, counter);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "Failed to add counter\n");
+
+ return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c
index 0546e932db0c..5779ae7c73cf 100644
--- a/drivers/counter/stm32-timer-cnt.c
+++ b/drivers/counter/stm32-timer-cnt.c
@@ -29,7 +29,6 @@ struct stm32_timer_regs {
};
struct stm32_timer_cnt {
- struct counter_device counter;
struct regmap *regmap;
struct clk *clk;
u32 max_arr;
@@ -47,7 +46,7 @@ static const enum counter_function stm32_count_functions[] = {
static int stm32_count_read(struct counter_device *counter,
struct counter_count *count, u64 *val)
{
- struct stm32_timer_cnt *const priv = counter->priv;
+ struct stm32_timer_cnt *const priv = counter_priv(counter);
u32 cnt;
regmap_read(priv->regmap, TIM_CNT, &cnt);
@@ -59,7 +58,7 @@ static int stm32_count_read(struct counter_device *counter,
static int stm32_count_write(struct counter_device *counter,
struct counter_count *count, const u64 val)
{
- struct stm32_timer_cnt *const priv = counter->priv;
+ struct stm32_timer_cnt *const priv = counter_priv(counter);
u32 ceiling;
regmap_read(priv->regmap, TIM_ARR, &ceiling);
@@ -73,7 +72,7 @@ static int stm32_count_function_read(struct counter_device *counter,
struct counter_count *count,
enum counter_function *function)
{
- struct stm32_timer_cnt *const priv = counter->priv;
+ struct stm32_timer_cnt *const priv = counter_priv(counter);
u32 smcr;
regmap_read(priv->regmap, TIM_SMCR, &smcr);
@@ -100,7 +99,7 @@ static int stm32_count_function_write(struct counter_device *counter,
struct counter_count *count,
enum counter_function function)
{
- struct stm32_timer_cnt *const priv = counter->priv;
+ struct stm32_timer_cnt *const priv = counter_priv(counter);
u32 cr1, sms;
switch (function) {
@@ -140,7 +139,7 @@ static int stm32_count_direction_read(struct counter_device *counter,
struct counter_count *count,
enum counter_count_direction *direction)
{
- struct stm32_timer_cnt *const priv = counter->priv;
+ struct stm32_timer_cnt *const priv = counter_priv(counter);
u32 cr1;
regmap_read(priv->regmap, TIM_CR1, &cr1);
@@ -153,7 +152,7 @@ static int stm32_count_direction_read(struct counter_device *counter,
static int stm32_count_ceiling_read(struct counter_device *counter,
struct counter_count *count, u64 *ceiling)
{
- struct stm32_timer_cnt *const priv = counter->priv;
+ struct stm32_timer_cnt *const priv = counter_priv(counter);
u32 arr;
regmap_read(priv->regmap, TIM_ARR, &arr);
@@ -166,7 +165,7 @@ static int stm32_count_ceiling_read(struct counter_device *counter,
static int stm32_count_ceiling_write(struct counter_device *counter,
struct counter_count *count, u64 ceiling)
{
- struct stm32_timer_cnt *const priv = counter->priv;
+ struct stm32_timer_cnt *const priv = counter_priv(counter);
if (ceiling > priv->max_arr)
return -ERANGE;
@@ -181,7 +180,7 @@ static int stm32_count_ceiling_write(struct counter_device *counter,
static int stm32_count_enable_read(struct counter_device *counter,
struct counter_count *count, u8 *enable)
{
- struct stm32_timer_cnt *const priv = counter->priv;
+ struct stm32_timer_cnt *const priv = counter_priv(counter);
u32 cr1;
regmap_read(priv->regmap, TIM_CR1, &cr1);
@@ -194,7 +193,7 @@ static int stm32_count_enable_read(struct counter_device *counter,
static int stm32_count_enable_write(struct counter_device *counter,
struct counter_count *count, u8 enable)
{
- struct stm32_timer_cnt *const priv = counter->priv;
+ struct stm32_timer_cnt *const priv = counter_priv(counter);
u32 cr1;
if (enable) {
@@ -317,31 +316,38 @@ static int stm32_timer_cnt_probe(struct platform_device *pdev)
struct stm32_timers *ddata = dev_get_drvdata(pdev->dev.parent);
struct device *dev = &pdev->dev;
struct stm32_timer_cnt *priv;
+ struct counter_device *counter;
+ int ret;
if (IS_ERR_OR_NULL(ddata))
return -EINVAL;
- priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
+ counter = devm_counter_alloc(dev, sizeof(*priv));
+ if (!counter)
return -ENOMEM;
+ priv = counter_priv(counter);
+
priv->regmap = ddata->regmap;
priv->clk = ddata->clk;
priv->max_arr = ddata->max_arr;
- priv->counter.name = dev_name(dev);
- priv->counter.parent = dev;
- priv->counter.ops = &stm32_timer_cnt_ops;
- priv->counter.counts = &stm32_counts;
- priv->counter.num_counts = 1;
- priv->counter.signals = stm32_signals;
- priv->counter.num_signals = ARRAY_SIZE(stm32_signals);
- priv->counter.priv = priv;
+ counter->name = dev_name(dev);
+ counter->parent = dev;
+ counter->ops = &stm32_timer_cnt_ops;
+ counter->counts = &stm32_counts;
+ counter->num_counts = 1;
+ counter->signals = stm32_signals;
+ counter->num_signals = ARRAY_SIZE(stm32_signals);
platform_set_drvdata(pdev, priv);
/* Register Counter device */
- return devm_counter_register(dev, &priv->counter);
+ ret = devm_counter_add(dev, counter);
+ if (ret < 0)
+ dev_err_probe(dev, ret, "Failed to add counter\n");
+
+ return ret;
}
static int __maybe_unused stm32_timer_cnt_suspend(struct device *dev)
diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c
index 09817c953f9a..0489d26eb47c 100644
--- a/drivers/counter/ti-eqep.c
+++ b/drivers/counter/ti-eqep.c
@@ -87,10 +87,15 @@ struct ti_eqep_cnt {
struct regmap *regmap16;
};
+static struct ti_eqep_cnt *ti_eqep_count_from_counter(struct counter_device *counter)
+{
+ return counter_priv(counter);
+}
+
static int ti_eqep_count_read(struct counter_device *counter,
struct counter_count *count, u64 *val)
{
- struct ti_eqep_cnt *priv = counter->priv;
+ struct ti_eqep_cnt *priv = ti_eqep_count_from_counter(counter);
u32 cnt;
regmap_read(priv->regmap32, QPOSCNT, &cnt);
@@ -102,7 +107,7 @@ static int ti_eqep_count_read(struct counter_device *counter,
static int ti_eqep_count_write(struct counter_device *counter,
struct counter_count *count, u64 val)
{
- struct ti_eqep_cnt *priv = counter->priv;
+ struct ti_eqep_cnt *priv = ti_eqep_count_from_counter(counter);
u32 max;
regmap_read(priv->regmap32, QPOSMAX, &max);
@@ -116,7 +121,7 @@ static int ti_eqep_function_read(struct counter_device *counter,
struct counter_count *count,
enum counter_function *function)
{
- struct ti_eqep_cnt *priv = counter->priv;
+ struct ti_eqep_cnt *priv = ti_eqep_count_from_counter(counter);
u32 qdecctl;
regmap_read(priv->regmap16, QDECCTL, &qdecctl);
@@ -143,7 +148,7 @@ static int ti_eqep_function_write(struct counter_device *counter,
struct counter_count *count,
enum counter_function function)
{
- struct ti_eqep_cnt *priv = counter->priv;
+ struct ti_eqep_cnt *priv = ti_eqep_count_from_counter(counter);
enum ti_eqep_count_func qsrc;
switch (function) {
@@ -173,7 +178,7 @@ static int ti_eqep_action_read(struct counter_device *counter,
struct counter_synapse *synapse,
enum counter_synapse_action *action)
{
- struct ti_eqep_cnt *priv = counter->priv;
+ struct ti_eqep_cnt *priv = ti_eqep_count_from_counter(counter);
enum counter_function function;
u32 qdecctl;
int err;
@@ -245,7 +250,7 @@ static int ti_eqep_position_ceiling_read(struct counter_device *counter,
struct counter_count *count,
u64 *ceiling)
{
- struct ti_eqep_cnt *priv = counter->priv;
+ struct ti_eqep_cnt *priv = ti_eqep_count_from_counter(counter);
u32 qposmax;
regmap_read(priv->regmap32, QPOSMAX, &qposmax);
@@ -259,7 +264,7 @@ static int ti_eqep_position_ceiling_write(struct counter_device *counter,
struct counter_count *count,
u64 ceiling)
{
- struct ti_eqep_cnt *priv = counter->priv;
+ struct ti_eqep_cnt *priv = ti_eqep_count_from_counter(counter);
if (ceiling != (u32)ceiling)
return -ERANGE;
@@ -272,7 +277,7 @@ static int ti_eqep_position_ceiling_write(struct counter_device *counter,
static int ti_eqep_position_enable_read(struct counter_device *counter,
struct counter_count *count, u8 *enable)
{
- struct ti_eqep_cnt *priv = counter->priv;
+ struct ti_eqep_cnt *priv = ti_eqep_count_from_counter(counter);
u32 qepctl;
regmap_read(priv->regmap16, QEPCTL, &qepctl);
@@ -285,7 +290,7 @@ static int ti_eqep_position_enable_read(struct counter_device *counter,
static int ti_eqep_position_enable_write(struct counter_device *counter,
struct counter_count *count, u8 enable)
{
- struct ti_eqep_cnt *priv = counter->priv;
+ struct ti_eqep_cnt *priv = ti_eqep_count_from_counter(counter);
regmap_write_bits(priv->regmap16, QEPCTL, QEPCTL_PHEN, enable ? -1 : 0);
@@ -368,13 +373,15 @@ static const struct regmap_config ti_eqep_regmap16_config = {
static int ti_eqep_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ struct counter_device *counter;
struct ti_eqep_cnt *priv;
void __iomem *base;
int err;
- priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
+ counter = devm_counter_alloc(dev, sizeof(*priv));
+ if (!counter)
return -ENOMEM;
+ priv = counter_priv(counter);
base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(base))
@@ -390,16 +397,15 @@ static int ti_eqep_probe(struct platform_device *pdev)
if (IS_ERR(priv->regmap16))
return PTR_ERR(priv->regmap16);
- priv->counter.name = dev_name(dev);
- priv->counter.parent = dev;
- priv->counter.ops = &ti_eqep_counter_ops;
- priv->counter.counts = ti_eqep_counts;
- priv->counter.num_counts = ARRAY_SIZE(ti_eqep_counts);
- priv->counter.signals = ti_eqep_signals;
- priv->counter.num_signals = ARRAY_SIZE(ti_eqep_signals);
- priv->counter.priv = priv;
+ counter->name = dev_name(dev);
+ counter->parent = dev;
+ counter->ops = &ti_eqep_counter_ops;
+ counter->counts = ti_eqep_counts;
+ counter->num_counts = ARRAY_SIZE(ti_eqep_counts);
+ counter->signals = ti_eqep_signals;
+ counter->num_signals = ARRAY_SIZE(ti_eqep_signals);
- platform_set_drvdata(pdev, priv);
+ platform_set_drvdata(pdev, counter);
/*
* Need to make sure power is turned on. On AM33xx, this comes from the
@@ -409,7 +415,7 @@ static int ti_eqep_probe(struct platform_device *pdev)
pm_runtime_enable(dev);
pm_runtime_get_sync(dev);
- err = counter_register(&priv->counter);
+ err = counter_add(counter);
if (err < 0) {
pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
@@ -421,10 +427,10 @@ static int ti_eqep_probe(struct platform_device *pdev)
static int ti_eqep_remove(struct platform_device *pdev)
{
- struct ti_eqep_cnt *priv = platform_get_drvdata(pdev);
+ struct counter_device *counter = platform_get_drvdata(pdev);
struct device *dev = &pdev->dev;
- counter_unregister(&priv->counter);
+ counter_unregister(counter);
pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index e2375d992308..8e977b7627cb 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -404,7 +404,7 @@ static int virtcrypto_probe(struct virtio_device *vdev)
free_engines:
virtcrypto_clear_crypto_engines(vcrypto);
free_vqs:
- vcrypto->vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
virtcrypto_del_vqs(vcrypto);
free_dev:
virtcrypto_devmgr_rm_dev(vcrypto);
@@ -436,7 +436,7 @@ static void virtcrypto_remove(struct virtio_device *vdev)
if (virtcrypto_dev_started(vcrypto))
virtcrypto_dev_stop(vcrypto);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
virtcrypto_free_unused_reqs(vcrypto);
virtcrypto_clear_crypto_engines(vcrypto);
virtcrypto_del_vqs(vcrypto);
@@ -456,7 +456,7 @@ static int virtcrypto_freeze(struct virtio_device *vdev)
{
struct virtio_crypto *vcrypto = vdev->priv;
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
virtcrypto_free_unused_reqs(vcrypto);
if (virtcrypto_dev_started(vcrypto))
virtcrypto_dev_stop(vcrypto);
@@ -492,7 +492,7 @@ static int virtcrypto_restore(struct virtio_device *vdev)
free_engines:
virtcrypto_clear_crypto_engines(vcrypto);
free_vqs:
- vcrypto->vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
virtcrypto_del_vqs(vcrypto);
return err;
}
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index ee4568ef757c..1dad813ee4a6 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -127,11 +127,35 @@ ATTRIBUTE_GROUPS(dax_drv);
static int dax_bus_match(struct device *dev, struct device_driver *drv);
+/*
+ * Static dax regions are regions created by an external subsystem
+ * nvdimm where a single range is assigned. Its boundaries are by the external
+ * subsystem and are usually limited to one physical memory range. For example,
+ * for PMEM it is usually defined by NVDIMM Namespace boundaries (i.e. a
+ * single contiguous range)
+ *
+ * On dynamic dax regions, the assigned region can be partitioned by dax core
+ * into multiple subdivisions. A subdivision is represented into one
+ * /dev/daxN.M device composed by one or more potentially discontiguous ranges.
+ *
+ * When allocating a dax region, drivers must set whether it's static
+ * (IORESOURCE_DAX_STATIC). On static dax devices, the @pgmap is pre-assigned
+ * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
+ * devices it is NULL but afterwards allocated by dax core on device ->probe().
+ * Care is needed to make sure that dynamic dax devices are torn down with a
+ * cleared @pgmap field (see kill_dev_dax()).
+ */
static bool is_static(struct dax_region *dax_region)
{
return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}
+bool static_dev_dax(struct dev_dax *dev_dax)
+{
+ return is_static(dev_dax->region);
+}
+EXPORT_SYMBOL_GPL(static_dev_dax);
+
static u64 dev_dax_size(struct dev_dax *dev_dax)
{
u64 size = 0;
@@ -361,6 +385,14 @@ void kill_dev_dax(struct dev_dax *dev_dax)
kill_dax(dax_dev);
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+ /*
+ * Dynamic dax region have the pgmap allocated via dev_kzalloc()
+ * and thus freed by devm. Clear the pgmap to not have stale pgmap
+ * ranges on probe() from previous reconfigurations of region devices.
+ */
+ if (!static_dev_dax(dev_dax))
+ dev_dax->pgmap = NULL;
}
EXPORT_SYMBOL_GPL(kill_dev_dax);
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index 381cec9ff05c..fbb940293d6d 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -39,6 +39,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
__dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
void dax_driver_unregister(struct dax_device_driver *dax_drv);
void kill_dev_dax(struct dev_dax *dev_dax);
+bool static_dev_dax(struct dev_dax *dev_dax);
/*
* While run_dax() is potentially a generic operation that could be
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index e58d597f0415..d33a0613ed0c 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -73,11 +73,39 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
return -1;
}
+static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
+ unsigned long fault_size)
+{
+ unsigned long i, nr_pages = fault_size / PAGE_SIZE;
+ struct file *filp = vmf->vma->vm_file;
+ struct dev_dax *dev_dax = filp->private_data;
+ pgoff_t pgoff;
+
+ /* mapping is only set on the head */
+ if (dev_dax->pgmap->vmemmap_shift)
+ nr_pages = 1;
+
+ pgoff = linear_page_index(vmf->vma,
+ ALIGN(vmf->address, fault_size));
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+
+ page = compound_head(page);
+ if (page->mapping)
+ continue;
+
+ page->mapping = filp->f_mapping;
+ page->index = pgoff + i;
+ }
+}
+
static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
+ pfn_t pfn;
unsigned int fault_size = PAGE_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__))
@@ -98,18 +126,21 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
- *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
- return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
+ dax_set_mapping(vmf, pfn, fault_size);
+
+ return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
}
static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
unsigned long pmd_addr = vmf->address & PMD_MASK;
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
pgoff_t pgoff;
+ pfn_t pfn;
unsigned int fault_size = PMD_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__))
@@ -138,19 +169,22 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
- *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
- return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+ dax_set_mapping(vmf, pfn, fault_size);
+
+ return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
}
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
unsigned long pud_addr = vmf->address & PUD_MASK;
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
pgoff_t pgoff;
+ pfn_t pfn;
unsigned int fault_size = PUD_SIZE;
@@ -180,13 +214,15 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
- *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
- return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+ dax_set_mapping(vmf, pfn, fault_size);
+
+ return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
}
#else
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
return VM_FAULT_FALLBACK;
}
@@ -196,10 +232,8 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
{
struct file *filp = vmf->vma->vm_file;
- unsigned long fault_size;
vm_fault_t rc = VM_FAULT_SIGBUS;
int id;
- pfn_t pfn;
struct dev_dax *dev_dax = filp->private_data;
dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
@@ -209,43 +243,18 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
id = dax_read_lock();
switch (pe_size) {
case PE_SIZE_PTE:
- fault_size = PAGE_SIZE;
- rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
+ rc = __dev_dax_pte_fault(dev_dax, vmf);
break;
case PE_SIZE_PMD:
- fault_size = PMD_SIZE;
- rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
+ rc = __dev_dax_pmd_fault(dev_dax, vmf);
break;
case PE_SIZE_PUD:
- fault_size = PUD_SIZE;
- rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
+ rc = __dev_dax_pud_fault(dev_dax, vmf);
break;
default:
rc = VM_FAULT_SIGBUS;
}
- if (rc == VM_FAULT_NOPAGE) {
- unsigned long i;
- pgoff_t pgoff;
-
- /*
- * In the device-dax case the only possibility for a
- * VM_FAULT_NOPAGE result is when device-dax capacity is
- * mapped. No need to consider the zero page, or racing
- * conflicting mappings.
- */
- pgoff = linear_page_index(vmf->vma, vmf->address
- & ~(fault_size - 1));
- for (i = 0; i < fault_size / PAGE_SIZE; i++) {
- struct page *page;
-
- page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
- if (page->mapping)
- continue;
- page->mapping = filp->f_mapping;
- page->index = pgoff + i;
- }
- }
dax_read_unlock(id);
return rc;
@@ -398,17 +407,34 @@ int dev_dax_probe(struct dev_dax *dev_dax)
void *addr;
int rc, i;
- pgmap = dev_dax->pgmap;
- if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
- "static pgmap / multi-range device conflict\n"))
- return -EINVAL;
+ if (static_dev_dax(dev_dax)) {
+ if (dev_dax->nr_range > 1) {
+ dev_warn(dev,
+ "static pgmap / multi-range device conflict\n");
+ return -EINVAL;
+ }
- if (!pgmap) {
- pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
- * (dev_dax->nr_range - 1), GFP_KERNEL);
+ pgmap = dev_dax->pgmap;
+ } else {
+ if (dev_dax->pgmap) {
+ dev_warn(dev,
+ "dynamic-dax with pre-populated page map\n");
+ return -EINVAL;
+ }
+
+ pgmap = devm_kzalloc(dev,
+ struct_size(pgmap, ranges, dev_dax->nr_range - 1),
+ GFP_KERNEL);
if (!pgmap)
return -ENOMEM;
+
pgmap->nr_range = dev_dax->nr_range;
+ dev_dax->pgmap = pgmap;
+
+ for (i = 0; i < dev_dax->nr_range; i++) {
+ struct range *range = &dev_dax->ranges[i].range;
+ pgmap->ranges[i] = *range;
+ }
}
for (i = 0; i < dev_dax->nr_range; i++) {
@@ -420,12 +446,12 @@ int dev_dax_probe(struct dev_dax *dev_dax)
i, range->start, range->end);
return -EBUSY;
}
- /* don't update the range for static pgmap */
- if (!dev_dax->pgmap)
- pgmap->ranges[i] = *range;
}
pgmap->type = MEMORY_DEVICE_GENERIC;
+ if (dev_dax->align > PAGE_SIZE)
+ pgmap->vmemmap_shift =
+ order_base_2(dev_dax->align >> PAGE_SHIFT);
addr = devm_memremap_pages(dev, pgmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
index 0c05b79870f9..83f02bd51dda 100644
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -124,10 +124,11 @@ static int cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
struct cma_heap_buffer *buffer = dmabuf->priv;
struct dma_heap_attachment *a;
+ mutex_lock(&buffer->lock);
+
if (buffer->vmap_cnt)
invalidate_kernel_vmap_range(buffer->vaddr, buffer->len);
- mutex_lock(&buffer->lock);
list_for_each_entry(a, &buffer->attachments, list) {
if (!a->mapped)
continue;
@@ -144,10 +145,11 @@ static int cma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
struct cma_heap_buffer *buffer = dmabuf->priv;
struct dma_heap_attachment *a;
+ mutex_lock(&buffer->lock);
+
if (buffer->vmap_cnt)
flush_kernel_vmap_range(buffer->vaddr, buffer->len);
- mutex_lock(&buffer->lock);
list_for_each_entry(a, &buffer->attachments, list) {
if (!a->mapped)
continue;
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 275a76f188ae..a1da2b4b6d73 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -99,6 +99,7 @@
#define AT_XDMAC_CNDC_NDE (0x1 << 0) /* Channel x Next Descriptor Enable */
#define AT_XDMAC_CNDC_NDSUP (0x1 << 1) /* Channel x Next Descriptor Source Update */
#define AT_XDMAC_CNDC_NDDUP (0x1 << 2) /* Channel x Next Descriptor Destination Update */
+#define AT_XDMAC_CNDC_NDVIEW_MASK GENMASK(28, 27)
#define AT_XDMAC_CNDC_NDVIEW_NDV0 (0x0 << 3) /* Channel x Next Descriptor View 0 */
#define AT_XDMAC_CNDC_NDVIEW_NDV1 (0x1 << 3) /* Channel x Next Descriptor View 1 */
#define AT_XDMAC_CNDC_NDVIEW_NDV2 (0x2 << 3) /* Channel x Next Descriptor View 2 */
@@ -252,15 +253,15 @@ struct at_xdmac {
/* Linked List Descriptor */
struct at_xdmac_lld {
- dma_addr_t mbr_nda; /* Next Descriptor Member */
- u32 mbr_ubc; /* Microblock Control Member */
- dma_addr_t mbr_sa; /* Source Address Member */
- dma_addr_t mbr_da; /* Destination Address Member */
- u32 mbr_cfg; /* Configuration Register */
- u32 mbr_bc; /* Block Control Register */
- u32 mbr_ds; /* Data Stride Register */
- u32 mbr_sus; /* Source Microblock Stride Register */
- u32 mbr_dus; /* Destination Microblock Stride Register */
+ u32 mbr_nda; /* Next Descriptor Member */
+ u32 mbr_ubc; /* Microblock Control Member */
+ u32 mbr_sa; /* Source Address Member */
+ u32 mbr_da; /* Destination Address Member */
+ u32 mbr_cfg; /* Configuration Register */
+ u32 mbr_bc; /* Block Control Register */
+ u32 mbr_ds; /* Data Stride Register */
+ u32 mbr_sus; /* Source Microblock Stride Register */
+ u32 mbr_dus; /* Destination Microblock Stride Register */
};
/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
@@ -385,9 +386,6 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first);
- if (at_xdmac_chan_is_enabled(atchan))
- return;
-
/* Set transfer as active to not try to start it again. */
first->active_xfer = true;
@@ -405,7 +403,8 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
*/
if (at_xdmac_chan_is_cyclic(atchan))
reg = AT_XDMAC_CNDC_NDVIEW_NDV1;
- else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3)
+ else if ((first->lld.mbr_ubc &
+ AT_XDMAC_CNDC_NDVIEW_MASK) == AT_XDMAC_MBR_UBC_NDV3)
reg = AT_XDMAC_CNDC_NDVIEW_NDV3;
else
reg = AT_XDMAC_CNDC_NDVIEW_NDV2;
@@ -476,13 +475,12 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
spin_lock_irqsave(&atchan->lock, irqflags);
cookie = dma_cookie_assign(tx);
+ list_add_tail(&desc->xfer_node, &atchan->xfers_list);
+ spin_unlock_irqrestore(&atchan->lock, irqflags);
+
dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
__func__, atchan, desc);
- list_add_tail(&desc->xfer_node, &atchan->xfers_list);
- if (list_is_singular(&atchan->xfers_list))
- at_xdmac_start_xfer(atchan, desc);
- spin_unlock_irqrestore(&atchan->lock, irqflags);
return cookie;
}
@@ -733,7 +731,8 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
if (!desc) {
dev_err(chan2dev(chan), "can't get descriptor\n");
if (first)
- list_splice_init(&first->descs_list, &atchan->free_descs_list);
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
goto spin_unlock;
}
@@ -821,7 +820,8 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
if (!desc) {
dev_err(chan2dev(chan), "can't get descriptor\n");
if (first)
- list_splice_init(&first->descs_list, &atchan->free_descs_list);
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
spin_unlock_irqrestore(&atchan->lock, irqflags);
return NULL;
}
@@ -1055,8 +1055,8 @@ at_xdmac_prep_interleaved(struct dma_chan *chan,
src_addr, dst_addr,
xt, chunk);
if (!desc) {
- list_splice_init(&first->descs_list,
- &atchan->free_descs_list);
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
return NULL;
}
@@ -1136,7 +1136,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
if (!desc) {
dev_err(chan2dev(chan), "can't get descriptor\n");
if (first)
- list_splice_init(&first->descs_list, &atchan->free_descs_list);
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
return NULL;
}
@@ -1312,8 +1313,8 @@ at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl,
sg_dma_len(sg),
value);
if (!desc && first)
- list_splice_init(&first->descs_list,
- &atchan->free_descs_list);
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
if (!first)
first = desc;
@@ -1586,20 +1587,6 @@ spin_unlock:
return ret;
}
-/* Call must be protected by lock. */
-static void at_xdmac_remove_xfer(struct at_xdmac_chan *atchan,
- struct at_xdmac_desc *desc)
-{
- dev_dbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
-
- /*
- * Remove the transfer from the transfer list then move the transfer
- * descriptors into the free descriptors list.
- */
- list_del(&desc->xfer_node);
- list_splice_init(&desc->descs_list, &atchan->free_descs_list);
-}
-
static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
{
struct at_xdmac_desc *desc;
@@ -1608,14 +1595,14 @@ static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
* If channel is enabled, do nothing, advance_work will be triggered
* after the interruption.
*/
- if (!at_xdmac_chan_is_enabled(atchan) && !list_empty(&atchan->xfers_list)) {
- desc = list_first_entry(&atchan->xfers_list,
- struct at_xdmac_desc,
- xfer_node);
- dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
- if (!desc->active_xfer)
- at_xdmac_start_xfer(atchan, desc);
- }
+ if (at_xdmac_chan_is_enabled(atchan) || list_empty(&atchan->xfers_list))
+ return;
+
+ desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
+ xfer_node);
+ dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+ if (!desc->active_xfer)
+ at_xdmac_start_xfer(atchan, desc);
}
static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
@@ -1623,16 +1610,22 @@ static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
struct at_xdmac_desc *desc;
struct dma_async_tx_descriptor *txd;
- if (!list_empty(&atchan->xfers_list)) {
- desc = list_first_entry(&atchan->xfers_list,
- struct at_xdmac_desc, xfer_node);
- txd = &desc->tx_dma_desc;
-
- if (txd->flags & DMA_PREP_INTERRUPT)
- dmaengine_desc_get_callback_invoke(txd, NULL);
+ spin_lock_irq(&atchan->lock);
+ dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n",
+ __func__, atchan->irq_status);
+ if (list_empty(&atchan->xfers_list)) {
+ spin_unlock_irq(&atchan->lock);
+ return;
}
+ desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
+ xfer_node);
+ spin_unlock_irq(&atchan->lock);
+ txd = &desc->tx_dma_desc;
+ if (txd->flags & DMA_PREP_INTERRUPT)
+ dmaengine_desc_get_callback_invoke(txd, NULL);
}
+/* Called with atchan->lock held. */
static void at_xdmac_handle_error(struct at_xdmac_chan *atchan)
{
struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
@@ -1651,8 +1644,6 @@ static void at_xdmac_handle_error(struct at_xdmac_chan *atchan)
if (atchan->irq_status & AT_XDMAC_CIS_ROIS)
dev_err(chan2dev(&atchan->chan), "request overflow error!!!");
- spin_lock_irq(&atchan->lock);
-
/* Channel must be disabled first as it's not done automatically */
at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
@@ -1662,8 +1653,6 @@ static void at_xdmac_handle_error(struct at_xdmac_chan *atchan)
struct at_xdmac_desc,
xfer_node);
- spin_unlock_irq(&atchan->lock);
-
/* Print bad descriptor's details if needed */
dev_dbg(chan2dev(&atchan->chan),
"%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
@@ -1677,50 +1666,52 @@ static void at_xdmac_tasklet(struct tasklet_struct *t)
{
struct at_xdmac_chan *atchan = from_tasklet(atchan, t, tasklet);
struct at_xdmac_desc *desc;
+ struct dma_async_tx_descriptor *txd;
u32 error_mask;
+ if (at_xdmac_chan_is_cyclic(atchan))
+ return at_xdmac_handle_cyclic(atchan);
+
+ error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS |
+ AT_XDMAC_CIS_ROIS;
+
+ spin_lock_irq(&atchan->lock);
+
dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n",
__func__, atchan->irq_status);
- error_mask = AT_XDMAC_CIS_RBEIS
- | AT_XDMAC_CIS_WBEIS
- | AT_XDMAC_CIS_ROIS;
-
- if (at_xdmac_chan_is_cyclic(atchan)) {
- at_xdmac_handle_cyclic(atchan);
- } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS)
- || (atchan->irq_status & error_mask)) {
- struct dma_async_tx_descriptor *txd;
-
- if (atchan->irq_status & error_mask)
- at_xdmac_handle_error(atchan);
-
- spin_lock_irq(&atchan->lock);
- desc = list_first_entry(&atchan->xfers_list,
- struct at_xdmac_desc,
- xfer_node);
- dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
- if (!desc->active_xfer) {
- dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting");
- spin_unlock_irq(&atchan->lock);
- return;
- }
+ if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) &&
+ !(atchan->irq_status & error_mask))
+ return;
- txd = &desc->tx_dma_desc;
+ if (atchan->irq_status & error_mask)
+ at_xdmac_handle_error(atchan);
- at_xdmac_remove_xfer(atchan, desc);
+ desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
+ xfer_node);
+ dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+ if (!desc->active_xfer) {
+ dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting");
spin_unlock_irq(&atchan->lock);
+ return;
+ }
- dma_cookie_complete(txd);
- if (txd->flags & DMA_PREP_INTERRUPT)
- dmaengine_desc_get_callback_invoke(txd, NULL);
+ txd = &desc->tx_dma_desc;
+ dma_cookie_complete(txd);
+ /* Remove the transfer from the transfer list. */
+ list_del(&desc->xfer_node);
+ spin_unlock_irq(&atchan->lock);
- dma_run_dependencies(txd);
+ if (txd->flags & DMA_PREP_INTERRUPT)
+ dmaengine_desc_get_callback_invoke(txd, NULL);
- spin_lock_irq(&atchan->lock);
- at_xdmac_advance_work(atchan);
- spin_unlock_irq(&atchan->lock);
- }
+ dma_run_dependencies(txd);
+
+ spin_lock_irq(&atchan->lock);
+ /* Move the xfer descriptors into the free descriptors list. */
+ list_splice_tail_init(&desc->descs_list, &atchan->free_descs_list);
+ at_xdmac_advance_work(atchan);
+ spin_unlock_irq(&atchan->lock);
}
static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id)
@@ -1784,11 +1775,9 @@ static void at_xdmac_issue_pending(struct dma_chan *chan)
dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__);
- if (!at_xdmac_chan_is_cyclic(atchan)) {
- spin_lock_irqsave(&atchan->lock, flags);
- at_xdmac_advance_work(atchan);
- spin_unlock_irqrestore(&atchan->lock, flags);
- }
+ spin_lock_irqsave(&atchan->lock, flags);
+ at_xdmac_advance_work(atchan);
+ spin_unlock_irqrestore(&atchan->lock, flags);
return;
}
@@ -1866,8 +1855,11 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan)
cpu_relax();
/* Cancel all pending transfers. */
- list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node)
- at_xdmac_remove_xfer(atchan, desc);
+ list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) {
+ list_del(&desc->xfer_node);
+ list_splice_tail_init(&desc->descs_list,
+ &atchan->free_descs_list);
+ }
clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
@@ -2031,7 +2023,7 @@ static int __maybe_unused atmel_xdmac_resume(struct device *dev)
static int at_xdmac_probe(struct platform_device *pdev)
{
struct at_xdmac *atxdmac;
- int irq, size, nr_channels, i, ret;
+ int irq, nr_channels, i, ret;
void __iomem *base;
u32 reg;
@@ -2056,9 +2048,9 @@ static int at_xdmac_probe(struct platform_device *pdev)
return -EINVAL;
}
- size = sizeof(*atxdmac);
- size += nr_channels * sizeof(struct at_xdmac_chan);
- atxdmac = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+ atxdmac = devm_kzalloc(&pdev->dev,
+ struct_size(atxdmac, chan, nr_channels),
+ GFP_KERNEL);
if (!atxdmac) {
dev_err(&pdev->dev, "can't allocate at_xdmac structure\n");
return -ENOMEM;
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
index 96701dedcac8..fc513eb2b289 100644
--- a/drivers/dma/dma-jz4780.c
+++ b/drivers/dma/dma-jz4780.c
@@ -104,10 +104,10 @@
* descriptor base address in the upper 8 bits.
*/
struct jz4780_dma_hwdesc {
- uint32_t dcm;
- uint32_t dsa;
- uint32_t dta;
- uint32_t dtc;
+ u32 dcm;
+ u32 dsa;
+ u32 dta;
+ u32 dtc;
};
/* Size of allocations for hardware descriptor blocks. */
@@ -122,7 +122,8 @@ struct jz4780_dma_desc {
dma_addr_t desc_phys;
unsigned int count;
enum dma_transaction_type type;
- uint32_t status;
+ u32 transfer_type;
+ u32 status;
};
struct jz4780_dma_chan {
@@ -130,8 +131,8 @@ struct jz4780_dma_chan {
unsigned int id;
struct dma_pool *desc_pool;
- uint32_t transfer_type;
- uint32_t transfer_shift;
+ u32 transfer_type_tx, transfer_type_rx;
+ u32 transfer_shift;
struct dma_slave_config config;
struct jz4780_dma_desc *desc;
@@ -152,12 +153,12 @@ struct jz4780_dma_dev {
unsigned int irq;
const struct jz4780_dma_soc_data *soc_data;
- uint32_t chan_reserved;
+ u32 chan_reserved;
struct jz4780_dma_chan chan[];
};
struct jz4780_dma_filter_data {
- uint32_t transfer_type;
+ u32 transfer_type_tx, transfer_type_rx;
int channel;
};
@@ -179,26 +180,26 @@ static inline struct jz4780_dma_dev *jz4780_dma_chan_parent(
dma_device);
}
-static inline uint32_t jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma,
+static inline u32 jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma,
unsigned int chn, unsigned int reg)
{
return readl(jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn));
}
static inline void jz4780_dma_chn_writel(struct jz4780_dma_dev *jzdma,
- unsigned int chn, unsigned int reg, uint32_t val)
+ unsigned int chn, unsigned int reg, u32 val)
{
writel(val, jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn));
}
-static inline uint32_t jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma,
+static inline u32 jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma,
unsigned int reg)
{
return readl(jzdma->ctrl_base + reg);
}
static inline void jz4780_dma_ctrl_writel(struct jz4780_dma_dev *jzdma,
- unsigned int reg, uint32_t val)
+ unsigned int reg, u32 val)
{
writel(val, jzdma->ctrl_base + reg);
}
@@ -226,9 +227,10 @@ static inline void jz4780_dma_chan_disable(struct jz4780_dma_dev *jzdma,
jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DCKEC, BIT(chn));
}
-static struct jz4780_dma_desc *jz4780_dma_desc_alloc(
- struct jz4780_dma_chan *jzchan, unsigned int count,
- enum dma_transaction_type type)
+static struct jz4780_dma_desc *
+jz4780_dma_desc_alloc(struct jz4780_dma_chan *jzchan, unsigned int count,
+ enum dma_transaction_type type,
+ enum dma_transfer_direction direction)
{
struct jz4780_dma_desc *desc;
@@ -248,6 +250,12 @@ static struct jz4780_dma_desc *jz4780_dma_desc_alloc(
desc->count = count;
desc->type = type;
+
+ if (direction == DMA_DEV_TO_MEM)
+ desc->transfer_type = jzchan->transfer_type_rx;
+ else
+ desc->transfer_type = jzchan->transfer_type_tx;
+
return desc;
}
@@ -260,8 +268,8 @@ static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc)
kfree(desc);
}
-static uint32_t jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan,
- unsigned long val, uint32_t *shift)
+static u32 jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan,
+ unsigned long val, u32 *shift)
{
struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
int ord = ffs(val) - 1;
@@ -303,7 +311,7 @@ static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan,
enum dma_transfer_direction direction)
{
struct dma_slave_config *config = &jzchan->config;
- uint32_t width, maxburst, tsz;
+ u32 width, maxburst, tsz;
if (direction == DMA_MEM_TO_DEV) {
desc->dcm = JZ_DMA_DCM_SAI;
@@ -361,7 +369,7 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg(
unsigned int i;
int err;
- desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE);
+ desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE, direction);
if (!desc)
return NULL;
@@ -410,7 +418,7 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic(
periods = buf_len / period_len;
- desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC);
+ desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC, direction);
if (!desc)
return NULL;
@@ -453,16 +461,16 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy(
{
struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
struct jz4780_dma_desc *desc;
- uint32_t tsz;
+ u32 tsz;
- desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY);
+ desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY, 0);
if (!desc)
return NULL;
tsz = jz4780_dma_transfer_size(jzchan, dest | src | len,
&jzchan->transfer_shift);
- jzchan->transfer_type = JZ_DMA_DRT_AUTO;
+ desc->transfer_type = JZ_DMA_DRT_AUTO;
desc->desc[0].dsa = src;
desc->desc[0].dta = dest;
@@ -528,7 +536,7 @@ static void jz4780_dma_begin(struct jz4780_dma_chan *jzchan)
/* Set transfer type. */
jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DRT,
- jzchan->transfer_type);
+ jzchan->desc->transfer_type);
/*
* Set the transfer count. This is redundant for a descriptor-driven
@@ -670,7 +678,7 @@ static bool jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma,
{
const unsigned int soc_flags = jzdma->soc_data->flags;
struct jz4780_dma_desc *desc = jzchan->desc;
- uint32_t dcs;
+ u32 dcs;
bool ack = true;
spin_lock(&jzchan->vchan.lock);
@@ -727,7 +735,7 @@ static irqreturn_t jz4780_dma_irq_handler(int irq, void *data)
struct jz4780_dma_dev *jzdma = data;
unsigned int nb_channels = jzdma->soc_data->nb_channels;
unsigned long pending;
- uint32_t dmac;
+ u32 dmac;
int i;
pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP);
@@ -788,7 +796,8 @@ static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param)
return false;
}
- jzchan->transfer_type = data->transfer_type;
+ jzchan->transfer_type_tx = data->transfer_type_tx;
+ jzchan->transfer_type_rx = data->transfer_type_rx;
return true;
}
@@ -800,11 +809,17 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec,
dma_cap_mask_t mask = jzdma->dma_device.cap_mask;
struct jz4780_dma_filter_data data;
- if (dma_spec->args_count != 2)
+ if (dma_spec->args_count == 2) {
+ data.transfer_type_tx = dma_spec->args[0];
+ data.transfer_type_rx = dma_spec->args[0];
+ data.channel = dma_spec->args[1];
+ } else if (dma_spec->args_count == 3) {
+ data.transfer_type_tx = dma_spec->args[0];
+ data.transfer_type_rx = dma_spec->args[1];
+ data.channel = dma_spec->args[2];
+ } else {
return NULL;
-
- data.transfer_type = dma_spec->args[0];
- data.channel = dma_spec->args[1];
+ }
if (data.channel > -1) {
if (data.channel >= jzdma->soc_data->nb_channels) {
@@ -822,7 +837,8 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec,
return NULL;
}
- jzdma->chan[data.channel].transfer_type = data.transfer_type;
+ jzdma->chan[data.channel].transfer_type_tx = data.transfer_type_tx;
+ jzdma->chan[data.channel].transfer_type_rx = data.transfer_type_rx;
return dma_get_slave_channel(
&jzdma->chan[data.channel].vchan.chan);
@@ -938,6 +954,14 @@ static int jz4780_dma_probe(struct platform_device *pdev)
jzchan->vchan.desc_free = jz4780_dma_desc_free;
}
+ /*
+ * On JZ4760, chan0 won't enable properly the first time.
+ * Enabling then disabling chan1 will magically make chan0 work
+ * correctly.
+ */
+ jz4780_dma_chan_enable(jzdma, 1);
+ jz4780_dma_chan_disable(jzdma, 1);
+
ret = platform_get_irq(pdev, 0);
if (ret < 0)
goto err_disable_clk;
@@ -1011,12 +1035,36 @@ static const struct jz4780_dma_soc_data jz4760_dma_soc_data = {
.flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
};
+static const struct jz4780_dma_soc_data jz4760_mdma_soc_data = {
+ .nb_channels = 2,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
+};
+
+static const struct jz4780_dma_soc_data jz4760_bdma_soc_data = {
+ .nb_channels = 3,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
+};
+
static const struct jz4780_dma_soc_data jz4760b_dma_soc_data = {
.nb_channels = 5,
.transfer_ord_max = 6,
.flags = JZ_SOC_DATA_PER_CHAN_PM,
};
+static const struct jz4780_dma_soc_data jz4760b_mdma_soc_data = {
+ .nb_channels = 2,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM,
+};
+
+static const struct jz4780_dma_soc_data jz4760b_bdma_soc_data = {
+ .nb_channels = 3,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM,
+};
+
static const struct jz4780_dma_soc_data jz4770_dma_soc_data = {
.nb_channels = 6,
.transfer_ord_max = 6,
@@ -1045,7 +1093,11 @@ static const struct of_device_id jz4780_dma_dt_match[] = {
{ .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data },
{ .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data },
{ .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data },
+ { .compatible = "ingenic,jz4760-mdma", .data = &jz4760_mdma_soc_data },
+ { .compatible = "ingenic,jz4760-bdma", .data = &jz4760_bdma_soc_data },
{ .compatible = "ingenic,jz4760b-dma", .data = &jz4760b_dma_soc_data },
+ { .compatible = "ingenic,jz4760b-mdma", .data = &jz4760b_mdma_soc_data },
+ { .compatible = "ingenic,jz4760b-bdma", .data = &jz4760b_bdma_soc_data },
{ .compatible = "ingenic,jz4770-dma", .data = &jz4770_dma_soc_data },
{ .compatible = "ingenic,jz4780-dma", .data = &jz4780_dma_soc_data },
{ .compatible = "ingenic,x1000-dma", .data = &x1000_dma_soc_data },
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index d9f7c097cfd6..2cfa8458b51b 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1159,6 +1159,13 @@ int dma_async_device_register(struct dma_device *device)
return -EIO;
}
+ if (dma_has_cap(DMA_MEMCPY_SG, device->cap_mask) && !device->device_prep_dma_memcpy_sg) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_MEMCPY_SG");
+ return -EIO;
+ }
+
if (dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor) {
dev_err(device->dev,
"Device claims capability %s, but op is not defined\n",
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index fab412349f7f..573ad8b86804 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -19,30 +19,6 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd);
static void idxd_wq_disable_cleanup(struct idxd_wq *wq);
/* Interrupt control bits */
-void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
-{
- struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
-
- pci_msi_mask_irq(data);
-}
-
-void idxd_mask_msix_vectors(struct idxd_device *idxd)
-{
- struct pci_dev *pdev = idxd->pdev;
- int msixcnt = pci_msix_vec_count(pdev);
- int i;
-
- for (i = 0; i < msixcnt; i++)
- idxd_mask_msix_vector(idxd, i);
-}
-
-void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
-{
- struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
-
- pci_msi_unmask_irq(data);
-}
-
void idxd_unmask_error_interrupts(struct idxd_device *idxd)
{
union genctrl_reg genctrl;
@@ -382,14 +358,24 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
lockdep_assert_held(&wq->wq_lock);
memset(wq->wqcfg, 0, idxd->wqcfg_size);
wq->type = IDXD_WQT_NONE;
- wq->size = 0;
- wq->group = NULL;
wq->threshold = 0;
wq->priority = 0;
wq->ats_dis = 0;
+ wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
memset(wq->name, 0, WQ_NAME_SIZE);
+ wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
+ wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
+}
+
+static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq)
+{
+ lockdep_assert_held(&wq->wq_lock);
+
+ idxd_wq_disable_cleanup(wq);
+ wq->size = 0;
+ wq->group = NULL;
}
static void idxd_wq_ref_release(struct percpu_ref *ref)
@@ -404,19 +390,31 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
int rc;
memset(&wq->wq_active, 0, sizeof(wq->wq_active));
- rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL);
+ rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release,
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
if (rc < 0)
return rc;
reinit_completion(&wq->wq_dead);
+ reinit_completion(&wq->wq_resurrect);
return 0;
}
-void idxd_wq_quiesce(struct idxd_wq *wq)
+void __idxd_wq_quiesce(struct idxd_wq *wq)
{
+ lockdep_assert_held(&wq->wq_lock);
+ reinit_completion(&wq->wq_resurrect);
percpu_ref_kill(&wq->wq_active);
+ complete_all(&wq->wq_resurrect);
wait_for_completion(&wq->wq_dead);
}
+void idxd_wq_quiesce(struct idxd_wq *wq)
+{
+ mutex_lock(&wq->wq_lock);
+ __idxd_wq_quiesce(wq);
+ mutex_unlock(&wq->wq_lock);
+}
+
/* Device control bits */
static inline bool idxd_is_enabled(struct idxd_device *idxd)
{
@@ -572,7 +570,6 @@ void idxd_device_reset(struct idxd_device *idxd)
idxd_device_clear_state(idxd);
idxd->state = IDXD_DEV_DISABLED;
idxd_unmask_error_interrupts(idxd);
- idxd_msix_perm_setup(idxd);
spin_unlock(&idxd->dev_lock);
}
@@ -681,9 +678,9 @@ static void idxd_groups_clear_state(struct idxd_device *idxd)
memset(&group->grpcfg, 0, sizeof(group->grpcfg));
group->num_engines = 0;
group->num_wqs = 0;
- group->use_token_limit = false;
- group->tokens_allowed = 0;
- group->tokens_reserved = 0;
+ group->use_rdbuf_limit = false;
+ group->rdbufs_allowed = 0;
+ group->rdbufs_reserved = 0;
group->tc_a = -1;
group->tc_b = -1;
}
@@ -699,6 +696,7 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
if (wq->state == IDXD_WQ_ENABLED) {
idxd_wq_disable_cleanup(wq);
+ idxd_wq_device_reset_cleanup(wq);
wq->state = IDXD_WQ_DISABLED;
}
}
@@ -711,36 +709,6 @@ void idxd_device_clear_state(struct idxd_device *idxd)
idxd_device_wqs_clear_state(idxd);
}
-void idxd_msix_perm_setup(struct idxd_device *idxd)
-{
- union msix_perm mperm;
- int i, msixcnt;
-
- msixcnt = pci_msix_vec_count(idxd->pdev);
- if (msixcnt < 0)
- return;
-
- mperm.bits = 0;
- mperm.pasid = idxd->pasid;
- mperm.pasid_en = device_pasid_enabled(idxd);
- for (i = 1; i < msixcnt; i++)
- iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
-}
-
-void idxd_msix_perm_clear(struct idxd_device *idxd)
-{
- union msix_perm mperm;
- int i, msixcnt;
-
- msixcnt = pci_msix_vec_count(idxd->pdev);
- if (msixcnt < 0)
- return;
-
- mperm.bits = 0;
- for (i = 1; i < msixcnt; i++)
- iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
-}
-
static void idxd_group_config_write(struct idxd_group *group)
{
struct idxd_device *idxd = group->idxd;
@@ -780,10 +748,10 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
int i;
struct device *dev = &idxd->pdev->dev;
- /* Setup bandwidth token limit */
- if (idxd->hw.gen_cap.config_en && idxd->token_limit) {
+ /* Setup bandwidth rdbuf limit */
+ if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) {
reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
- reg.token_limit = idxd->token_limit;
+ reg.rdbuf_limit = idxd->rdbuf_limit;
iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
}
@@ -827,15 +795,12 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset);
}
+ if (wq->size == 0 && wq->type != IDXD_WQT_NONE)
+ wq->size = WQ_DEFAULT_QUEUE_DEPTH;
+
/* byte 0-3 */
wq->wqcfg->wq_size = wq->size;
- if (wq->size == 0) {
- idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE;
- dev_warn(dev, "Incorrect work queue size: 0\n");
- return -EINVAL;
- }
-
/* bytes 4-7 */
wq->wqcfg->wq_thresh = wq->threshold;
@@ -924,13 +889,12 @@ static void idxd_group_flags_setup(struct idxd_device *idxd)
group->tc_b = group->grpcfg.flags.tc_b = 1;
else
group->grpcfg.flags.tc_b = group->tc_b;
- group->grpcfg.flags.use_token_limit = group->use_token_limit;
- group->grpcfg.flags.tokens_reserved = group->tokens_reserved;
- if (group->tokens_allowed)
- group->grpcfg.flags.tokens_allowed =
- group->tokens_allowed;
+ group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit;
+ group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved;
+ if (group->rdbufs_allowed)
+ group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed;
else
- group->grpcfg.flags.tokens_allowed = idxd->max_tokens;
+ group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs;
}
}
@@ -981,8 +945,6 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
if (!wq->group)
continue;
- if (!wq->size)
- continue;
if (wq_shared(wq) && !device_swq_supported(idxd)) {
idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT;
@@ -1123,7 +1085,7 @@ int idxd_device_load_config(struct idxd_device *idxd)
int i, rc;
reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
- idxd->token_limit = reg.token_limit;
+ idxd->rdbuf_limit = reg.rdbuf_limit;
for (i = 0; i < idxd->max_groups; i++) {
struct idxd_group *group = idxd->groups[i];
@@ -1142,6 +1104,106 @@ int idxd_device_load_config(struct idxd_device *idxd)
return 0;
}
+static void idxd_flush_pending_descs(struct idxd_irq_entry *ie)
+{
+ struct idxd_desc *desc, *itr;
+ struct llist_node *head;
+ LIST_HEAD(flist);
+ enum idxd_complete_type ctype;
+
+ spin_lock(&ie->list_lock);
+ head = llist_del_all(&ie->pending_llist);
+ if (head) {
+ llist_for_each_entry_safe(desc, itr, head, llnode)
+ list_add_tail(&desc->list, &ie->work_list);
+ }
+
+ list_for_each_entry_safe(desc, itr, &ie->work_list, list)
+ list_move_tail(&desc->list, &flist);
+ spin_unlock(&ie->list_lock);
+
+ list_for_each_entry_safe(desc, itr, &flist, list) {
+ list_del(&desc->list);
+ ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT;
+ idxd_dma_complete_txd(desc, ctype, true);
+ }
+}
+
+static void idxd_device_set_perm_entry(struct idxd_device *idxd,
+ struct idxd_irq_entry *ie)
+{
+ union msix_perm mperm;
+
+ if (ie->pasid == INVALID_IOASID)
+ return;
+
+ mperm.bits = 0;
+ mperm.pasid = ie->pasid;
+ mperm.pasid_en = 1;
+ iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
+}
+
+static void idxd_device_clear_perm_entry(struct idxd_device *idxd,
+ struct idxd_irq_entry *ie)
+{
+ iowrite32(0, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
+}
+
+void idxd_wq_free_irq(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_irq_entry *ie = &wq->ie;
+
+ synchronize_irq(ie->vector);
+ free_irq(ie->vector, ie);
+ idxd_flush_pending_descs(ie);
+ if (idxd->request_int_handles)
+ idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
+ idxd_device_clear_perm_entry(idxd, ie);
+ ie->vector = -1;
+ ie->int_handle = INVALID_INT_HANDLE;
+ ie->pasid = INVALID_IOASID;
+}
+
+int idxd_wq_request_irq(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ struct idxd_irq_entry *ie;
+ int rc;
+
+ ie = &wq->ie;
+ ie->vector = pci_irq_vector(pdev, ie->id);
+ ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID;
+ idxd_device_set_perm_entry(idxd, ie);
+
+ rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie);
+ if (rc < 0) {
+ dev_err(dev, "Failed to request irq %d.\n", ie->vector);
+ goto err_irq;
+ }
+
+ if (idxd->request_int_handles) {
+ rc = idxd_device_request_int_handle(idxd, ie->id, &ie->int_handle,
+ IDXD_IRQ_MSIX);
+ if (rc < 0)
+ goto err_int_handle;
+ } else {
+ ie->int_handle = ie->id;
+ }
+
+ return 0;
+
+err_int_handle:
+ ie->int_handle = INVALID_INT_HANDLE;
+ free_irq(ie->vector, ie);
+err_irq:
+ idxd_device_clear_perm_entry(idxd, ie);
+ ie->pasid = INVALID_IOASID;
+ return rc;
+}
+
int __drv_enable_wq(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index c39e9483206a..bfff59617d04 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -21,20 +21,27 @@ static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
}
void idxd_dma_complete_txd(struct idxd_desc *desc,
- enum idxd_complete_type comp_type)
+ enum idxd_complete_type comp_type,
+ bool free_desc)
{
+ struct idxd_device *idxd = desc->wq->idxd;
struct dma_async_tx_descriptor *tx;
struct dmaengine_result res;
int complete = 1;
- if (desc->completion->status == DSA_COMP_SUCCESS)
+ if (desc->completion->status == DSA_COMP_SUCCESS) {
res.result = DMA_TRANS_NOERROR;
- else if (desc->completion->status)
+ } else if (desc->completion->status) {
+ if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT &&
+ desc->completion->status == DSA_COMP_INT_HANDLE_INVAL &&
+ idxd_queue_int_handle_resubmit(desc))
+ return;
res.result = DMA_TRANS_WRITE_FAILED;
- else if (comp_type == IDXD_COMPLETE_ABORT)
+ } else if (comp_type == IDXD_COMPLETE_ABORT) {
res.result = DMA_TRANS_ABORTED;
- else
+ } else {
complete = 0;
+ }
tx = &desc->txd;
if (complete && tx->cookie) {
@@ -44,6 +51,9 @@ void idxd_dma_complete_txd(struct idxd_desc *desc,
tx->callback = NULL;
tx->callback_result = NULL;
}
+
+ if (free_desc)
+ idxd_free_desc(desc->wq, desc);
}
static void op_flag_setup(unsigned long flags, u32 *desc_flags)
@@ -153,8 +163,10 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
cookie = dma_cookie_assign(tx);
rc = idxd_submit_desc(wq, desc);
- if (rc < 0)
+ if (rc < 0) {
+ idxd_free_desc(wq, desc);
return rc;
+ }
return cookie;
}
@@ -277,6 +289,14 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
mutex_lock(&wq->wq_lock);
wq->type = IDXD_WQT_KERNEL;
+
+ rc = idxd_wq_request_irq(wq);
+ if (rc < 0) {
+ idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR;
+ dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc);
+ goto err_irq;
+ }
+
rc = __drv_enable_wq(wq);
if (rc < 0) {
dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc);
@@ -310,13 +330,15 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
return 0;
err_dma:
- idxd_wq_quiesce(wq);
+ __idxd_wq_quiesce(wq);
percpu_ref_exit(&wq->wq_active);
err_ref:
idxd_wq_free_resources(wq);
err_res_alloc:
__drv_disable_wq(wq);
err:
+ idxd_wq_free_irq(wq);
+err_irq:
wq->type = IDXD_WQT_NONE;
mutex_unlock(&wq->wq_lock);
return rc;
@@ -327,11 +349,13 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev)
struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
mutex_lock(&wq->wq_lock);
- idxd_wq_quiesce(wq);
+ __idxd_wq_quiesce(wq);
idxd_unregister_dma_channel(wq);
idxd_wq_free_resources(wq);
__drv_disable_wq(wq);
percpu_ref_exit(&wq->wq_active);
+ idxd_wq_free_irq(wq);
+ wq->type = IDXD_WQT_NONE;
mutex_unlock(&wq->wq_lock);
}
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 0cf8d3145870..da72eb15f610 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -10,6 +10,7 @@
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/pci.h>
+#include <linux/ioasid.h>
#include <linux/perf_event.h>
#include <uapi/linux/idxd.h>
#include "registers.h"
@@ -51,6 +52,9 @@ enum idxd_type {
#define IDXD_NAME_SIZE 128
#define IDXD_PMU_EVENT_MAX 64
+#define IDXD_ENQCMDS_RETRIES 32
+#define IDXD_ENQCMDS_MAX_RETRIES 64
+
struct idxd_device_driver {
const char *name;
enum idxd_dev_type *type;
@@ -64,8 +68,8 @@ extern struct idxd_device_driver idxd_drv;
extern struct idxd_device_driver idxd_dmaengine_drv;
extern struct idxd_device_driver idxd_user_drv;
+#define INVALID_INT_HANDLE -1
struct idxd_irq_entry {
- struct idxd_device *idxd;
int id;
int vector;
struct llist_head pending_llist;
@@ -75,6 +79,8 @@ struct idxd_irq_entry {
* and irq thread processing error descriptor.
*/
spinlock_t list_lock;
+ int int_handle;
+ ioasid_t pasid;
};
struct idxd_group {
@@ -84,9 +90,9 @@ struct idxd_group {
int id;
int num_engines;
int num_wqs;
- bool use_token_limit;
- u8 tokens_allowed;
- u8 tokens_reserved;
+ bool use_rdbuf_limit;
+ u8 rdbufs_allowed;
+ u8 rdbufs_reserved;
int tc_a;
int tc_b;
};
@@ -145,6 +151,10 @@ struct idxd_cdev {
#define WQ_NAME_SIZE 1024
#define WQ_TYPE_SIZE 10
+#define WQ_DEFAULT_QUEUE_DEPTH 16
+#define WQ_DEFAULT_MAX_XFER SZ_2M
+#define WQ_DEFAULT_MAX_BATCH 32
+
enum idxd_op_type {
IDXD_OP_BLOCK = 0,
IDXD_OP_NONBLOCK = 1,
@@ -164,13 +174,16 @@ struct idxd_dma_chan {
struct idxd_wq {
void __iomem *portal;
u32 portal_offset;
+ unsigned int enqcmds_retries;
struct percpu_ref wq_active;
struct completion wq_dead;
+ struct completion wq_resurrect;
struct idxd_dev idxd_dev;
struct idxd_cdev *idxd_cdev;
struct wait_queue_head err_queue;
struct idxd_device *idxd;
int id;
+ struct idxd_irq_entry ie;
enum idxd_wq_type type;
struct idxd_group *group;
int client_count;
@@ -251,6 +264,7 @@ struct idxd_device {
int id;
int major;
u32 cmd_status;
+ struct idxd_irq_entry ie; /* misc irq, msix 0 */
struct pci_dev *pdev;
void __iomem *reg_base;
@@ -266,6 +280,8 @@ struct idxd_device {
unsigned int pasid;
int num_groups;
+ int irq_cnt;
+ bool request_int_handles;
u32 msix_perm_offset;
u32 wqcfg_offset;
@@ -276,24 +292,20 @@ struct idxd_device {
u32 max_batch_size;
int max_groups;
int max_engines;
- int max_tokens;
+ int max_rdbufs;
int max_wqs;
int max_wq_size;
- int token_limit;
- int nr_tokens; /* non-reserved tokens */
+ int rdbuf_limit;
+ int nr_rdbufs; /* non-reserved read buffers */
unsigned int wqcfg_size;
union sw_err_reg sw_err;
wait_queue_head_t cmd_waitq;
- int num_wq_irqs;
- struct idxd_irq_entry *irq_entries;
struct idxd_dma_dev *idxd_dma;
struct workqueue_struct *wq;
struct work_struct work;
- int *int_handles;
-
struct idxd_pmu *idxd_pmu;
};
@@ -380,6 +392,21 @@ static inline void idxd_dev_set_type(struct idxd_dev *idev, int type)
idev->type = type;
}
+static inline struct idxd_irq_entry *idxd_get_ie(struct idxd_device *idxd, int idx)
+{
+ return (idx == 0) ? &idxd->ie : &idxd->wqs[idx - 1]->ie;
+}
+
+static inline struct idxd_wq *ie_to_wq(struct idxd_irq_entry *ie)
+{
+ return container_of(ie, struct idxd_wq, ie);
+}
+
+static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie)
+{
+ return container_of(ie, struct idxd_device, ie);
+}
+
extern struct bus_type dsa_bus_type;
extern bool support_enqcmd;
@@ -518,17 +545,13 @@ void idxd_unregister_devices(struct idxd_device *idxd);
int idxd_register_driver(void);
void idxd_unregister_driver(void);
void idxd_wqs_quiesce(struct idxd_device *idxd);
+bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc);
/* device interrupt control */
-void idxd_msix_perm_setup(struct idxd_device *idxd);
-void idxd_msix_perm_clear(struct idxd_device *idxd);
irqreturn_t idxd_misc_thread(int vec, void *data);
irqreturn_t idxd_wq_thread(int irq, void *data);
void idxd_mask_error_interrupts(struct idxd_device *idxd);
void idxd_unmask_error_interrupts(struct idxd_device *idxd);
-void idxd_mask_msix_vectors(struct idxd_device *idxd);
-void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
-void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
/* device control */
int idxd_register_idxd_drv(void);
@@ -564,13 +587,17 @@ int idxd_wq_map_portal(struct idxd_wq *wq);
void idxd_wq_unmap_portal(struct idxd_wq *wq);
int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
int idxd_wq_disable_pasid(struct idxd_wq *wq);
+void __idxd_wq_quiesce(struct idxd_wq *wq);
void idxd_wq_quiesce(struct idxd_wq *wq);
int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
+void idxd_wq_free_irq(struct idxd_wq *wq);
+int idxd_wq_request_irq(struct idxd_wq *wq);
/* submission */
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype);
void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc);
+int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc);
/* dmaengine */
int idxd_register_dma_device(struct idxd_device *idxd);
@@ -579,7 +606,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq);
void idxd_unregister_dma_channel(struct idxd_wq *wq);
void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
void idxd_dma_complete_txd(struct idxd_desc *desc,
- enum idxd_complete_type comp_type);
+ enum idxd_complete_type comp_type, bool free_desc);
/* cdev */
int idxd_cdev_register(void);
@@ -603,10 +630,4 @@ static inline void perfmon_init(void) {}
static inline void perfmon_exit(void) {}
#endif
-static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason)
-{
- idxd_dma_complete_txd(desc, reason);
- idxd_free_desc(desc->wq, desc);
-}
-
#endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 7bf03f371ce1..08a5f4310188 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -72,7 +72,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
{
struct pci_dev *pdev = idxd->pdev;
struct device *dev = &pdev->dev;
- struct idxd_irq_entry *irq_entry;
+ struct idxd_irq_entry *ie;
int i, msixcnt;
int rc = 0;
@@ -81,6 +81,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
dev_err(dev, "Not MSI-X interrupt capable.\n");
return -ENOSPC;
}
+ idxd->irq_cnt = msixcnt;
rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
if (rc != msixcnt) {
@@ -89,87 +90,34 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
}
dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
- /*
- * We implement 1 completion list per MSI-X entry except for
- * entry 0, which is for errors and others.
- */
- idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry),
- GFP_KERNEL, dev_to_node(dev));
- if (!idxd->irq_entries) {
- rc = -ENOMEM;
- goto err_irq_entries;
- }
-
- for (i = 0; i < msixcnt; i++) {
- idxd->irq_entries[i].id = i;
- idxd->irq_entries[i].idxd = idxd;
- idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
- spin_lock_init(&idxd->irq_entries[i].list_lock);
- }
-
- idxd_msix_perm_setup(idxd);
- irq_entry = &idxd->irq_entries[0];
- rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread,
- 0, "idxd-misc", irq_entry);
+ ie = idxd_get_ie(idxd, 0);
+ ie->vector = pci_irq_vector(pdev, 0);
+ rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie);
if (rc < 0) {
dev_err(dev, "Failed to allocate misc interrupt.\n");
goto err_misc_irq;
}
+ dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector);
- dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector);
-
- /* first MSI-X entry is not for wq interrupts */
- idxd->num_wq_irqs = msixcnt - 1;
+ for (i = 0; i < idxd->max_wqs; i++) {
+ int msix_idx = i + 1;
- for (i = 1; i < msixcnt; i++) {
- irq_entry = &idxd->irq_entries[i];
+ ie = idxd_get_ie(idxd, msix_idx);
+ ie->id = msix_idx;
+ ie->int_handle = INVALID_INT_HANDLE;
+ ie->pasid = INVALID_IOASID;
- init_llist_head(&idxd->irq_entries[i].pending_llist);
- INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
- rc = request_threaded_irq(irq_entry->vector, NULL,
- idxd_wq_thread, 0, "idxd-portal", irq_entry);
- if (rc < 0) {
- dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector);
- goto err_wq_irqs;
- }
-
- dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector);
- if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
- /*
- * The MSIX vector enumeration starts at 1 with vector 0 being the
- * misc interrupt that handles non I/O completion events. The
- * interrupt handles are for IMS enumeration on guest. The misc
- * interrupt vector does not require a handle and therefore we start
- * the int_handles at index 0. Since 'i' starts at 1, the first
- * int_handles index will be 0.
- */
- rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1],
- IDXD_IRQ_MSIX);
- if (rc < 0) {
- free_irq(irq_entry->vector, irq_entry);
- goto err_wq_irqs;
- }
- dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]);
- }
+ spin_lock_init(&ie->list_lock);
+ init_llist_head(&ie->pending_llist);
+ INIT_LIST_HEAD(&ie->work_list);
}
idxd_unmask_error_interrupts(idxd);
return 0;
- err_wq_irqs:
- while (--i >= 0) {
- irq_entry = &idxd->irq_entries[i];
- free_irq(irq_entry->vector, irq_entry);
- if (i != 0)
- idxd_device_release_int_handle(idxd,
- idxd->int_handles[i], IDXD_IRQ_MSIX);
- }
err_misc_irq:
- /* Disable error interrupt generation */
idxd_mask_error_interrupts(idxd);
- idxd_msix_perm_clear(idxd);
- err_irq_entries:
pci_free_irq_vectors(pdev);
dev_err(dev, "No usable interrupts\n");
return rc;
@@ -178,26 +126,16 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
static void idxd_cleanup_interrupts(struct idxd_device *idxd)
{
struct pci_dev *pdev = idxd->pdev;
- struct idxd_irq_entry *irq_entry;
- int i, msixcnt;
+ struct idxd_irq_entry *ie;
+ int msixcnt;
msixcnt = pci_msix_vec_count(pdev);
if (msixcnt <= 0)
return;
- irq_entry = &idxd->irq_entries[0];
- free_irq(irq_entry->vector, irq_entry);
-
- for (i = 1; i < msixcnt; i++) {
-
- irq_entry = &idxd->irq_entries[i];
- if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
- idxd_device_release_int_handle(idxd, idxd->int_handles[i],
- IDXD_IRQ_MSIX);
- free_irq(irq_entry->vector, irq_entry);
- }
-
+ ie = idxd_get_ie(idxd, 0);
idxd_mask_error_interrupts(idxd);
+ free_irq(ie->vector, ie);
pci_free_irq_vectors(pdev);
}
@@ -237,8 +175,10 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
mutex_init(&wq->wq_lock);
init_waitqueue_head(&wq->err_queue);
init_completion(&wq->wq_dead);
- wq->max_xfer_bytes = idxd->max_xfer_bytes;
- wq->max_batch_size = idxd->max_batch_size;
+ init_completion(&wq->wq_resurrect);
+ wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
+ wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
+ wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
if (!wq->wqcfg) {
put_device(conf_dev);
@@ -379,13 +319,6 @@ static int idxd_setup_internals(struct idxd_device *idxd)
init_waitqueue_head(&idxd->cmd_waitq);
- if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
- idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL,
- dev_to_node(dev));
- if (!idxd->int_handles)
- return -ENOMEM;
- }
-
rc = idxd_setup_wqs(idxd);
if (rc < 0)
goto err_wqs;
@@ -416,7 +349,6 @@ static int idxd_setup_internals(struct idxd_device *idxd)
for (i = 0; i < idxd->max_wqs; i++)
put_device(wq_confdev(idxd->wqs[i]));
err_wqs:
- kfree(idxd->int_handles);
return rc;
}
@@ -451,6 +383,10 @@ static void idxd_read_caps(struct idxd_device *idxd)
dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
}
+ /* reading command capabilities */
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))
+ idxd->request_int_handles = true;
+
idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
@@ -464,9 +400,9 @@ static void idxd_read_caps(struct idxd_device *idxd)
dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
idxd->max_groups = idxd->hw.group_cap.num_groups;
dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
- idxd->max_tokens = idxd->hw.group_cap.total_tokens;
- dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
- idxd->nr_tokens = idxd->max_tokens;
+ idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs;
+ dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs);
+ idxd->nr_rdbufs = idxd->max_rdbufs;
/* read engine capabilities */
idxd->hw.engine_cap.bits =
@@ -611,8 +547,6 @@ static int idxd_probe(struct idxd_device *idxd)
if (rc)
goto err_config;
- dev_dbg(dev, "IDXD interrupt setup complete.\n");
-
idxd->major = idxd_cdev_get_major(idxd);
rc = perfmon_pmu_init(idxd);
@@ -708,32 +642,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return rc;
}
-static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
-{
- struct idxd_desc *desc, *itr;
- struct llist_node *head;
-
- head = llist_del_all(&ie->pending_llist);
- if (!head)
- return;
-
- llist_for_each_entry_safe(desc, itr, head, llnode) {
- idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
- idxd_free_desc(desc->wq, desc);
- }
-}
-
-static void idxd_flush_work_list(struct idxd_irq_entry *ie)
-{
- struct idxd_desc *desc, *iter;
-
- list_for_each_entry_safe(desc, iter, &ie->work_list, list) {
- list_del(&desc->list);
- idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
- idxd_free_desc(desc->wq, desc);
- }
-}
-
void idxd_wqs_quiesce(struct idxd_device *idxd)
{
struct idxd_wq *wq;
@@ -746,47 +654,19 @@ void idxd_wqs_quiesce(struct idxd_device *idxd)
}
}
-static void idxd_release_int_handles(struct idxd_device *idxd)
-{
- struct device *dev = &idxd->pdev->dev;
- int i, rc;
-
- for (i = 0; i < idxd->num_wq_irqs; i++) {
- if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) {
- rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i],
- IDXD_IRQ_MSIX);
- if (rc < 0)
- dev_warn(dev, "irq handle %d release failed\n",
- idxd->int_handles[i]);
- else
- dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]);
- }
- }
-}
-
static void idxd_shutdown(struct pci_dev *pdev)
{
struct idxd_device *idxd = pci_get_drvdata(pdev);
- int rc, i;
struct idxd_irq_entry *irq_entry;
- int msixcnt = pci_msix_vec_count(pdev);
+ int rc;
rc = idxd_device_disable(idxd);
if (rc)
dev_err(&pdev->dev, "Disabling device failed\n");
- dev_dbg(&pdev->dev, "%s called\n", __func__);
- idxd_mask_msix_vectors(idxd);
+ irq_entry = &idxd->ie;
+ synchronize_irq(irq_entry->vector);
idxd_mask_error_interrupts(idxd);
-
- for (i = 0; i < msixcnt; i++) {
- irq_entry = &idxd->irq_entries[i];
- synchronize_irq(irq_entry->vector);
- if (i == 0)
- continue;
- idxd_flush_pending_llist(irq_entry);
- idxd_flush_work_list(irq_entry);
- }
flush_workqueue(idxd->wq);
}
@@ -794,8 +674,6 @@ static void idxd_remove(struct pci_dev *pdev)
{
struct idxd_device *idxd = pci_get_drvdata(pdev);
struct idxd_irq_entry *irq_entry;
- int msixcnt = pci_msix_vec_count(pdev);
- int i;
idxd_unregister_devices(idxd);
/*
@@ -811,12 +689,8 @@ static void idxd_remove(struct pci_dev *pdev)
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
- for (i = 0; i < msixcnt; i++) {
- irq_entry = &idxd->irq_entries[i];
- free_irq(irq_entry->vector, irq_entry);
- }
- idxd_msix_perm_clear(idxd);
- idxd_release_int_handles(idxd);
+ irq_entry = idxd_get_ie(idxd, 0);
+ free_irq(irq_entry->vector, irq_entry);
pci_free_irq_vectors(pdev);
pci_iounmap(pdev, idxd->reg_base);
iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index cf2c8bc4f147..743ead5ebc57 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -6,6 +6,7 @@
#include <linux/pci.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/dmaengine.h>
+#include <linux/delay.h>
#include <uapi/linux/idxd.h>
#include "../dmaengine.h"
#include "idxd.h"
@@ -22,6 +23,16 @@ struct idxd_fault {
struct idxd_device *idxd;
};
+struct idxd_resubmit {
+ struct work_struct work;
+ struct idxd_desc *desc;
+};
+
+struct idxd_int_handle_revoke {
+ struct work_struct work;
+ struct idxd_device *idxd;
+};
+
static void idxd_device_reinit(struct work_struct *work)
{
struct idxd_device *idxd = container_of(work, struct idxd_device, work);
@@ -55,6 +66,162 @@ static void idxd_device_reinit(struct work_struct *work)
idxd_device_clear_state(idxd);
}
+/*
+ * The function sends a drain descriptor for the interrupt handle. The drain ensures
+ * all descriptors with this interrupt handle is flushed and the interrupt
+ * will allow the cleanup of the outstanding descriptors.
+ */
+static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie)
+{
+ struct idxd_wq *wq = ie_to_wq(ie);
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ struct dsa_hw_desc desc = {};
+ void __iomem *portal;
+ int rc;
+
+ /* Issue a simple drain operation with interrupt but no completion record */
+ desc.flags = IDXD_OP_FLAG_RCI;
+ desc.opcode = DSA_OPCODE_DRAIN;
+ desc.priv = 1;
+
+ if (ie->pasid != INVALID_IOASID)
+ desc.pasid = ie->pasid;
+ desc.int_handle = ie->int_handle;
+ portal = idxd_wq_portal_addr(wq);
+
+ /*
+ * The wmb() makes sure that the descriptor is all there before we
+ * issue.
+ */
+ wmb();
+ if (wq_dedicated(wq)) {
+ iosubmit_cmds512(portal, &desc, 1);
+ } else {
+ rc = idxd_enqcmds(wq, portal, &desc);
+ /* This should not fail unless hardware failed. */
+ if (rc < 0)
+ dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id);
+ }
+}
+
+static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie)
+{
+ LIST_HEAD(flist);
+ struct idxd_desc *d, *t;
+ struct llist_node *head;
+
+ spin_lock(&ie->list_lock);
+ head = llist_del_all(&ie->pending_llist);
+ if (head) {
+ llist_for_each_entry_safe(d, t, head, llnode)
+ list_add_tail(&d->list, &ie->work_list);
+ }
+
+ list_for_each_entry_safe(d, t, &ie->work_list, list) {
+ if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL)
+ list_move_tail(&d->list, &flist);
+ }
+ spin_unlock(&ie->list_lock);
+
+ list_for_each_entry_safe(d, t, &flist, list) {
+ list_del(&d->list);
+ idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
+ }
+}
+
+static void idxd_int_handle_revoke(struct work_struct *work)
+{
+ struct idxd_int_handle_revoke *revoke =
+ container_of(work, struct idxd_int_handle_revoke, work);
+ struct idxd_device *idxd = revoke->idxd;
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ int i, new_handle, rc;
+
+ if (!idxd->request_int_handles) {
+ kfree(revoke);
+ dev_warn(dev, "Unexpected int handle refresh interrupt.\n");
+ return;
+ }
+
+ /*
+ * The loop attempts to acquire new interrupt handle for all interrupt
+ * vectors that supports a handle. If a new interrupt handle is acquired and the
+ * wq is kernel type, the driver will kill the percpu_ref to pause all
+ * ongoing descriptor submissions. The interrupt handle is then changed.
+ * After change, the percpu_ref is revived and all the pending submissions
+ * are woken to try again. A drain is sent to for the interrupt handle
+ * at the end to make sure all invalid int handle descriptors are processed.
+ */
+ for (i = 1; i < idxd->irq_cnt; i++) {
+ struct idxd_irq_entry *ie = idxd_get_ie(idxd, i);
+ struct idxd_wq *wq = ie_to_wq(ie);
+
+ if (ie->int_handle == INVALID_INT_HANDLE)
+ continue;
+
+ rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX);
+ if (rc < 0) {
+ dev_warn(dev, "get int handle %d failed: %d\n", i, rc);
+ /*
+ * Failed to acquire new interrupt handle. Kill the WQ
+ * and release all the pending submitters. The submitters will
+ * get error return code and handle appropriately.
+ */
+ ie->int_handle = INVALID_INT_HANDLE;
+ idxd_wq_quiesce(wq);
+ idxd_abort_invalid_int_handle_descs(ie);
+ continue;
+ }
+
+ /* No change in interrupt handle, nothing needs to be done */
+ if (ie->int_handle == new_handle)
+ continue;
+
+ if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) {
+ /*
+ * All the MSIX interrupts are allocated at once during probe.
+ * Therefore we need to update all interrupts even if the WQ
+ * isn't supporting interrupt operations.
+ */
+ ie->int_handle = new_handle;
+ continue;
+ }
+
+ mutex_lock(&wq->wq_lock);
+ reinit_completion(&wq->wq_resurrect);
+
+ /* Kill percpu_ref to pause additional descriptor submissions */
+ percpu_ref_kill(&wq->wq_active);
+
+ /* Wait for all submitters quiesce before we change interrupt handle */
+ wait_for_completion(&wq->wq_dead);
+
+ ie->int_handle = new_handle;
+
+ /* Revive percpu ref and wake up all the waiting submitters */
+ percpu_ref_reinit(&wq->wq_active);
+ complete_all(&wq->wq_resurrect);
+ mutex_unlock(&wq->wq_lock);
+
+ /*
+ * The delay here is to wait for all possible MOVDIR64B that
+ * are issued before percpu_ref_kill() has happened to have
+ * reached the PCIe domain before the drain is issued. The driver
+ * needs to ensure that the drain descriptor issued does not pass
+ * all the other issued descriptors that contain the invalid
+ * interrupt handle in order to ensure that the drain descriptor
+ * interrupt will allow the cleanup of all the descriptors with
+ * invalid interrupt handle.
+ */
+ if (wq_dedicated(wq))
+ udelay(100);
+ idxd_int_handle_revoke_drain(ie);
+ }
+ kfree(revoke);
+}
+
static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
{
struct device *dev = &idxd->pdev->dev;
@@ -101,6 +268,23 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
err = true;
}
+ if (cause & IDXD_INTC_INT_HANDLE_REVOKED) {
+ struct idxd_int_handle_revoke *revoke;
+
+ val |= IDXD_INTC_INT_HANDLE_REVOKED;
+
+ revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC);
+ if (revoke) {
+ revoke->idxd = idxd;
+ INIT_WORK(&revoke->work, idxd_int_handle_revoke);
+ queue_work(idxd->wq, &revoke->work);
+
+ } else {
+ dev_err(dev, "Failed to allocate work for int handle revoke\n");
+ idxd_wqs_quiesce(idxd);
+ }
+ }
+
if (cause & IDXD_INTC_CMD) {
val |= IDXD_INTC_CMD;
complete(idxd->cmd_done);
@@ -157,7 +341,7 @@ halt:
irqreturn_t idxd_misc_thread(int vec, void *data)
{
struct idxd_irq_entry *irq_entry = data;
- struct idxd_device *idxd = irq_entry->idxd;
+ struct idxd_device *idxd = ie_to_idxd(irq_entry);
int rc;
u32 cause;
@@ -177,6 +361,51 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
return IRQ_HANDLED;
}
+static void idxd_int_handle_resubmit_work(struct work_struct *work)
+{
+ struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work);
+ struct idxd_desc *desc = irw->desc;
+ struct idxd_wq *wq = desc->wq;
+ int rc;
+
+ desc->completion->status = 0;
+ rc = idxd_submit_desc(wq, desc);
+ if (rc < 0) {
+ dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n",
+ desc->id, wq->id);
+ /*
+ * If the error is not -EAGAIN, it means the submission failed due to wq
+ * has been killed instead of ENQCMDS failure. Here the driver needs to
+ * notify the submitter of the failure by reporting abort status.
+ *
+ * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the
+ * abort.
+ */
+ if (rc != -EAGAIN) {
+ desc->completion->status = IDXD_COMP_DESC_ABORT;
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false);
+ }
+ idxd_free_desc(wq, desc);
+ }
+ kfree(irw);
+}
+
+bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc)
+{
+ struct idxd_wq *wq = desc->wq;
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_resubmit *irw;
+
+ irw = kzalloc(sizeof(*irw), GFP_KERNEL);
+ if (!irw)
+ return false;
+
+ irw->desc = desc;
+ INIT_WORK(&irw->work, idxd_int_handle_resubmit_work);
+ queue_work(idxd->wq, &irw->work);
+ return true;
+}
+
static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
{
struct idxd_desc *desc, *t;
@@ -195,11 +424,11 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
* and 0xff, which DSA_COMP_STATUS_MASK can mask out.
*/
if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
- complete_desc(desc, IDXD_COMPLETE_ABORT);
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
continue;
}
- complete_desc(desc, IDXD_COMPLETE_NORMAL);
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
} else {
spin_lock(&irq_entry->list_lock);
list_add_tail(&desc->list,
@@ -238,11 +467,11 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
* and 0xff, which DSA_COMP_STATUS_MASK can mask out.
*/
if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
- complete_desc(desc, IDXD_COMPLETE_ABORT);
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
continue;
}
- complete_desc(desc, IDXD_COMPLETE_NORMAL);
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
}
}
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index 262c8220adbd..aa642aecdc0b 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -64,9 +64,9 @@ union wq_cap_reg {
union group_cap_reg {
struct {
u64 num_groups:8;
- u64 total_tokens:8;
- u64 token_en:1;
- u64 token_limit:1;
+ u64 total_rdbufs:8; /* formerly total_tokens */
+ u64 rdbuf_ctrl:1; /* formerly token_en */
+ u64 rdbuf_limit:1; /* formerly token_limit */
u64 rsvd:46;
};
u64 bits;
@@ -110,7 +110,7 @@ union offsets_reg {
#define IDXD_GENCFG_OFFSET 0x80
union gencfg_reg {
struct {
- u32 token_limit:8;
+ u32 rdbuf_limit:8;
u32 rsvd:4;
u32 user_int_en:1;
u32 rsvd2:19;
@@ -158,6 +158,7 @@ enum idxd_device_reset_type {
#define IDXD_INTC_OCCUPY 0x04
#define IDXD_INTC_PERFMON_OVFL 0x08
#define IDXD_INTC_HALT_STATE 0x10
+#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000
#define IDXD_CMD_OFFSET 0xa0
union idxd_command_reg {
@@ -287,10 +288,10 @@ union group_flags {
u32 tc_a:3;
u32 tc_b:3;
u32 rsvd:1;
- u32 use_token_limit:1;
- u32 tokens_reserved:8;
+ u32 use_rdbuf_limit:1;
+ u32 rdbufs_reserved:8;
u32 rsvd2:4;
- u32 tokens_allowed:8;
+ u32 rdbufs_allowed:8;
u32 rsvd3:4;
};
u32 bits;
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index 83452fbbb168..e289fd48711a 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -21,15 +21,6 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
if (device_pasid_enabled(idxd))
desc->hw->pasid = idxd->pasid;
- /*
- * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match
- * vector 1:1 to the WQ id, we need to add 1
- */
- if (!idxd->int_handles)
- desc->hw->int_handle = wq->id + 1;
- else
- desc->hw->int_handle = idxd->int_handles[wq->id];
-
return desc;
}
@@ -134,35 +125,58 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
spin_unlock(&ie->list_lock);
if (found)
- complete_desc(found, IDXD_COMPLETE_ABORT);
+ idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
/*
- * complete_desc() will return desc to allocator and the desc can be
- * acquired by a different process and the desc->list can be modified.
- * Delete desc from list so the list trasversing does not get corrupted
- * by the other process.
+ * completing the descriptor will return desc to allocator and
+ * the desc can be acquired by a different process and the
+ * desc->list can be modified. Delete desc from list so the
+ * list trasversing does not get corrupted by the other process.
*/
list_for_each_entry_safe(d, t, &flist, list) {
list_del_init(&d->list);
- complete_desc(d, IDXD_COMPLETE_NORMAL);
+ idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
}
}
+/*
+ * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
+ * has better control of number of descriptors being submitted to a shared wq by limiting
+ * the number of driver allocated descriptors to the wq size. However, when the swq is
+ * exported to a guest kernel, it may be shared with multiple guest kernels. This means
+ * the likelihood of getting busy returned on the swq when submitting goes significantly up.
+ * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
+ * up. The sysfs knob can be tuned by the system administrator.
+ */
+int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
+{
+ int rc, retries = 0;
+
+ do {
+ rc = enqcmds(portal, desc);
+ if (rc == 0)
+ break;
+ cpu_relax();
+ } while (retries++ < wq->enqcmds_retries);
+
+ return rc;
+}
+
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
struct idxd_device *idxd = wq->idxd;
struct idxd_irq_entry *ie = NULL;
+ u32 desc_flags = desc->hw->flags;
void __iomem *portal;
int rc;
- if (idxd->state != IDXD_DEV_ENABLED) {
- idxd_free_desc(wq, desc);
+ if (idxd->state != IDXD_DEV_ENABLED)
return -EIO;
- }
if (!percpu_ref_tryget_live(&wq->wq_active)) {
- idxd_free_desc(wq, desc);
- return -ENXIO;
+ wait_for_completion(&wq->wq_resurrect);
+ if (!percpu_ref_tryget_live(&wq->wq_active))
+ return -ENXIO;
}
portal = idxd_wq_portal_addr(wq);
@@ -178,28 +192,21 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
* Pending the descriptor to the lockless list for the irq_entry
* that we designated the descriptor to.
*/
- if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
- ie = &idxd->irq_entries[wq->id + 1];
+ if (desc_flags & IDXD_OP_FLAG_RCI) {
+ ie = &wq->ie;
+ desc->hw->int_handle = ie->int_handle;
llist_add(&desc->llnode, &ie->pending_llist);
}
if (wq_dedicated(wq)) {
iosubmit_cmds512(portal, desc->hw, 1);
} else {
- /*
- * It's not likely that we would receive queue full rejection
- * since the descriptor allocation gates at wq size. If we
- * receive a -EAGAIN, that means something went wrong such as the
- * device is not accepting descriptor at all.
- */
- rc = enqcmds(portal, desc->hw);
+ rc = idxd_enqcmds(wq, portal, desc->hw);
if (rc < 0) {
percpu_ref_put(&wq->wq_active);
/* abort operation frees the descriptor */
if (ie)
llist_abort_desc(wq, ie, desc);
- else
- idxd_free_desc(wq, desc);
return rc;
}
}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index a9025be940db..7e19ab92b61a 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -99,31 +99,39 @@ struct device_type idxd_engine_device_type = {
/* Group attributes */
-static void idxd_set_free_tokens(struct idxd_device *idxd)
+static void idxd_set_free_rdbufs(struct idxd_device *idxd)
{
- int i, tokens;
+ int i, rdbufs;
- for (i = 0, tokens = 0; i < idxd->max_groups; i++) {
+ for (i = 0, rdbufs = 0; i < idxd->max_groups; i++) {
struct idxd_group *g = idxd->groups[i];
- tokens += g->tokens_reserved;
+ rdbufs += g->rdbufs_reserved;
}
- idxd->nr_tokens = idxd->max_tokens - tokens;
+ idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs;
+}
+
+static ssize_t group_read_buffers_reserved_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%u\n", group->rdbufs_reserved);
}
static ssize_t group_tokens_reserved_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct idxd_group *group = confdev_to_group(dev);
-
- return sysfs_emit(buf, "%u\n", group->tokens_reserved);
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n");
+ return group_read_buffers_reserved_show(dev, attr, buf);
}
-static ssize_t group_tokens_reserved_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t group_read_buffers_reserved_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct idxd_group *group = confdev_to_group(dev);
struct idxd_device *idxd = group->idxd;
@@ -143,33 +151,53 @@ static ssize_t group_tokens_reserved_store(struct device *dev,
if (idxd->state == IDXD_DEV_ENABLED)
return -EPERM;
- if (val > idxd->max_tokens)
+ if (val > idxd->max_rdbufs)
return -EINVAL;
- if (val > idxd->nr_tokens + group->tokens_reserved)
+ if (val > idxd->nr_rdbufs + group->rdbufs_reserved)
return -EINVAL;
- group->tokens_reserved = val;
- idxd_set_free_tokens(idxd);
+ group->rdbufs_reserved = val;
+ idxd_set_free_rdbufs(idxd);
return count;
}
+static ssize_t group_tokens_reserved_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n");
+ return group_read_buffers_reserved_store(dev, attr, buf, count);
+}
+
static struct device_attribute dev_attr_group_tokens_reserved =
__ATTR(tokens_reserved, 0644, group_tokens_reserved_show,
group_tokens_reserved_store);
+static struct device_attribute dev_attr_group_read_buffers_reserved =
+ __ATTR(read_buffers_reserved, 0644, group_read_buffers_reserved_show,
+ group_read_buffers_reserved_store);
+
+static ssize_t group_read_buffers_allowed_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%u\n", group->rdbufs_allowed);
+}
+
static ssize_t group_tokens_allowed_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct idxd_group *group = confdev_to_group(dev);
-
- return sysfs_emit(buf, "%u\n", group->tokens_allowed);
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n");
+ return group_read_buffers_allowed_show(dev, attr, buf);
}
-static ssize_t group_tokens_allowed_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t group_read_buffers_allowed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct idxd_group *group = confdev_to_group(dev);
struct idxd_device *idxd = group->idxd;
@@ -190,29 +218,49 @@ static ssize_t group_tokens_allowed_store(struct device *dev,
return -EPERM;
if (val < 4 * group->num_engines ||
- val > group->tokens_reserved + idxd->nr_tokens)
+ val > group->rdbufs_reserved + idxd->nr_rdbufs)
return -EINVAL;
- group->tokens_allowed = val;
+ group->rdbufs_allowed = val;
return count;
}
+static ssize_t group_tokens_allowed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n");
+ return group_read_buffers_allowed_store(dev, attr, buf, count);
+}
+
static struct device_attribute dev_attr_group_tokens_allowed =
__ATTR(tokens_allowed, 0644, group_tokens_allowed_show,
group_tokens_allowed_store);
+static struct device_attribute dev_attr_group_read_buffers_allowed =
+ __ATTR(read_buffers_allowed, 0644, group_read_buffers_allowed_show,
+ group_read_buffers_allowed_store);
+
+static ssize_t group_use_read_buffer_limit_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit);
+}
+
static ssize_t group_use_token_limit_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct idxd_group *group = confdev_to_group(dev);
-
- return sysfs_emit(buf, "%u\n", group->use_token_limit);
+ dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n");
+ return group_use_read_buffer_limit_show(dev, attr, buf);
}
-static ssize_t group_use_token_limit_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t group_use_read_buffer_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct idxd_group *group = confdev_to_group(dev);
struct idxd_device *idxd = group->idxd;
@@ -232,17 +280,29 @@ static ssize_t group_use_token_limit_store(struct device *dev,
if (idxd->state == IDXD_DEV_ENABLED)
return -EPERM;
- if (idxd->token_limit == 0)
+ if (idxd->rdbuf_limit == 0)
return -EPERM;
- group->use_token_limit = !!val;
+ group->use_rdbuf_limit = !!val;
return count;
}
+static ssize_t group_use_token_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n");
+ return group_use_read_buffer_limit_store(dev, attr, buf, count);
+}
+
static struct device_attribute dev_attr_group_use_token_limit =
__ATTR(use_token_limit, 0644, group_use_token_limit_show,
group_use_token_limit_store);
+static struct device_attribute dev_attr_group_use_read_buffer_limit =
+ __ATTR(use_read_buffer_limit, 0644, group_use_read_buffer_limit_show,
+ group_use_read_buffer_limit_store);
+
static ssize_t group_engines_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -387,8 +447,11 @@ static struct attribute *idxd_group_attributes[] = {
&dev_attr_group_work_queues.attr,
&dev_attr_group_engines.attr,
&dev_attr_group_use_token_limit.attr,
+ &dev_attr_group_use_read_buffer_limit.attr,
&dev_attr_group_tokens_allowed.attr,
+ &dev_attr_group_read_buffers_allowed.attr,
&dev_attr_group_tokens_reserved.attr,
+ &dev_attr_group_read_buffers_reserved.attr,
&dev_attr_group_traffic_class_a.attr,
&dev_attr_group_traffic_class_b.attr,
NULL,
@@ -945,6 +1008,41 @@ static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *at
static struct device_attribute dev_attr_wq_occupancy =
__ATTR(occupancy, 0444, wq_occupancy_show, NULL);
+static ssize_t wq_enqcmds_retries_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ if (wq_dedicated(wq))
+ return -EOPNOTSUPP;
+
+ return sysfs_emit(buf, "%u\n", wq->enqcmds_retries);
+}
+
+static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ int rc;
+ unsigned int retries;
+
+ if (wq_dedicated(wq))
+ return -EOPNOTSUPP;
+
+ rc = kstrtouint(buf, 10, &retries);
+ if (rc < 0)
+ return rc;
+
+ if (retries > IDXD_ENQCMDS_MAX_RETRIES)
+ retries = IDXD_ENQCMDS_MAX_RETRIES;
+
+ wq->enqcmds_retries = retries;
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_enqcmds_retries =
+ __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store);
+
static struct attribute *idxd_wq_attributes[] = {
&dev_attr_wq_clients.attr,
&dev_attr_wq_state.attr,
@@ -961,6 +1059,7 @@ static struct attribute *idxd_wq_attributes[] = {
&dev_attr_wq_max_batch_size.attr,
&dev_attr_wq_ats_disable.attr,
&dev_attr_wq_occupancy.attr,
+ &dev_attr_wq_enqcmds_retries.attr,
NULL,
};
@@ -1156,26 +1255,42 @@ static ssize_t errors_show(struct device *dev,
}
static DEVICE_ATTR_RO(errors);
+static ssize_t max_read_buffers_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->max_rdbufs);
+}
+
static ssize_t max_tokens_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ dev_warn_once(dev, "attribute deprecated, see max_read_buffers.\n");
+ return max_read_buffers_show(dev, attr, buf);
+}
+
+static DEVICE_ATTR_RO(max_tokens); /* deprecated */
+static DEVICE_ATTR_RO(max_read_buffers);
+
+static ssize_t read_buffer_limit_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
struct idxd_device *idxd = confdev_to_idxd(dev);
- return sysfs_emit(buf, "%u\n", idxd->max_tokens);
+ return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit);
}
-static DEVICE_ATTR_RO(max_tokens);
static ssize_t token_limit_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct idxd_device *idxd = confdev_to_idxd(dev);
-
- return sysfs_emit(buf, "%u\n", idxd->token_limit);
+ dev_warn_once(dev, "attribute deprecated, see read_buffer_limit.\n");
+ return read_buffer_limit_show(dev, attr, buf);
}
-static ssize_t token_limit_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t read_buffer_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct idxd_device *idxd = confdev_to_idxd(dev);
unsigned long val;
@@ -1191,16 +1306,26 @@ static ssize_t token_limit_store(struct device *dev,
if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
return -EPERM;
- if (!idxd->hw.group_cap.token_limit)
+ if (!idxd->hw.group_cap.rdbuf_limit)
return -EPERM;
- if (val > idxd->hw.group_cap.total_tokens)
+ if (val > idxd->hw.group_cap.total_rdbufs)
return -EINVAL;
- idxd->token_limit = val;
+ idxd->rdbuf_limit = val;
return count;
}
-static DEVICE_ATTR_RW(token_limit);
+
+static ssize_t token_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffer_limit\n");
+ return read_buffer_limit_store(dev, attr, buf, count);
+}
+
+static DEVICE_ATTR_RW(token_limit); /* deprecated */
+static DEVICE_ATTR_RW(read_buffer_limit);
static ssize_t cdev_major_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -1246,7 +1371,9 @@ static struct attribute *idxd_device_attributes[] = {
&dev_attr_state.attr,
&dev_attr_errors.attr,
&dev_attr_max_tokens.attr,
+ &dev_attr_max_read_buffers.attr,
&dev_attr_token_limit.attr,
+ &dev_attr_read_buffer_limit.attr,
&dev_attr_cdev_major.attr,
&dev_attr_cmd_status.attr,
NULL,
@@ -1268,8 +1395,6 @@ static void idxd_conf_device_release(struct device *dev)
kfree(idxd->groups);
kfree(idxd->wqs);
kfree(idxd->engines);
- kfree(idxd->irq_entries);
- kfree(idxd->int_handles);
ida_free(&idxd_ida, idxd->id);
kfree(idxd);
}
diff --git a/drivers/dma/ioat/sysfs.c b/drivers/dma/ioat/sysfs.c
index aa44bcd6a356..168adf28c5b1 100644
--- a/drivers/dma/ioat/sysfs.c
+++ b/drivers/dma/ioat/sysfs.c
@@ -158,8 +158,9 @@ static struct attribute *ioat_attrs[] = {
&intr_coalesce_attr.attr,
NULL,
};
+ATTRIBUTE_GROUPS(ioat);
struct kobj_type ioat_ktype = {
.sysfs_ops = &ioat_sysfs_ops,
- .default_attrs = ioat_attrs,
+ .default_groups = ioat_groups,
};
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 1da04112fcdb..c359decc07a3 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -835,7 +835,7 @@ static int pch_dma_probe(struct pci_dev *pdev,
goto err_disable_pdev;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Cannot set proper DMA config\n");
goto err_free_res;
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index e2b5129c5f84..5e46e347e28b 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -3240,7 +3240,6 @@ static int ppc440spe_adma_dma2rxor_prep_src(
struct ppc440spe_rxor *cursor, int index,
int src_cnt, u32 addr)
{
- int rval = 0;
u32 sign;
struct ppc440spe_adma_desc_slot *desc = hdesc;
int i;
@@ -3348,7 +3347,7 @@ static int ppc440spe_adma_dma2rxor_prep_src(
break;
}
- return rval;
+ return 0;
}
/**
diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index 1a1b7d8458c9..94f3648f7483 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -2206,10 +2206,8 @@ static int gpi_probe(struct platform_device *pdev)
/* set up irq */
ret = platform_get_irq(pdev, i);
- if (ret < 0) {
- dev_err(gpi_dev->dev, "platform_get_irq failed for %d:%d\n", i, ret);
+ if (ret < 0)
return ret;
- }
gpii->irq = ret;
/* set up channel specific register info */
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 5c7716fd6bc5..481f45c77ce1 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -236,7 +236,7 @@ struct rcar_dmac_of_data {
#define RCAR_DMAOR_PRI_ROUND_ROBIN (3 << 8)
#define RCAR_DMAOR_AE (1 << 2)
#define RCAR_DMAOR_DME (1 << 0)
-#define RCAR_DMACHCLR 0x0080 /* Not on R-Car V3U */
+#define RCAR_DMACHCLR 0x0080 /* Not on R-Car Gen4 */
#define RCAR_DMADPSEC 0x00a0
#define RCAR_DMASAR 0x0000
@@ -299,8 +299,8 @@ struct rcar_dmac_of_data {
#define RCAR_DMAFIXDAR 0x0014
#define RCAR_DMAFIXDPBASE 0x0060
-/* For R-Car V3U */
-#define RCAR_V3U_DMACHCLR 0x0100
+/* For R-Car Gen4 */
+#define RCAR_GEN4_DMACHCLR 0x0100
/* Hardcode the MEMCPY transfer size to 4 bytes. */
#define RCAR_DMAC_MEMCPY_XFER_SIZE 4
@@ -345,7 +345,7 @@ static void rcar_dmac_chan_clear(struct rcar_dmac *dmac,
struct rcar_dmac_chan *chan)
{
if (dmac->chan_base)
- rcar_dmac_chan_write(chan, RCAR_V3U_DMACHCLR, 1);
+ rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1);
else
rcar_dmac_write(dmac, RCAR_DMACHCLR, BIT(chan->index));
}
@@ -357,7 +357,7 @@ static void rcar_dmac_chan_clear_all(struct rcar_dmac *dmac)
if (dmac->chan_base) {
for_each_rcar_dmac_chan(i, dmac, chan)
- rcar_dmac_chan_write(chan, RCAR_V3U_DMACHCLR, 1);
+ rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1);
} else {
rcar_dmac_write(dmac, RCAR_DMACHCLR, dmac->channels_mask);
}
@@ -2009,7 +2009,7 @@ static const struct rcar_dmac_of_data rcar_dmac_data = {
.chan_offset_stride = 0x80,
};
-static const struct rcar_dmac_of_data rcar_v3u_dmac_data = {
+static const struct rcar_dmac_of_data rcar_gen4_dmac_data = {
.chan_offset_base = 0x0,
.chan_offset_stride = 0x1000,
};
@@ -2019,8 +2019,11 @@ static const struct of_device_id rcar_dmac_of_ids[] = {
.compatible = "renesas,rcar-dmac",
.data = &rcar_dmac_data,
}, {
+ .compatible = "renesas,rcar-gen4-dmac",
+ .data = &rcar_gen4_dmac_data,
+ }, {
.compatible = "renesas,dmac-r8a779a0",
- .data = &rcar_v3u_dmac_data,
+ .data = &rcar_gen4_dmac_data,
},
{ /* Sentinel */ }
};
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 41c6bc650fa3..158e5e7defae 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -1034,9 +1034,7 @@ EXPORT_SYMBOL(shdma_cleanup);
static int __init shdma_enter(void)
{
- shdma_slave_used = kcalloc(DIV_ROUND_UP(slave_num, BITS_PER_LONG),
- sizeof(long),
- GFP_KERNEL);
+ shdma_slave_used = bitmap_zalloc(slave_num, GFP_KERNEL);
if (!shdma_slave_used)
return -ENOMEM;
return 0;
@@ -1045,7 +1043,7 @@ module_init(shdma_enter);
static void __exit shdma_exit(void)
{
- kfree(shdma_slave_used);
+ bitmap_free(shdma_slave_used);
}
module_exit(shdma_exit);
diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index d30a4a28d3bf..6f57ff0e7b37 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -10,6 +10,7 @@
* Inspired by stm32-dma.c and dma-jz4780.c
*/
+#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dmaengine.h>
@@ -32,13 +33,6 @@
#include "virt-dma.h"
-/* MDMA Generic getter/setter */
-#define STM32_MDMA_SHIFT(n) (ffs(n) - 1)
-#define STM32_MDMA_SET(n, mask) (((n) << STM32_MDMA_SHIFT(mask)) & \
- (mask))
-#define STM32_MDMA_GET(n, mask) (((n) & (mask)) >> \
- STM32_MDMA_SHIFT(mask))
-
#define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */
#define STM32_MDMA_GISR1 0x0004 /* MDMA Int Status Reg 2 */
@@ -80,8 +74,7 @@
#define STM32_MDMA_CCR_HEX BIT(13)
#define STM32_MDMA_CCR_BEX BIT(12)
#define STM32_MDMA_CCR_PL_MASK GENMASK(7, 6)
-#define STM32_MDMA_CCR_PL(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CCR_PL_MASK)
+#define STM32_MDMA_CCR_PL(n) FIELD_PREP(STM32_MDMA_CCR_PL_MASK, (n))
#define STM32_MDMA_CCR_TCIE BIT(5)
#define STM32_MDMA_CCR_BTIE BIT(4)
#define STM32_MDMA_CCR_BRTIE BIT(3)
@@ -99,48 +92,33 @@
#define STM32_MDMA_CTCR_BWM BIT(31)
#define STM32_MDMA_CTCR_SWRM BIT(30)
#define STM32_MDMA_CTCR_TRGM_MSK GENMASK(29, 28)
-#define STM32_MDMA_CTCR_TRGM(n) STM32_MDMA_SET((n), \
- STM32_MDMA_CTCR_TRGM_MSK)
-#define STM32_MDMA_CTCR_TRGM_GET(n) STM32_MDMA_GET((n), \
- STM32_MDMA_CTCR_TRGM_MSK)
+#define STM32_MDMA_CTCR_TRGM(n) FIELD_PREP(STM32_MDMA_CTCR_TRGM_MSK, (n))
+#define STM32_MDMA_CTCR_TRGM_GET(n) FIELD_GET(STM32_MDMA_CTCR_TRGM_MSK, (n))
#define STM32_MDMA_CTCR_PAM_MASK GENMASK(27, 26)
-#define STM32_MDMA_CTCR_PAM(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CTCR_PAM_MASK)
+#define STM32_MDMA_CTCR_PAM(n) FIELD_PREP(STM32_MDMA_CTCR_PAM_MASK, (n))
#define STM32_MDMA_CTCR_PKE BIT(25)
#define STM32_MDMA_CTCR_TLEN_MSK GENMASK(24, 18)
-#define STM32_MDMA_CTCR_TLEN(n) STM32_MDMA_SET((n), \
- STM32_MDMA_CTCR_TLEN_MSK)
-#define STM32_MDMA_CTCR_TLEN_GET(n) STM32_MDMA_GET((n), \
- STM32_MDMA_CTCR_TLEN_MSK)
+#define STM32_MDMA_CTCR_TLEN(n) FIELD_PREP(STM32_MDMA_CTCR_TLEN_MSK, (n))
+#define STM32_MDMA_CTCR_TLEN_GET(n) FIELD_GET(STM32_MDMA_CTCR_TLEN_MSK, (n))
#define STM32_MDMA_CTCR_LEN2_MSK GENMASK(25, 18)
-#define STM32_MDMA_CTCR_LEN2(n) STM32_MDMA_SET((n), \
- STM32_MDMA_CTCR_LEN2_MSK)
-#define STM32_MDMA_CTCR_LEN2_GET(n) STM32_MDMA_GET((n), \
- STM32_MDMA_CTCR_LEN2_MSK)
+#define STM32_MDMA_CTCR_LEN2(n) FIELD_PREP(STM32_MDMA_CTCR_LEN2_MSK, (n))
+#define STM32_MDMA_CTCR_LEN2_GET(n) FIELD_GET(STM32_MDMA_CTCR_LEN2_MSK, (n))
#define STM32_MDMA_CTCR_DBURST_MASK GENMASK(17, 15)
-#define STM32_MDMA_CTCR_DBURST(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CTCR_DBURST_MASK)
+#define STM32_MDMA_CTCR_DBURST(n) FIELD_PREP(STM32_MDMA_CTCR_DBURST_MASK, (n))
#define STM32_MDMA_CTCR_SBURST_MASK GENMASK(14, 12)
-#define STM32_MDMA_CTCR_SBURST(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CTCR_SBURST_MASK)
+#define STM32_MDMA_CTCR_SBURST(n) FIELD_PREP(STM32_MDMA_CTCR_SBURST_MASK, (n))
#define STM32_MDMA_CTCR_DINCOS_MASK GENMASK(11, 10)
-#define STM32_MDMA_CTCR_DINCOS(n) STM32_MDMA_SET((n), \
- STM32_MDMA_CTCR_DINCOS_MASK)
+#define STM32_MDMA_CTCR_DINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_DINCOS_MASK, (n))
#define STM32_MDMA_CTCR_SINCOS_MASK GENMASK(9, 8)
-#define STM32_MDMA_CTCR_SINCOS(n) STM32_MDMA_SET((n), \
- STM32_MDMA_CTCR_SINCOS_MASK)
+#define STM32_MDMA_CTCR_SINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_SINCOS_MASK, (n))
#define STM32_MDMA_CTCR_DSIZE_MASK GENMASK(7, 6)
-#define STM32_MDMA_CTCR_DSIZE(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CTCR_DSIZE_MASK)
+#define STM32_MDMA_CTCR_DSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_DSIZE_MASK, (n))
#define STM32_MDMA_CTCR_SSIZE_MASK GENMASK(5, 4)
-#define STM32_MDMA_CTCR_SSIZE(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CTCR_SSIZE_MASK)
+#define STM32_MDMA_CTCR_SSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_SSIZE_MASK, (n))
#define STM32_MDMA_CTCR_DINC_MASK GENMASK(3, 2)
-#define STM32_MDMA_CTCR_DINC(n) STM32_MDMA_SET((n), \
- STM32_MDMA_CTCR_DINC_MASK)
+#define STM32_MDMA_CTCR_DINC(n) FIELD_PREP(STM32_MDMA_CTCR_DINC_MASK, (n))
#define STM32_MDMA_CTCR_SINC_MASK GENMASK(1, 0)
-#define STM32_MDMA_CTCR_SINC(n) STM32_MDMA_SET((n), \
- STM32_MDMA_CTCR_SINC_MASK)
+#define STM32_MDMA_CTCR_SINC(n) FIELD_PREP(STM32_MDMA_CTCR_SINC_MASK, (n))
#define STM32_MDMA_CTCR_CFG_MASK (STM32_MDMA_CTCR_SINC_MASK \
| STM32_MDMA_CTCR_DINC_MASK \
| STM32_MDMA_CTCR_SINCOS_MASK \
@@ -151,16 +129,13 @@
/* MDMA Channel x block number of data register */
#define STM32_MDMA_CBNDTR(x) (0x54 + 0x40 * (x))
#define STM32_MDMA_CBNDTR_BRC_MK GENMASK(31, 20)
-#define STM32_MDMA_CBNDTR_BRC(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CBNDTR_BRC_MK)
-#define STM32_MDMA_CBNDTR_BRC_GET(n) STM32_MDMA_GET((n), \
- STM32_MDMA_CBNDTR_BRC_MK)
+#define STM32_MDMA_CBNDTR_BRC(n) FIELD_PREP(STM32_MDMA_CBNDTR_BRC_MK, (n))
+#define STM32_MDMA_CBNDTR_BRC_GET(n) FIELD_GET(STM32_MDMA_CBNDTR_BRC_MK, (n))
#define STM32_MDMA_CBNDTR_BRDUM BIT(19)
#define STM32_MDMA_CBNDTR_BRSUM BIT(18)
#define STM32_MDMA_CBNDTR_BNDT_MASK GENMASK(16, 0)
-#define STM32_MDMA_CBNDTR_BNDT(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CBNDTR_BNDT_MASK)
+#define STM32_MDMA_CBNDTR_BNDT(n) FIELD_PREP(STM32_MDMA_CBNDTR_BNDT_MASK, (n))
/* MDMA Channel x source address register */
#define STM32_MDMA_CSAR(x) (0x58 + 0x40 * (x))
@@ -171,11 +146,9 @@
/* MDMA Channel x block repeat address update register */
#define STM32_MDMA_CBRUR(x) (0x60 + 0x40 * (x))
#define STM32_MDMA_CBRUR_DUV_MASK GENMASK(31, 16)
-#define STM32_MDMA_CBRUR_DUV(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CBRUR_DUV_MASK)
+#define STM32_MDMA_CBRUR_DUV(n) FIELD_PREP(STM32_MDMA_CBRUR_DUV_MASK, (n))
#define STM32_MDMA_CBRUR_SUV_MASK GENMASK(15, 0)
-#define STM32_MDMA_CBRUR_SUV(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CBRUR_SUV_MASK)
+#define STM32_MDMA_CBRUR_SUV(n) FIELD_PREP(STM32_MDMA_CBRUR_SUV_MASK, (n))
/* MDMA Channel x link address register */
#define STM32_MDMA_CLAR(x) (0x64 + 0x40 * (x))
@@ -184,9 +157,8 @@
#define STM32_MDMA_CTBR(x) (0x68 + 0x40 * (x))
#define STM32_MDMA_CTBR_DBUS BIT(17)
#define STM32_MDMA_CTBR_SBUS BIT(16)
-#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(7, 0)
-#define STM32_MDMA_CTBR_TSEL(n) STM32_MDMA_SET(n, \
- STM32_MDMA_CTBR_TSEL_MASK)
+#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(5, 0)
+#define STM32_MDMA_CTBR_TSEL(n) FIELD_PREP(STM32_MDMA_CTBR_TSEL_MASK, (n))
/* MDMA Channel x mask address register */
#define STM32_MDMA_CMAR(x) (0x70 + 0x40 * (x))
@@ -1279,7 +1251,7 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan,
u32 curr_hwdesc)
{
struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
- struct stm32_mdma_hwdesc *hwdesc = desc->node[0].hwdesc;
+ struct stm32_mdma_hwdesc *hwdesc;
u32 cbndtr, residue, modulo, burst_size;
int i;
diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile
index bd496efadff7..1d4081a049b7 100644
--- a/drivers/dma/ti/Makefile
+++ b/drivers/dma/ti/Makefile
@@ -8,5 +8,6 @@ obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o \
k3-psil-am654.o \
k3-psil-j721e.o \
k3-psil-j7200.o \
- k3-psil-am64.o
+ k3-psil-am64.o \
+ k3-psil-j721s2.o
obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c
index 35d81bd857f1..08e47f44d325 100644
--- a/drivers/dma/ti/edma.c
+++ b/drivers/dma/ti/edma.c
@@ -1681,8 +1681,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val);
emr = val;
- for (i = find_next_bit(&emr, 32, 0); i < 32;
- i = find_next_bit(&emr, 32, i + 1)) {
+ for_each_set_bit(i, &emr, 32) {
int k = (j << 5) + i;
/* Clear the corresponding EMR bits */
diff --git a/drivers/dma/ti/k3-psil-j721s2.c b/drivers/dma/ti/k3-psil-j721s2.c
new file mode 100644
index 000000000000..4c4172a4d271
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-j721s2.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ }, \
+ }
+
+#define PSIL_PDMA_XY_PKT(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pkt_mode = 1, \
+ }, \
+ }
+
+#define PSIL_PDMA_MCASP(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pdma_acc32 = 1, \
+ .pdma_burst = 1, \
+ }, \
+ }
+
+#define PSIL_ETHERNET(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 16, \
+ }, \
+ }
+
+#define PSIL_SA2UL(x, tx) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 64, \
+ .notdpkt = tx, \
+ }, \
+ }
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep j721s2_src_ep_map[] = {
+ /* PDMA_MCASP - McASP0-4 */
+ PSIL_PDMA_MCASP(0x4400),
+ PSIL_PDMA_MCASP(0x4401),
+ PSIL_PDMA_MCASP(0x4402),
+ PSIL_PDMA_MCASP(0x4403),
+ PSIL_PDMA_MCASP(0x4404),
+ /* PDMA_SPI_G0 - SPI0-3 */
+ PSIL_PDMA_XY_PKT(0x4600),
+ PSIL_PDMA_XY_PKT(0x4601),
+ PSIL_PDMA_XY_PKT(0x4602),
+ PSIL_PDMA_XY_PKT(0x4603),
+ PSIL_PDMA_XY_PKT(0x4604),
+ PSIL_PDMA_XY_PKT(0x4605),
+ PSIL_PDMA_XY_PKT(0x4606),
+ PSIL_PDMA_XY_PKT(0x4607),
+ PSIL_PDMA_XY_PKT(0x4608),
+ PSIL_PDMA_XY_PKT(0x4609),
+ PSIL_PDMA_XY_PKT(0x460a),
+ PSIL_PDMA_XY_PKT(0x460b),
+ PSIL_PDMA_XY_PKT(0x460c),
+ PSIL_PDMA_XY_PKT(0x460d),
+ PSIL_PDMA_XY_PKT(0x460e),
+ PSIL_PDMA_XY_PKT(0x460f),
+ /* PDMA_SPI_G1 - SPI4-7 */
+ PSIL_PDMA_XY_PKT(0x4610),
+ PSIL_PDMA_XY_PKT(0x4611),
+ PSIL_PDMA_XY_PKT(0x4612),
+ PSIL_PDMA_XY_PKT(0x4613),
+ PSIL_PDMA_XY_PKT(0x4614),
+ PSIL_PDMA_XY_PKT(0x4615),
+ PSIL_PDMA_XY_PKT(0x4616),
+ PSIL_PDMA_XY_PKT(0x4617),
+ PSIL_PDMA_XY_PKT(0x4618),
+ PSIL_PDMA_XY_PKT(0x4619),
+ PSIL_PDMA_XY_PKT(0x461a),
+ PSIL_PDMA_XY_PKT(0x461b),
+ PSIL_PDMA_XY_PKT(0x461c),
+ PSIL_PDMA_XY_PKT(0x461d),
+ PSIL_PDMA_XY_PKT(0x461e),
+ PSIL_PDMA_XY_PKT(0x461f),
+ /* PDMA_USART_G0 - UART0-1 */
+ PSIL_PDMA_XY_PKT(0x4700),
+ PSIL_PDMA_XY_PKT(0x4701),
+ /* PDMA_USART_G1 - UART2-3 */
+ PSIL_PDMA_XY_PKT(0x4702),
+ PSIL_PDMA_XY_PKT(0x4703),
+ /* PDMA_USART_G2 - UART4-9 */
+ PSIL_PDMA_XY_PKT(0x4704),
+ PSIL_PDMA_XY_PKT(0x4705),
+ PSIL_PDMA_XY_PKT(0x4706),
+ PSIL_PDMA_XY_PKT(0x4707),
+ PSIL_PDMA_XY_PKT(0x4708),
+ PSIL_PDMA_XY_PKT(0x4709),
+ /* CPSW0 */
+ PSIL_ETHERNET(0x7000),
+ /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */
+ PSIL_PDMA_XY_PKT(0x7100),
+ PSIL_PDMA_XY_PKT(0x7101),
+ PSIL_PDMA_XY_PKT(0x7102),
+ PSIL_PDMA_XY_PKT(0x7103),
+ /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */
+ PSIL_PDMA_XY_PKT(0x7200),
+ PSIL_PDMA_XY_PKT(0x7201),
+ PSIL_PDMA_XY_PKT(0x7202),
+ PSIL_PDMA_XY_PKT(0x7203),
+ PSIL_PDMA_XY_PKT(0x7204),
+ PSIL_PDMA_XY_PKT(0x7205),
+ PSIL_PDMA_XY_PKT(0x7206),
+ PSIL_PDMA_XY_PKT(0x7207),
+ /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */
+ PSIL_PDMA_XY_PKT(0x7300),
+ /* MCU_PDMA_ADC - ADC0-1 */
+ PSIL_PDMA_XY_TR(0x7400),
+ PSIL_PDMA_XY_TR(0x7401),
+ PSIL_PDMA_XY_TR(0x7402),
+ PSIL_PDMA_XY_TR(0x7403),
+ /* SA2UL */
+ PSIL_SA2UL(0x7500, 0),
+ PSIL_SA2UL(0x7501, 0),
+ PSIL_SA2UL(0x7502, 0),
+ PSIL_SA2UL(0x7503, 0),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep j721s2_dst_ep_map[] = {
+ /* CPSW0 */
+ PSIL_ETHERNET(0xf000),
+ PSIL_ETHERNET(0xf001),
+ PSIL_ETHERNET(0xf002),
+ PSIL_ETHERNET(0xf003),
+ PSIL_ETHERNET(0xf004),
+ PSIL_ETHERNET(0xf005),
+ PSIL_ETHERNET(0xf006),
+ PSIL_ETHERNET(0xf007),
+ /* SA2UL */
+ PSIL_SA2UL(0xf500, 1),
+ PSIL_SA2UL(0xf501, 1),
+};
+
+struct psil_ep_map j721s2_ep_map = {
+ .name = "j721s2",
+ .src = j721s2_src_ep_map,
+ .src_count = ARRAY_SIZE(j721s2_src_ep_map),
+ .dst = j721s2_dst_ep_map,
+ .dst_count = ARRAY_SIZE(j721s2_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h
index b74e192e3c2d..e51e179cdb56 100644
--- a/drivers/dma/ti/k3-psil-priv.h
+++ b/drivers/dma/ti/k3-psil-priv.h
@@ -41,5 +41,6 @@ extern struct psil_ep_map am654_ep_map;
extern struct psil_ep_map j721e_ep_map;
extern struct psil_ep_map j7200_ep_map;
extern struct psil_ep_map am64_ep_map;
+extern struct psil_ep_map j721s2_ep_map;
#endif /* K3_PSIL_PRIV_H_ */
diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c
index 13ce7367d870..8867b4bd0c51 100644
--- a/drivers/dma/ti/k3-psil.c
+++ b/drivers/dma/ti/k3-psil.c
@@ -21,6 +21,7 @@ static const struct soc_device_attribute k3_soc_devices[] = {
{ .family = "J721E", .data = &j721e_ep_map },
{ .family = "J7200", .data = &j7200_ep_map },
{ .family = "AM64X", .data = &am64_ep_map },
+ { .family = "J721S2", .data = &j721s2_ep_map },
{ /* sentinel */ }
};
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index c542ba5d383d..d2d4cbe63e44 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -4374,6 +4374,7 @@ static const struct soc_device_attribute k3_soc_devices[] = {
{ .family = "J721E", .data = &j721e_soc_data },
{ .family = "J7200", .data = &j7200_soc_data },
{ .family = "AM64X", .data = &am64_soc_data },
+ { .family = "J721S2", .data = &j721e_soc_data},
{ /* sentinel */ }
};
diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c
index d6b8a202474f..290836b7e1be 100644
--- a/drivers/dma/uniphier-xdmac.c
+++ b/drivers/dma/uniphier-xdmac.c
@@ -131,8 +131,9 @@ uniphier_xdmac_next_desc(struct uniphier_xdmac_chan *xc)
static void uniphier_xdmac_chan_start(struct uniphier_xdmac_chan *xc,
struct uniphier_xdmac_desc *xd)
{
- u32 src_mode, src_addr, src_width;
- u32 dst_mode, dst_addr, dst_width;
+ u32 src_mode, src_width;
+ u32 dst_mode, dst_width;
+ dma_addr_t src_addr, dst_addr;
u32 val, its, tnum;
enum dma_slave_buswidth buswidth;
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 4677ce08ed40..cd62bbb50e8b 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -2128,6 +2128,126 @@ error:
}
/**
+ * xilinx_cdma_prep_memcpy_sg - prepare descriptors for a memcpy_sg transaction
+ * @dchan: DMA channel
+ * @dst_sg: Destination scatter list
+ * @dst_sg_len: Number of entries in destination scatter list
+ * @src_sg: Source scatter list
+ * @src_sg_len: Number of entries in source scatter list
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *xilinx_cdma_prep_memcpy_sg(
+ struct dma_chan *dchan, struct scatterlist *dst_sg,
+ unsigned int dst_sg_len, struct scatterlist *src_sg,
+ unsigned int src_sg_len, unsigned long flags)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dma_tx_descriptor *desc;
+ struct xilinx_cdma_tx_segment *segment, *prev = NULL;
+ struct xilinx_cdma_desc_hw *hw;
+ size_t len, dst_avail, src_avail;
+ dma_addr_t dma_dst, dma_src;
+
+ if (unlikely(dst_sg_len == 0 || src_sg_len == 0))
+ return NULL;
+
+ if (unlikely(!dst_sg || !src_sg))
+ return NULL;
+
+ desc = xilinx_dma_alloc_tx_descriptor(chan);
+ if (!desc)
+ return NULL;
+
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+ desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+ dst_avail = sg_dma_len(dst_sg);
+ src_avail = sg_dma_len(src_sg);
+ /*
+ * loop until there is either no more source or no more destination
+ * scatterlist entry
+ */
+ while (true) {
+ len = min_t(size_t, src_avail, dst_avail);
+ len = min_t(size_t, len, chan->xdev->max_buffer_len);
+ if (len == 0)
+ goto fetch;
+
+ /* Allocate the link descriptor from DMA pool */
+ segment = xilinx_cdma_alloc_tx_segment(chan);
+ if (!segment)
+ goto error;
+
+ dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) -
+ dst_avail;
+ dma_src = sg_dma_address(src_sg) + sg_dma_len(src_sg) -
+ src_avail;
+ hw = &segment->hw;
+ hw->control = len;
+ hw->src_addr = dma_src;
+ hw->dest_addr = dma_dst;
+ if (chan->ext_addr) {
+ hw->src_addr_msb = upper_32_bits(dma_src);
+ hw->dest_addr_msb = upper_32_bits(dma_dst);
+ }
+
+ if (prev) {
+ prev->hw.next_desc = segment->phys;
+ if (chan->ext_addr)
+ prev->hw.next_desc_msb =
+ upper_32_bits(segment->phys);
+ }
+
+ prev = segment;
+ dst_avail -= len;
+ src_avail -= len;
+ list_add_tail(&segment->node, &desc->segments);
+
+fetch:
+ /* Fetch the next dst scatterlist entry */
+ if (dst_avail == 0) {
+ if (dst_sg_len == 0)
+ break;
+ dst_sg = sg_next(dst_sg);
+ if (dst_sg == NULL)
+ break;
+ dst_sg_len--;
+ dst_avail = sg_dma_len(dst_sg);
+ }
+ /* Fetch the next src scatterlist entry */
+ if (src_avail == 0) {
+ if (src_sg_len == 0)
+ break;
+ src_sg = sg_next(src_sg);
+ if (src_sg == NULL)
+ break;
+ src_sg_len--;
+ src_avail = sg_dma_len(src_sg);
+ }
+ }
+
+ if (list_empty(&desc->segments)) {
+ dev_err(chan->xdev->dev,
+ "%s: Zero-size SG transfer requested\n", __func__);
+ goto error;
+ }
+
+ /* Link the last hardware descriptor with the first. */
+ segment = list_first_entry(&desc->segments,
+ struct xilinx_cdma_tx_segment, node);
+ desc->async_tx.phys = segment->phys;
+ prev->hw.next_desc = segment->phys;
+
+ return &desc->async_tx;
+
+error:
+ xilinx_dma_free_tx_descriptor(chan, desc);
+ return NULL;
+}
+
+/**
* xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
* @dchan: DMA channel
* @sgl: scatterlist to transfer to/from
@@ -2860,7 +2980,9 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
}
/* Request the interrupt */
- chan->irq = irq_of_parse_and_map(node, chan->tdest);
+ chan->irq = of_irq_get(node, chan->tdest);
+ if (chan->irq < 0)
+ return dev_err_probe(xdev->dev, chan->irq, "failed to get irq\n");
err = request_irq(chan->irq, xdev->dma_config->irq_handler,
IRQF_SHARED, "xilinx-dma-controller", chan);
if (err) {
@@ -2934,8 +3056,11 @@ static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev,
if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0)
dev_warn(xdev->dev, "missing dma-channels property\n");
- for (i = 0; i < nr_channels; i++)
- xilinx_dma_chan_probe(xdev, node);
+ for (i = 0; i < nr_channels; i++) {
+ ret = xilinx_dma_chan_probe(xdev, node);
+ if (ret)
+ return ret;
+ }
return 0;
}
@@ -3115,7 +3240,9 @@ static int xilinx_dma_probe(struct platform_device *pdev)
DMA_RESIDUE_GRANULARITY_SEGMENT;
} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask);
+ dma_cap_set(DMA_MEMCPY_SG, xdev->common.cap_mask);
xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy;
+ xdev->common.device_prep_dma_memcpy_sg = xilinx_cdma_prep_memcpy_sg;
/* Residue calculation is supported by only AXI DMA and CDMA */
xdev->common.residue_granularity =
DMA_RESIDUE_GRANULARITY_SEGMENT;
diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c
index 0cb440bdd5cb..f2b65d967384 100644
--- a/drivers/extcon/extcon-usb-gpio.c
+++ b/drivers/extcon/extcon-usb-gpio.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
* drivers/extcon/extcon-usb-gpio.c - USB GPIO extcon driver
*
* Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com
diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c
index e7a9561a826d..a09e704fd0fa 100644
--- a/drivers/extcon/extcon.c
+++ b/drivers/extcon/extcon.c
@@ -576,19 +576,7 @@ EXPORT_SYMBOL_GPL(extcon_set_state);
*/
int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, bool state)
{
- int ret, index;
- unsigned long flags;
-
- index = find_cable_index_by_id(edev, id);
- if (index < 0)
- return index;
-
- /* Check whether the external connector's state is changed. */
- spin_lock_irqsave(&edev->lock, flags);
- ret = is_extcon_changed(edev, index, state);
- spin_unlock_irqrestore(&edev->lock, flags);
- if (!ret)
- return 0;
+ int ret;
ret = extcon_set_state(edev, id, state);
if (ret < 0)
diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c
index 87039c5c03fd..eefcc4146749 100644
--- a/drivers/firmware/arm_scmi/virtio.c
+++ b/drivers/firmware/arm_scmi/virtio.c
@@ -452,7 +452,7 @@ static void scmi_vio_remove(struct virtio_device *vdev)
* outstanding message on any vqueue to be ignored by complete_cb: now
* we can just stop processing buffers and destroy the vqueues.
*/
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
/* Ensure scmi_vdev is visible as NULL */
smp_store_mb(scmi_vdev, NULL);
diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index e87e7f1b1a33..da93864d7abc 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -40,6 +40,8 @@
#ifdef CONFIG_ARM64
# define EFI_RT_VIRTUAL_LIMIT DEFAULT_MAP_WINDOW_64
+#elif defined(CONFIG_RISCV)
+# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_MIN
#else
# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE
#endif
diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig
index 97968aece54f..931544c9f63d 100644
--- a/drivers/firmware/google/Kconfig
+++ b/drivers/firmware/google/Kconfig
@@ -3,9 +3,9 @@ menuconfig GOOGLE_FIRMWARE
bool "Google Firmware Drivers"
default n
help
- These firmware drivers are used by Google's servers. They are
- only useful if you are working directly on one of their
- proprietary servers. If in doubt, say "N".
+ These firmware drivers are used by Google servers,
+ Chromebooks and other devices using coreboot firmware.
+ If in doubt, say "N".
if GOOGLE_FIRMWARE
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index c62f05420d32..a69399a6b7c0 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -388,9 +388,8 @@ static void fw_cfg_sysfs_cache_cleanup(void)
struct fw_cfg_sysfs_entry *entry, *next;
list_for_each_entry_safe(entry, next, &fw_cfg_entry_cache, list) {
- /* will end up invoking fw_cfg_sysfs_cache_delist()
- * via each object's release() method (i.e. destructor)
- */
+ fw_cfg_sysfs_cache_delist(entry);
+ kobject_del(&entry->kobj);
kobject_put(&entry->kobj);
}
}
@@ -449,7 +448,6 @@ static void fw_cfg_sysfs_release_entry(struct kobject *kobj)
{
struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
- fw_cfg_sysfs_cache_delist(entry);
kfree(entry);
}
@@ -602,20 +600,18 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
/* set file entry information */
entry->size = be32_to_cpu(f->size);
entry->select = be16_to_cpu(f->select);
- memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
+ strscpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
/* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */
err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype,
fw_cfg_sel_ko, "%d", entry->select);
- if (err) {
- kobject_put(&entry->kobj);
- return err;
- }
+ if (err)
+ goto err_put_entry;
/* add raw binary content access */
err = sysfs_create_bin_file(&entry->kobj, &fw_cfg_sysfs_attr_raw);
if (err)
- goto err_add_raw;
+ goto err_del_entry;
/* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */
fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name);
@@ -624,9 +620,10 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
fw_cfg_sysfs_cache_enlist(entry);
return 0;
-err_add_raw:
+err_del_entry:
kobject_del(&entry->kobj);
- kfree(entry);
+err_put_entry:
+ kobject_put(&entry->kobj);
return err;
}
diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 0dd117860b63..450c5f6a1cbf 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -23,6 +23,7 @@
#include <linux/hashtable.h>
#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/firmware/xlnx-event-manager.h>
#include "zynqmp-debug.h"
/* Max HashMap Order for PM API feature check (1<<7 = 128) */
@@ -38,6 +39,8 @@
static bool feature_check_enabled;
static DEFINE_HASHTABLE(pm_api_features_map, PM_API_FEATURE_CHECK_MAX_ORDER);
+static struct platform_device *em_dev;
+
/**
* struct pm_api_feature_data - PM API Feature data
* @pm_api_id: PM API Id, used as key to index into hashmap
@@ -160,7 +163,7 @@ static noinline int do_fw_call_hvc(u64 arg0, u64 arg1, u64 arg2,
*
* Return: Returns status, either success or error+reason
*/
-static int zynqmp_pm_feature(u32 api_id)
+int zynqmp_pm_feature(const u32 api_id)
{
int ret;
u32 ret_payload[PAYLOAD_ARG_CNT];
@@ -197,6 +200,7 @@ static int zynqmp_pm_feature(u32 api_id)
return ret;
}
+EXPORT_SYMBOL_GPL(zynqmp_pm_feature);
/**
* zynqmp_pm_invoke_fn() - Invoke the system-level platform management layer
@@ -1117,6 +1121,29 @@ int zynqmp_pm_aes_engine(const u64 address, u32 *out)
EXPORT_SYMBOL_GPL(zynqmp_pm_aes_engine);
/**
+ * zynqmp_pm_register_notifier() - PM API for register a subsystem
+ * to be notified about specific
+ * event/error.
+ * @node: Node ID to which the event is related.
+ * @event: Event Mask of Error events for which wants to get notified.
+ * @wake: Wake subsystem upon capturing the event if value 1
+ * @enable: Enable the registration for value 1, disable for value 0
+ *
+ * This function is used to register/un-register for particular node-event
+ * combination in firmware.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+
+int zynqmp_pm_register_notifier(const u32 node, const u32 event,
+ const u32 wake, const u32 enable)
+{
+ return zynqmp_pm_invoke_fn(PM_REGISTER_NOTIFIER, node, event,
+ wake, enable, NULL);
+}
+EXPORT_SYMBOL_GPL(zynqmp_pm_register_notifier);
+
+/**
* zynqmp_pm_system_shutdown - PM call to request a system shutdown or restart
* @type: Shutdown or restart? 0 for shutdown, 1 for restart
* @subtype: Specifies which system should be restarted or shut down
@@ -1471,6 +1498,15 @@ static int zynqmp_firmware_probe(struct platform_device *pdev)
zynqmp_pm_api_debugfs_init();
+ np = of_find_compatible_node(NULL, NULL, "xlnx,versal");
+ if (np) {
+ em_dev = platform_device_register_data(&pdev->dev, "xlnx_event_manager",
+ -1, NULL, 0);
+ if (IS_ERR(em_dev))
+ dev_err_probe(&pdev->dev, PTR_ERR(em_dev), "EM register fail with error\n");
+ }
+ of_node_put(np);
+
return of_platform_populate(dev->of_node, NULL, NULL, dev);
}
@@ -1488,6 +1524,8 @@ static int zynqmp_firmware_remove(struct platform_device *pdev)
kfree(feature_data);
}
+ platform_device_unregister(em_dev);
+
return 0;
}
diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c
index ccf4546eff29..4ffb9da537d8 100644
--- a/drivers/fpga/altera-cvp.c
+++ b/drivers/fpga/altera-cvp.c
@@ -652,19 +652,15 @@ static int altera_cvp_probe(struct pci_dev *pdev,
snprintf(conf->mgr_name, sizeof(conf->mgr_name), "%s @%s",
ALTERA_CVP_MGR_NAME, pci_name(pdev));
- mgr = devm_fpga_mgr_create(&pdev->dev, conf->mgr_name,
- &altera_cvp_ops, conf);
- if (!mgr) {
- ret = -ENOMEM;
+ mgr = fpga_mgr_register(&pdev->dev, conf->mgr_name,
+ &altera_cvp_ops, conf);
+ if (IS_ERR(mgr)) {
+ ret = PTR_ERR(mgr);
goto err_unmap;
}
pci_set_drvdata(pdev, mgr);
- ret = fpga_mgr_register(mgr);
- if (ret)
- goto err_unmap;
-
return 0;
err_unmap:
diff --git a/drivers/fpga/altera-fpga2sdram.c b/drivers/fpga/altera-fpga2sdram.c
index a78e49c63c64..ff3a646fd9e3 100644
--- a/drivers/fpga/altera-fpga2sdram.c
+++ b/drivers/fpga/altera-fpga2sdram.c
@@ -121,17 +121,13 @@ static int alt_fpga_bridge_probe(struct platform_device *pdev)
/* Get f2s bridge configuration saved in handoff register */
regmap_read(sysmgr, SYSMGR_ISWGRP_HANDOFF3, &priv->mask);
- br = devm_fpga_bridge_create(dev, F2S_BRIDGE_NAME,
- &altera_fpga2sdram_br_ops, priv);
- if (!br)
- return -ENOMEM;
+ br = fpga_bridge_register(dev, F2S_BRIDGE_NAME,
+ &altera_fpga2sdram_br_ops, priv);
+ if (IS_ERR(br))
+ return PTR_ERR(br);
platform_set_drvdata(pdev, br);
- ret = fpga_bridge_register(br);
- if (ret)
- return ret;
-
dev_info(dev, "driver initialized with handoff %08x\n", priv->mask);
if (!of_property_read_u32(dev->of_node, "bridge-enable", &enable)) {
diff --git a/drivers/fpga/altera-freeze-bridge.c b/drivers/fpga/altera-freeze-bridge.c
index 7d22a44d652e..445f4b011167 100644
--- a/drivers/fpga/altera-freeze-bridge.c
+++ b/drivers/fpga/altera-freeze-bridge.c
@@ -246,14 +246,14 @@ static int altera_freeze_br_probe(struct platform_device *pdev)
priv->base_addr = base_addr;
- br = devm_fpga_bridge_create(dev, FREEZE_BRIDGE_NAME,
- &altera_freeze_br_br_ops, priv);
- if (!br)
- return -ENOMEM;
+ br = fpga_bridge_register(dev, FREEZE_BRIDGE_NAME,
+ &altera_freeze_br_br_ops, priv);
+ if (IS_ERR(br))
+ return PTR_ERR(br);
platform_set_drvdata(pdev, br);
- return fpga_bridge_register(br);
+ return 0;
}
static int altera_freeze_br_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/altera-hps2fpga.c b/drivers/fpga/altera-hps2fpga.c
index 77b95f251821..aa758426c22b 100644
--- a/drivers/fpga/altera-hps2fpga.c
+++ b/drivers/fpga/altera-hps2fpga.c
@@ -180,19 +180,15 @@ static int alt_fpga_bridge_probe(struct platform_device *pdev)
}
}
- br = devm_fpga_bridge_create(dev, priv->name,
- &altera_hps2fpga_br_ops, priv);
- if (!br) {
- ret = -ENOMEM;
+ br = fpga_bridge_register(dev, priv->name,
+ &altera_hps2fpga_br_ops, priv);
+ if (IS_ERR(br)) {
+ ret = PTR_ERR(br);
goto err;
}
platform_set_drvdata(pdev, br);
- ret = fpga_bridge_register(br);
- if (ret)
- goto err;
-
return 0;
err:
diff --git a/drivers/fpga/altera-pr-ip-core.c b/drivers/fpga/altera-pr-ip-core.c
index dfdf21ed34c4..be0667968d33 100644
--- a/drivers/fpga/altera-pr-ip-core.c
+++ b/drivers/fpga/altera-pr-ip-core.c
@@ -191,11 +191,8 @@ int alt_pr_register(struct device *dev, void __iomem *reg_base)
(val & ALT_PR_CSR_STATUS_MSK) >> ALT_PR_CSR_STATUS_SFT,
(int)(val & ALT_PR_CSR_PR_START));
- mgr = devm_fpga_mgr_create(dev, dev_name(dev), &alt_pr_ops, priv);
- if (!mgr)
- return -ENOMEM;
-
- return devm_fpga_mgr_register(dev, mgr);
+ mgr = devm_fpga_mgr_register(dev, dev_name(dev), &alt_pr_ops, priv);
+ return PTR_ERR_OR_ZERO(mgr);
}
EXPORT_SYMBOL_GPL(alt_pr_register);
diff --git a/drivers/fpga/altera-ps-spi.c b/drivers/fpga/altera-ps-spi.c
index 23bfd4d1ad0f..5e1e009dba89 100644
--- a/drivers/fpga/altera-ps-spi.c
+++ b/drivers/fpga/altera-ps-spi.c
@@ -302,12 +302,9 @@ static int altera_ps_probe(struct spi_device *spi)
snprintf(conf->mgr_name, sizeof(conf->mgr_name), "%s %s",
dev_driver_string(&spi->dev), dev_name(&spi->dev));
- mgr = devm_fpga_mgr_create(&spi->dev, conf->mgr_name,
- &altera_ps_ops, conf);
- if (!mgr)
- return -ENOMEM;
-
- return devm_fpga_mgr_register(&spi->dev, mgr);
+ mgr = devm_fpga_mgr_register(&spi->dev, conf->mgr_name,
+ &altera_ps_ops, conf);
+ return PTR_ERR_OR_ZERO(mgr);
}
static const struct spi_device_id altera_ps_spi_ids[] = {
diff --git a/drivers/fpga/dfl-fme-br.c b/drivers/fpga/dfl-fme-br.c
index 3ff9f3a687ce..808d1f4d76df 100644
--- a/drivers/fpga/dfl-fme-br.c
+++ b/drivers/fpga/dfl-fme-br.c
@@ -68,14 +68,14 @@ static int fme_br_probe(struct platform_device *pdev)
priv->pdata = dev_get_platdata(dev);
- br = devm_fpga_bridge_create(dev, "DFL FPGA FME Bridge",
- &fme_bridge_ops, priv);
- if (!br)
- return -ENOMEM;
+ br = fpga_bridge_register(dev, "DFL FPGA FME Bridge",
+ &fme_bridge_ops, priv);
+ if (IS_ERR(br))
+ return PTR_ERR(br);
platform_set_drvdata(pdev, br);
- return fpga_bridge_register(br);
+ return 0;
}
static int fme_br_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/dfl-fme-mgr.c b/drivers/fpga/dfl-fme-mgr.c
index 313420405d5e..af0785783b52 100644
--- a/drivers/fpga/dfl-fme-mgr.c
+++ b/drivers/fpga/dfl-fme-mgr.c
@@ -276,7 +276,7 @@ static void fme_mgr_get_compat_id(void __iomem *fme_pr,
static int fme_mgr_probe(struct platform_device *pdev)
{
struct dfl_fme_mgr_pdata *pdata = dev_get_platdata(&pdev->dev);
- struct fpga_compat_id *compat_id;
+ struct fpga_manager_info info = { 0 };
struct device *dev = &pdev->dev;
struct fme_mgr_priv *priv;
struct fpga_manager *mgr;
@@ -296,20 +296,16 @@ static int fme_mgr_probe(struct platform_device *pdev)
return PTR_ERR(priv->ioaddr);
}
- compat_id = devm_kzalloc(dev, sizeof(*compat_id), GFP_KERNEL);
- if (!compat_id)
+ info.name = "DFL FME FPGA Manager";
+ info.mops = &fme_mgr_ops;
+ info.priv = priv;
+ info.compat_id = devm_kzalloc(dev, sizeof(*info.compat_id), GFP_KERNEL);
+ if (!info.compat_id)
return -ENOMEM;
- fme_mgr_get_compat_id(priv->ioaddr, compat_id);
-
- mgr = devm_fpga_mgr_create(dev, "DFL FME FPGA Manager",
- &fme_mgr_ops, priv);
- if (!mgr)
- return -ENOMEM;
-
- mgr->compat_id = compat_id;
-
- return devm_fpga_mgr_register(dev, mgr);
+ fme_mgr_get_compat_id(priv->ioaddr, info.compat_id);
+ mgr = devm_fpga_mgr_register_full(dev, &info);
+ return PTR_ERR_OR_ZERO(mgr);
}
static struct platform_driver fme_mgr_driver = {
diff --git a/drivers/fpga/dfl-fme-region.c b/drivers/fpga/dfl-fme-region.c
index 1eeb42af1012..4aebde0a7f1c 100644
--- a/drivers/fpga/dfl-fme-region.c
+++ b/drivers/fpga/dfl-fme-region.c
@@ -30,6 +30,7 @@ static int fme_region_get_bridges(struct fpga_region *region)
static int fme_region_probe(struct platform_device *pdev)
{
struct dfl_fme_region_pdata *pdata = dev_get_platdata(&pdev->dev);
+ struct fpga_region_info info = { 0 };
struct device *dev = &pdev->dev;
struct fpga_region *region;
struct fpga_manager *mgr;
@@ -39,20 +40,18 @@ static int fme_region_probe(struct platform_device *pdev)
if (IS_ERR(mgr))
return -EPROBE_DEFER;
- region = devm_fpga_region_create(dev, mgr, fme_region_get_bridges);
- if (!region) {
- ret = -ENOMEM;
+ info.mgr = mgr;
+ info.compat_id = mgr->compat_id;
+ info.get_bridges = fme_region_get_bridges;
+ info.priv = pdata;
+ region = fpga_region_register_full(dev, &info);
+ if (IS_ERR(region)) {
+ ret = PTR_ERR(region);
goto eprobe_mgr_put;
}
- region->priv = pdata;
- region->compat_id = mgr->compat_id;
platform_set_drvdata(pdev, region);
- ret = fpga_region_register(region);
- if (ret)
- goto eprobe_mgr_put;
-
dev_dbg(dev, "DFL FME FPGA Region probed\n");
return 0;
diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c
index f86666cf2c6a..599bb21d86af 100644
--- a/drivers/fpga/dfl.c
+++ b/drivers/fpga/dfl.c
@@ -1407,19 +1407,15 @@ dfl_fpga_feature_devs_enumerate(struct dfl_fpga_enum_info *info)
if (!cdev)
return ERR_PTR(-ENOMEM);
- cdev->region = devm_fpga_region_create(info->dev, NULL, NULL);
- if (!cdev->region) {
- ret = -ENOMEM;
- goto free_cdev_exit;
- }
-
cdev->parent = info->dev;
mutex_init(&cdev->lock);
INIT_LIST_HEAD(&cdev->port_dev_list);
- ret = fpga_region_register(cdev->region);
- if (ret)
+ cdev->region = fpga_region_register(info->dev, NULL, NULL);
+ if (IS_ERR(cdev->region)) {
+ ret = PTR_ERR(cdev->region);
goto free_cdev_exit;
+ }
/* create and init build info for enumeration */
binfo = devm_kzalloc(info->dev, sizeof(*binfo), GFP_KERNEL);
diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c
index 798f55670646..16f2b164a178 100644
--- a/drivers/fpga/fpga-bridge.c
+++ b/drivers/fpga/fpga-bridge.c
@@ -312,36 +312,41 @@ static struct attribute *fpga_bridge_attrs[] = {
ATTRIBUTE_GROUPS(fpga_bridge);
/**
- * fpga_bridge_create - create and initialize a struct fpga_bridge
+ * fpga_bridge_register - create and register an FPGA Bridge device
* @parent: FPGA bridge device from pdev
* @name: FPGA bridge name
* @br_ops: pointer to structure of fpga bridge ops
* @priv: FPGA bridge private data
*
- * The caller of this function is responsible for freeing the bridge with
- * fpga_bridge_free(). Using devm_fpga_bridge_create() instead is recommended.
- *
- * Return: struct fpga_bridge or NULL
+ * Return: struct fpga_bridge pointer or ERR_PTR()
*/
-struct fpga_bridge *fpga_bridge_create(struct device *parent, const char *name,
- const struct fpga_bridge_ops *br_ops,
- void *priv)
+struct fpga_bridge *
+fpga_bridge_register(struct device *parent, const char *name,
+ const struct fpga_bridge_ops *br_ops,
+ void *priv)
{
struct fpga_bridge *bridge;
int id, ret;
+ if (!br_ops) {
+ dev_err(parent, "Attempt to register without fpga_bridge_ops\n");
+ return ERR_PTR(-EINVAL);
+ }
+
if (!name || !strlen(name)) {
dev_err(parent, "Attempt to register with no name!\n");
- return NULL;
+ return ERR_PTR(-EINVAL);
}
bridge = kzalloc(sizeof(*bridge), GFP_KERNEL);
if (!bridge)
- return NULL;
+ return ERR_PTR(-ENOMEM);
id = ida_simple_get(&fpga_bridge_ida, 0, 0, GFP_KERNEL);
- if (id < 0)
+ if (id < 0) {
+ ret = id;
goto error_kfree;
+ }
mutex_init(&bridge->mutex);
INIT_LIST_HEAD(&bridge->node);
@@ -350,17 +355,23 @@ struct fpga_bridge *fpga_bridge_create(struct device *parent, const char *name,
bridge->br_ops = br_ops;
bridge->priv = priv;
- device_initialize(&bridge->dev);
bridge->dev.groups = br_ops->groups;
bridge->dev.class = fpga_bridge_class;
bridge->dev.parent = parent;
bridge->dev.of_node = parent->of_node;
bridge->dev.id = id;
+ of_platform_populate(bridge->dev.of_node, NULL, NULL, &bridge->dev);
ret = dev_set_name(&bridge->dev, "br%d", id);
if (ret)
goto error_device;
+ ret = device_register(&bridge->dev);
+ if (ret) {
+ put_device(&bridge->dev);
+ return ERR_PTR(ret);
+ }
+
return bridge;
error_device:
@@ -368,88 +379,7 @@ error_device:
error_kfree:
kfree(bridge);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(fpga_bridge_create);
-
-/**
- * fpga_bridge_free - free an fpga bridge created by fpga_bridge_create()
- * @bridge: FPGA bridge struct
- */
-void fpga_bridge_free(struct fpga_bridge *bridge)
-{
- ida_simple_remove(&fpga_bridge_ida, bridge->dev.id);
- kfree(bridge);
-}
-EXPORT_SYMBOL_GPL(fpga_bridge_free);
-
-static void devm_fpga_bridge_release(struct device *dev, void *res)
-{
- struct fpga_bridge *bridge = *(struct fpga_bridge **)res;
-
- fpga_bridge_free(bridge);
-}
-
-/**
- * devm_fpga_bridge_create - create and init a managed struct fpga_bridge
- * @parent: FPGA bridge device from pdev
- * @name: FPGA bridge name
- * @br_ops: pointer to structure of fpga bridge ops
- * @priv: FPGA bridge private data
- *
- * This function is intended for use in an FPGA bridge driver's probe function.
- * After the bridge driver creates the struct with devm_fpga_bridge_create(), it
- * should register the bridge with fpga_bridge_register(). The bridge driver's
- * remove function should call fpga_bridge_unregister(). The bridge struct
- * allocated with this function will be freed automatically on driver detach.
- * This includes the case of a probe function returning error before calling
- * fpga_bridge_register(), the struct will still get cleaned up.
- *
- * Return: struct fpga_bridge or NULL
- */
-struct fpga_bridge
-*devm_fpga_bridge_create(struct device *parent, const char *name,
- const struct fpga_bridge_ops *br_ops, void *priv)
-{
- struct fpga_bridge **ptr, *bridge;
-
- ptr = devres_alloc(devm_fpga_bridge_release, sizeof(*ptr), GFP_KERNEL);
- if (!ptr)
- return NULL;
-
- bridge = fpga_bridge_create(parent, name, br_ops, priv);
- if (!bridge) {
- devres_free(ptr);
- } else {
- *ptr = bridge;
- devres_add(parent, ptr);
- }
-
- return bridge;
-}
-EXPORT_SYMBOL_GPL(devm_fpga_bridge_create);
-
-/**
- * fpga_bridge_register - register an FPGA bridge
- *
- * @bridge: FPGA bridge struct
- *
- * Return: 0 for success, error code otherwise.
- */
-int fpga_bridge_register(struct fpga_bridge *bridge)
-{
- struct device *dev = &bridge->dev;
- int ret;
-
- ret = device_add(dev);
- if (ret)
- return ret;
-
- of_platform_populate(dev->of_node, NULL, NULL, dev);
-
- dev_info(dev->parent, "fpga bridge [%s] registered\n", bridge->name);
-
- return 0;
+ return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(fpga_bridge_register);
@@ -475,6 +405,10 @@ EXPORT_SYMBOL_GPL(fpga_bridge_unregister);
static void fpga_bridge_dev_release(struct device *dev)
{
+ struct fpga_bridge *bridge = to_fpga_bridge(dev);
+
+ ida_simple_remove(&fpga_bridge_ida, bridge->dev.id);
+ kfree(bridge);
}
static int __init fpga_bridge_dev_init(void)
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index aa30889e2320..d49a9ce34568 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -592,49 +592,49 @@ void fpga_mgr_unlock(struct fpga_manager *mgr)
EXPORT_SYMBOL_GPL(fpga_mgr_unlock);
/**
- * fpga_mgr_create - create and initialize an FPGA manager struct
+ * fpga_mgr_register_full - create and register an FPGA Manager device
* @parent: fpga manager device from pdev
- * @name: fpga manager name
- * @mops: pointer to structure of fpga manager ops
- * @priv: fpga manager private data
+ * @info: parameters for fpga manager
*
- * The caller of this function is responsible for freeing the struct with
- * fpga_mgr_free(). Using devm_fpga_mgr_create() instead is recommended.
+ * The caller of this function is responsible for calling fpga_mgr_unregister().
+ * Using devm_fpga_mgr_register_full() instead is recommended.
*
- * Return: pointer to struct fpga_manager or NULL
+ * Return: pointer to struct fpga_manager pointer or ERR_PTR()
*/
-struct fpga_manager *fpga_mgr_create(struct device *parent, const char *name,
- const struct fpga_manager_ops *mops,
- void *priv)
+struct fpga_manager *
+fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info)
{
+ const struct fpga_manager_ops *mops = info->mops;
struct fpga_manager *mgr;
int id, ret;
if (!mops) {
dev_err(parent, "Attempt to register without fpga_manager_ops\n");
- return NULL;
+ return ERR_PTR(-EINVAL);
}
- if (!name || !strlen(name)) {
+ if (!info->name || !strlen(info->name)) {
dev_err(parent, "Attempt to register with no name!\n");
- return NULL;
+ return ERR_PTR(-EINVAL);
}
mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
if (!mgr)
- return NULL;
+ return ERR_PTR(-ENOMEM);
id = ida_simple_get(&fpga_mgr_ida, 0, 0, GFP_KERNEL);
- if (id < 0)
+ if (id < 0) {
+ ret = id;
goto error_kfree;
+ }
mutex_init(&mgr->ref_mutex);
- mgr->name = name;
- mgr->mops = mops;
- mgr->priv = priv;
+ mgr->name = info->name;
+ mgr->mops = info->mops;
+ mgr->priv = info->priv;
+ mgr->compat_id = info->compat_id;
- device_initialize(&mgr->dev);
mgr->dev.class = fpga_mgr_class;
mgr->dev.groups = mops->groups;
mgr->dev.parent = parent;
@@ -645,6 +645,19 @@ struct fpga_manager *fpga_mgr_create(struct device *parent, const char *name,
if (ret)
goto error_device;
+ /*
+ * Initialize framework state by requesting low level driver read state
+ * from device. FPGA may be in reset mode or may have been programmed
+ * by bootloader or EEPROM.
+ */
+ mgr->state = fpga_mgr_state(mgr);
+
+ ret = device_register(&mgr->dev);
+ if (ret) {
+ put_device(&mgr->dev);
+ return ERR_PTR(ret);
+ }
+
return mgr;
error_device:
@@ -652,96 +665,36 @@ error_device:
error_kfree:
kfree(mgr);
- return NULL;
+ return ERR_PTR(ret);
}
-EXPORT_SYMBOL_GPL(fpga_mgr_create);
+EXPORT_SYMBOL_GPL(fpga_mgr_register_full);
/**
- * fpga_mgr_free - free an FPGA manager created with fpga_mgr_create()
- * @mgr: fpga manager struct
- */
-void fpga_mgr_free(struct fpga_manager *mgr)
-{
- ida_simple_remove(&fpga_mgr_ida, mgr->dev.id);
- kfree(mgr);
-}
-EXPORT_SYMBOL_GPL(fpga_mgr_free);
-
-static void devm_fpga_mgr_release(struct device *dev, void *res)
-{
- struct fpga_mgr_devres *dr = res;
-
- fpga_mgr_free(dr->mgr);
-}
-
-/**
- * devm_fpga_mgr_create - create and initialize a managed FPGA manager struct
+ * fpga_mgr_register - create and register an FPGA Manager device
* @parent: fpga manager device from pdev
* @name: fpga manager name
* @mops: pointer to structure of fpga manager ops
* @priv: fpga manager private data
*
- * This function is intended for use in an FPGA manager driver's probe function.
- * After the manager driver creates the manager struct with
- * devm_fpga_mgr_create(), it should register it with fpga_mgr_register(). The
- * manager driver's remove function should call fpga_mgr_unregister(). The
- * manager struct allocated with this function will be freed automatically on
- * driver detach. This includes the case of a probe function returning error
- * before calling fpga_mgr_register(), the struct will still get cleaned up.
+ * The caller of this function is responsible for calling fpga_mgr_unregister().
+ * Using devm_fpga_mgr_register() instead is recommended. This simple
+ * version of the register function should be sufficient for most users. The
+ * fpga_mgr_register_full() function is available for users that need to pass
+ * additional, optional parameters.
*
- * Return: pointer to struct fpga_manager or NULL
+ * Return: pointer to struct fpga_manager pointer or ERR_PTR()
*/
-struct fpga_manager *devm_fpga_mgr_create(struct device *parent, const char *name,
- const struct fpga_manager_ops *mops,
- void *priv)
+struct fpga_manager *
+fpga_mgr_register(struct device *parent, const char *name,
+ const struct fpga_manager_ops *mops, void *priv)
{
- struct fpga_mgr_devres *dr;
+ struct fpga_manager_info info = { 0 };
- dr = devres_alloc(devm_fpga_mgr_release, sizeof(*dr), GFP_KERNEL);
- if (!dr)
- return NULL;
+ info.name = name;
+ info.mops = mops;
+ info.priv = priv;
- dr->mgr = fpga_mgr_create(parent, name, mops, priv);
- if (!dr->mgr) {
- devres_free(dr);
- return NULL;
- }
-
- devres_add(parent, dr);
-
- return dr->mgr;
-}
-EXPORT_SYMBOL_GPL(devm_fpga_mgr_create);
-
-/**
- * fpga_mgr_register - register an FPGA manager
- * @mgr: fpga manager struct
- *
- * Return: 0 on success, negative error code otherwise.
- */
-int fpga_mgr_register(struct fpga_manager *mgr)
-{
- int ret;
-
- /*
- * Initialize framework state by requesting low level driver read state
- * from device. FPGA may be in reset mode or may have been programmed
- * by bootloader or EEPROM.
- */
- mgr->state = fpga_mgr_state(mgr);
-
- ret = device_add(&mgr->dev);
- if (ret)
- goto error_device;
-
- dev_info(&mgr->dev, "%s registered\n", mgr->name);
-
- return 0;
-
-error_device:
- ida_simple_remove(&fpga_mgr_ida, mgr->dev.id);
-
- return ret;
+ return fpga_mgr_register_full(parent, &info);
}
EXPORT_SYMBOL_GPL(fpga_mgr_register);
@@ -765,14 +718,6 @@ void fpga_mgr_unregister(struct fpga_manager *mgr)
}
EXPORT_SYMBOL_GPL(fpga_mgr_unregister);
-static int fpga_mgr_devres_match(struct device *dev, void *res,
- void *match_data)
-{
- struct fpga_mgr_devres *dr = res;
-
- return match_data == dr->mgr;
-}
-
static void devm_fpga_mgr_unregister(struct device *dev, void *res)
{
struct fpga_mgr_devres *dr = res;
@@ -781,45 +726,67 @@ static void devm_fpga_mgr_unregister(struct device *dev, void *res)
}
/**
- * devm_fpga_mgr_register - resource managed variant of fpga_mgr_register()
- * @dev: managing device for this FPGA manager
- * @mgr: fpga manager struct
+ * devm_fpga_mgr_register_full - resource managed variant of fpga_mgr_register()
+ * @parent: fpga manager device from pdev
+ * @info: parameters for fpga manager
*
- * This is the devres variant of fpga_mgr_register() for which the unregister
+ * This is the devres variant of fpga_mgr_register_full() for which the unregister
* function will be called automatically when the managing device is detached.
*/
-int devm_fpga_mgr_register(struct device *dev, struct fpga_manager *mgr)
+struct fpga_manager *
+devm_fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info)
{
struct fpga_mgr_devres *dr;
- int ret;
-
- /*
- * Make sure that the struct fpga_manager * that is passed in is
- * managed itself.
- */
- if (WARN_ON(!devres_find(dev, devm_fpga_mgr_release,
- fpga_mgr_devres_match, mgr)))
- return -EINVAL;
+ struct fpga_manager *mgr;
dr = devres_alloc(devm_fpga_mgr_unregister, sizeof(*dr), GFP_KERNEL);
if (!dr)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- ret = fpga_mgr_register(mgr);
- if (ret) {
+ mgr = fpga_mgr_register_full(parent, info);
+ if (IS_ERR(mgr)) {
devres_free(dr);
- return ret;
+ return mgr;
}
dr->mgr = mgr;
- devres_add(dev, dr);
+ devres_add(parent, dr);
- return 0;
+ return mgr;
+}
+EXPORT_SYMBOL_GPL(devm_fpga_mgr_register_full);
+
+/**
+ * devm_fpga_mgr_register - resource managed variant of fpga_mgr_register()
+ * @parent: fpga manager device from pdev
+ * @name: fpga manager name
+ * @mops: pointer to structure of fpga manager ops
+ * @priv: fpga manager private data
+ *
+ * This is the devres variant of fpga_mgr_register() for which the
+ * unregister function will be called automatically when the managing
+ * device is detached.
+ */
+struct fpga_manager *
+devm_fpga_mgr_register(struct device *parent, const char *name,
+ const struct fpga_manager_ops *mops, void *priv)
+{
+ struct fpga_manager_info info = { 0 };
+
+ info.name = name;
+ info.mops = mops;
+ info.priv = priv;
+
+ return devm_fpga_mgr_register_full(parent, &info);
}
EXPORT_SYMBOL_GPL(devm_fpga_mgr_register);
static void fpga_mgr_dev_release(struct device *dev)
{
+ struct fpga_manager *mgr = to_fpga_manager(dev);
+
+ ida_simple_remove(&fpga_mgr_ida, mgr->dev.id);
+ kfree(mgr);
}
static int __init fpga_mgr_class_init(void)
diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c
index a4838715221f..b0ac18de4885 100644
--- a/drivers/fpga/fpga-region.c
+++ b/drivers/fpga/fpga-region.c
@@ -180,39 +180,42 @@ static struct attribute *fpga_region_attrs[] = {
ATTRIBUTE_GROUPS(fpga_region);
/**
- * fpga_region_create - alloc and init a struct fpga_region
+ * fpga_region_register_full - create and register an FPGA Region device
* @parent: device parent
- * @mgr: manager that programs this region
- * @get_bridges: optional function to get bridges to a list
- *
- * The caller of this function is responsible for freeing the resulting region
- * struct with fpga_region_free(). Using devm_fpga_region_create() instead is
- * recommended.
+ * @info: parameters for FPGA Region
*
- * Return: struct fpga_region or NULL
+ * Return: struct fpga_region or ERR_PTR()
*/
-struct fpga_region
-*fpga_region_create(struct device *parent,
- struct fpga_manager *mgr,
- int (*get_bridges)(struct fpga_region *))
+struct fpga_region *
+fpga_region_register_full(struct device *parent, const struct fpga_region_info *info)
{
struct fpga_region *region;
int id, ret = 0;
+ if (!info) {
+ dev_err(parent,
+ "Attempt to register without required info structure\n");
+ return ERR_PTR(-EINVAL);
+ }
+
region = kzalloc(sizeof(*region), GFP_KERNEL);
if (!region)
- return NULL;
+ return ERR_PTR(-ENOMEM);
id = ida_simple_get(&fpga_region_ida, 0, 0, GFP_KERNEL);
- if (id < 0)
+ if (id < 0) {
+ ret = id;
goto err_free;
+ }
+
+ region->mgr = info->mgr;
+ region->compat_id = info->compat_id;
+ region->priv = info->priv;
+ region->get_bridges = info->get_bridges;
- region->mgr = mgr;
- region->get_bridges = get_bridges;
mutex_init(&region->mutex);
INIT_LIST_HEAD(&region->bridge_list);
- device_initialize(&region->dev);
region->dev.class = fpga_region_class;
region->dev.parent = parent;
region->dev.of_node = parent->of_node;
@@ -222,6 +225,12 @@ struct fpga_region
if (ret)
goto err_remove;
+ ret = device_register(&region->dev);
+ if (ret) {
+ put_device(&region->dev);
+ return ERR_PTR(ret);
+ }
+
return region;
err_remove:
@@ -229,76 +238,32 @@ err_remove:
err_free:
kfree(region);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(fpga_region_create);
-
-/**
- * fpga_region_free - free an FPGA region created by fpga_region_create()
- * @region: FPGA region
- */
-void fpga_region_free(struct fpga_region *region)
-{
- ida_simple_remove(&fpga_region_ida, region->dev.id);
- kfree(region);
-}
-EXPORT_SYMBOL_GPL(fpga_region_free);
-
-static void devm_fpga_region_release(struct device *dev, void *res)
-{
- struct fpga_region *region = *(struct fpga_region **)res;
-
- fpga_region_free(region);
+ return ERR_PTR(ret);
}
+EXPORT_SYMBOL_GPL(fpga_region_register_full);
/**
- * devm_fpga_region_create - create and initialize a managed FPGA region struct
+ * fpga_region_register - create and register an FPGA Region device
* @parent: device parent
* @mgr: manager that programs this region
* @get_bridges: optional function to get bridges to a list
*
- * This function is intended for use in an FPGA region driver's probe function.
- * After the region driver creates the region struct with
- * devm_fpga_region_create(), it should register it with fpga_region_register().
- * The region driver's remove function should call fpga_region_unregister().
- * The region struct allocated with this function will be freed automatically on
- * driver detach. This includes the case of a probe function returning error
- * before calling fpga_region_register(), the struct will still get cleaned up.
+ * This simple version of the register function should be sufficient for most users.
+ * The fpga_region_register_full() function is available for users that need to
+ * pass additional, optional parameters.
*
- * Return: struct fpga_region or NULL
+ * Return: struct fpga_region or ERR_PTR()
*/
-struct fpga_region
-*devm_fpga_region_create(struct device *parent,
- struct fpga_manager *mgr,
- int (*get_bridges)(struct fpga_region *))
+struct fpga_region *
+fpga_region_register(struct device *parent, struct fpga_manager *mgr,
+ int (*get_bridges)(struct fpga_region *))
{
- struct fpga_region **ptr, *region;
-
- ptr = devres_alloc(devm_fpga_region_release, sizeof(*ptr), GFP_KERNEL);
- if (!ptr)
- return NULL;
+ struct fpga_region_info info = { 0 };
- region = fpga_region_create(parent, mgr, get_bridges);
- if (!region) {
- devres_free(ptr);
- } else {
- *ptr = region;
- devres_add(parent, ptr);
- }
+ info.mgr = mgr;
+ info.get_bridges = get_bridges;
- return region;
-}
-EXPORT_SYMBOL_GPL(devm_fpga_region_create);
-
-/**
- * fpga_region_register - register an FPGA region
- * @region: FPGA region
- *
- * Return: 0 or -errno
- */
-int fpga_region_register(struct fpga_region *region)
-{
- return device_add(&region->dev);
+ return fpga_region_register_full(parent, &info);
}
EXPORT_SYMBOL_GPL(fpga_region_register);
@@ -316,6 +281,10 @@ EXPORT_SYMBOL_GPL(fpga_region_unregister);
static void fpga_region_dev_release(struct device *dev)
{
+ struct fpga_region *region = to_fpga_region(dev);
+
+ ida_simple_remove(&fpga_region_ida, region->dev.id);
+ kfree(region);
}
/**
diff --git a/drivers/fpga/ice40-spi.c b/drivers/fpga/ice40-spi.c
index 029d3cdb918d..7cbb3558b844 100644
--- a/drivers/fpga/ice40-spi.c
+++ b/drivers/fpga/ice40-spi.c
@@ -178,12 +178,9 @@ static int ice40_fpga_probe(struct spi_device *spi)
return ret;
}
- mgr = devm_fpga_mgr_create(dev, "Lattice iCE40 FPGA Manager",
- &ice40_fpga_ops, priv);
- if (!mgr)
- return -ENOMEM;
-
- return devm_fpga_mgr_register(dev, mgr);
+ mgr = devm_fpga_mgr_register(dev, "Lattice iCE40 FPGA Manager",
+ &ice40_fpga_ops, priv);
+ return PTR_ERR_OR_ZERO(mgr);
}
static const struct of_device_id ice40_fpga_of_match[] = {
diff --git a/drivers/fpga/machxo2-spi.c b/drivers/fpga/machxo2-spi.c
index ea2ec3c6815c..905607992a12 100644
--- a/drivers/fpga/machxo2-spi.c
+++ b/drivers/fpga/machxo2-spi.c
@@ -370,12 +370,9 @@ static int machxo2_spi_probe(struct spi_device *spi)
return -EINVAL;
}
- mgr = devm_fpga_mgr_create(dev, "Lattice MachXO2 SPI FPGA Manager",
- &machxo2_ops, spi);
- if (!mgr)
- return -ENOMEM;
-
- return devm_fpga_mgr_register(dev, mgr);
+ mgr = devm_fpga_mgr_register(dev, "Lattice MachXO2 SPI FPGA Manager",
+ &machxo2_ops, spi);
+ return PTR_ERR_OR_ZERO(mgr);
}
#ifdef CONFIG_OF
diff --git a/drivers/fpga/of-fpga-region.c b/drivers/fpga/of-fpga-region.c
index e3c25576b6b9..50b83057c048 100644
--- a/drivers/fpga/of-fpga-region.c
+++ b/drivers/fpga/of-fpga-region.c
@@ -405,16 +405,12 @@ static int of_fpga_region_probe(struct platform_device *pdev)
if (IS_ERR(mgr))
return -EPROBE_DEFER;
- region = devm_fpga_region_create(dev, mgr, of_fpga_region_get_bridges);
- if (!region) {
- ret = -ENOMEM;
+ region = fpga_region_register(dev, mgr, of_fpga_region_get_bridges);
+ if (IS_ERR(region)) {
+ ret = PTR_ERR(region);
goto eprobe_mgr_put;
}
- ret = fpga_region_register(region);
- if (ret)
- goto eprobe_mgr_put;
-
of_platform_populate(np, fpga_region_of_match, NULL, &region->dev);
platform_set_drvdata(pdev, region);
@@ -448,7 +444,7 @@ static struct platform_driver of_fpga_region_driver = {
};
/**
- * fpga_region_init - init function for fpga_region class
+ * of_fpga_region_init - init function for fpga_region class
* Creates the fpga_region class and registers a reconfig notifier.
*/
static int __init of_fpga_region_init(void)
diff --git a/drivers/fpga/socfpga-a10.c b/drivers/fpga/socfpga-a10.c
index 573d88bdf730..ac8e89b8a5cc 100644
--- a/drivers/fpga/socfpga-a10.c
+++ b/drivers/fpga/socfpga-a10.c
@@ -508,19 +508,15 @@ static int socfpga_a10_fpga_probe(struct platform_device *pdev)
return -EBUSY;
}
- mgr = devm_fpga_mgr_create(dev, "SoCFPGA Arria10 FPGA Manager",
- &socfpga_a10_fpga_mgr_ops, priv);
- if (!mgr)
- return -ENOMEM;
-
- platform_set_drvdata(pdev, mgr);
-
- ret = fpga_mgr_register(mgr);
- if (ret) {
+ mgr = fpga_mgr_register(dev, "SoCFPGA Arria10 FPGA Manager",
+ &socfpga_a10_fpga_mgr_ops, priv);
+ if (IS_ERR(mgr)) {
clk_disable_unprepare(priv->clk);
- return ret;
+ return PTR_ERR(mgr);
}
+ platform_set_drvdata(pdev, mgr);
+
return 0;
}
diff --git a/drivers/fpga/socfpga.c b/drivers/fpga/socfpga.c
index 1f467173fc1f..7e0741f99696 100644
--- a/drivers/fpga/socfpga.c
+++ b/drivers/fpga/socfpga.c
@@ -571,12 +571,9 @@ static int socfpga_fpga_probe(struct platform_device *pdev)
if (ret)
return ret;
- mgr = devm_fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
- &socfpga_fpga_ops, priv);
- if (!mgr)
- return -ENOMEM;
-
- return devm_fpga_mgr_register(dev, mgr);
+ mgr = devm_fpga_mgr_register(dev, "Altera SOCFPGA FPGA Manager",
+ &socfpga_fpga_ops, priv);
+ return PTR_ERR_OR_ZERO(mgr);
}
#ifdef CONFIG_OF
diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c
index 047fd7f23706..357cea58ec98 100644
--- a/drivers/fpga/stratix10-soc.c
+++ b/drivers/fpga/stratix10-soc.c
@@ -419,23 +419,16 @@ static int s10_probe(struct platform_device *pdev)
init_completion(&priv->status_return_completion);
- mgr = fpga_mgr_create(dev, "Stratix10 SOC FPGA Manager",
- &s10_ops, priv);
- if (!mgr) {
- dev_err(dev, "unable to create FPGA manager\n");
- ret = -ENOMEM;
- goto probe_err;
- }
-
- ret = fpga_mgr_register(mgr);
- if (ret) {
+ mgr = fpga_mgr_register(dev, "Stratix10 SOC FPGA Manager",
+ &s10_ops, priv);
+ if (IS_ERR(mgr)) {
dev_err(dev, "unable to register FPGA manager\n");
- fpga_mgr_free(mgr);
+ ret = PTR_ERR(mgr);
goto probe_err;
}
platform_set_drvdata(pdev, mgr);
- return ret;
+ return 0;
probe_err:
stratix10_svc_free_channel(priv->chan);
@@ -448,7 +441,6 @@ static int s10_remove(struct platform_device *pdev)
struct s10_priv *priv = mgr->priv;
fpga_mgr_unregister(mgr);
- fpga_mgr_free(mgr);
stratix10_svc_free_channel(priv->chan);
return 0;
diff --git a/drivers/fpga/ts73xx-fpga.c b/drivers/fpga/ts73xx-fpga.c
index 167abb0b08d4..8e6e9c840d9d 100644
--- a/drivers/fpga/ts73xx-fpga.c
+++ b/drivers/fpga/ts73xx-fpga.c
@@ -116,12 +116,9 @@ static int ts73xx_fpga_probe(struct platform_device *pdev)
if (IS_ERR(priv->io_base))
return PTR_ERR(priv->io_base);
- mgr = devm_fpga_mgr_create(kdev, "TS-73xx FPGA Manager",
- &ts73xx_fpga_ops, priv);
- if (!mgr)
- return -ENOMEM;
-
- return devm_fpga_mgr_register(kdev, mgr);
+ mgr = devm_fpga_mgr_register(kdev, "TS-73xx FPGA Manager",
+ &ts73xx_fpga_ops, priv);
+ return PTR_ERR_OR_ZERO(mgr);
}
static struct platform_driver ts73xx_fpga_driver = {
diff --git a/drivers/fpga/versal-fpga.c b/drivers/fpga/versal-fpga.c
index 5b0dda304bd2..e1601b3a345b 100644
--- a/drivers/fpga/versal-fpga.c
+++ b/drivers/fpga/versal-fpga.c
@@ -54,12 +54,9 @@ static int versal_fpga_probe(struct platform_device *pdev)
return ret;
}
- mgr = devm_fpga_mgr_create(dev, "Xilinx Versal FPGA Manager",
- &versal_fpga_ops, NULL);
- if (!mgr)
- return -ENOMEM;
-
- return devm_fpga_mgr_register(dev, mgr);
+ mgr = devm_fpga_mgr_register(dev, "Xilinx Versal FPGA Manager",
+ &versal_fpga_ops, NULL);
+ return PTR_ERR_OR_ZERO(mgr);
}
static const struct of_device_id versal_fpga_of_match[] = {
diff --git a/drivers/fpga/xilinx-pr-decoupler.c b/drivers/fpga/xilinx-pr-decoupler.c
index e986ed47c4ed..2d9c491f7be9 100644
--- a/drivers/fpga/xilinx-pr-decoupler.c
+++ b/drivers/fpga/xilinx-pr-decoupler.c
@@ -140,22 +140,17 @@ static int xlnx_pr_decoupler_probe(struct platform_device *pdev)
clk_disable(priv->clk);
- br = devm_fpga_bridge_create(&pdev->dev, priv->ipconfig->name,
- &xlnx_pr_decoupler_br_ops, priv);
- if (!br) {
- err = -ENOMEM;
- goto err_clk;
- }
-
- platform_set_drvdata(pdev, br);
-
- err = fpga_bridge_register(br);
- if (err) {
+ br = fpga_bridge_register(&pdev->dev, priv->ipconfig->name,
+ &xlnx_pr_decoupler_br_ops, priv);
+ if (IS_ERR(br)) {
+ err = PTR_ERR(br);
dev_err(&pdev->dev, "unable to register %s",
priv->ipconfig->name);
goto err_clk;
}
+ platform_set_drvdata(pdev, br);
+
return 0;
err_clk:
diff --git a/drivers/fpga/xilinx-spi.c b/drivers/fpga/xilinx-spi.c
index b6bcf1d9233d..e1a227e7ff2a 100644
--- a/drivers/fpga/xilinx-spi.c
+++ b/drivers/fpga/xilinx-spi.c
@@ -247,13 +247,10 @@ static int xilinx_spi_probe(struct spi_device *spi)
return dev_err_probe(&spi->dev, PTR_ERR(conf->done),
"Failed to get DONE gpio\n");
- mgr = devm_fpga_mgr_create(&spi->dev,
- "Xilinx Slave Serial FPGA Manager",
- &xilinx_spi_ops, conf);
- if (!mgr)
- return -ENOMEM;
-
- return devm_fpga_mgr_register(&spi->dev, mgr);
+ mgr = devm_fpga_mgr_register(&spi->dev,
+ "Xilinx Slave Serial FPGA Manager",
+ &xilinx_spi_ops, conf);
+ return PTR_ERR_OR_ZERO(mgr);
}
#ifdef CONFIG_OF
diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index 9b75bd4f93d8..426aa34c6a0d 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -609,20 +609,16 @@ static int zynq_fpga_probe(struct platform_device *pdev)
clk_disable(priv->clk);
- mgr = devm_fpga_mgr_create(dev, "Xilinx Zynq FPGA Manager",
- &zynq_fpga_ops, priv);
- if (!mgr)
- return -ENOMEM;
-
- platform_set_drvdata(pdev, mgr);
-
- err = fpga_mgr_register(mgr);
- if (err) {
+ mgr = fpga_mgr_register(dev, "Xilinx Zynq FPGA Manager",
+ &zynq_fpga_ops, priv);
+ if (IS_ERR(mgr)) {
dev_err(dev, "unable to register FPGA manager\n");
clk_unprepare(priv->clk);
- return err;
+ return PTR_ERR(mgr);
}
+ platform_set_drvdata(pdev, mgr);
+
return 0;
}
diff --git a/drivers/fpga/zynqmp-fpga.c b/drivers/fpga/zynqmp-fpga.c
index 7d3d5650c322..c60f20949c47 100644
--- a/drivers/fpga/zynqmp-fpga.c
+++ b/drivers/fpga/zynqmp-fpga.c
@@ -95,12 +95,9 @@ static int zynqmp_fpga_probe(struct platform_device *pdev)
priv->dev = dev;
- mgr = devm_fpga_mgr_create(dev, "Xilinx ZynqMP FPGA Manager",
- &zynqmp_fpga_ops, priv);
- if (!mgr)
- return -ENOMEM;
-
- return devm_fpga_mgr_register(dev, mgr);
+ mgr = devm_fpga_mgr_register(dev, "Xilinx ZynqMP FPGA Manager",
+ &zynqmp_fpga_ops, priv);
+ return PTR_ERR_OR_ZERO(mgr);
}
#ifdef CONFIG_OF
diff --git a/drivers/gnss/Kconfig b/drivers/gnss/Kconfig
index bd12e3d57baa..d7fe265c2869 100644
--- a/drivers/gnss/Kconfig
+++ b/drivers/gnss/Kconfig
@@ -54,4 +54,15 @@ config GNSS_UBX_SERIAL
If unsure, say N.
+config GNSS_USB
+ tristate "USB GNSS receiver support"
+ depends on USB
+ help
+ Say Y here if you have a GNSS receiver which uses a USB interface.
+
+ To compile this driver as a module, choose M here: the module will
+ be called gnss-usb.
+
+ If unsure, say N.
+
endif # GNSS
diff --git a/drivers/gnss/Makefile b/drivers/gnss/Makefile
index 451f11401ecc..bb2cbada3435 100644
--- a/drivers/gnss/Makefile
+++ b/drivers/gnss/Makefile
@@ -17,3 +17,6 @@ gnss-sirf-y := sirf.o
obj-$(CONFIG_GNSS_UBX_SERIAL) += gnss-ubx.o
gnss-ubx-y := ubx.o
+
+obj-$(CONFIG_GNSS_USB) += gnss-usb.o
+gnss-usb-y := usb.o
diff --git a/drivers/gnss/mtk.c b/drivers/gnss/mtk.c
index d1fc55560daf..c62b1211f4fe 100644
--- a/drivers/gnss/mtk.c
+++ b/drivers/gnss/mtk.c
@@ -126,7 +126,7 @@ static void mtk_remove(struct serdev_device *serdev)
if (data->vbackup)
regulator_disable(data->vbackup);
gnss_serial_free(gserial);
-};
+}
#ifdef CONFIG_OF
static const struct of_device_id mtk_of_match[] = {
diff --git a/drivers/gnss/serial.c b/drivers/gnss/serial.c
index def64b36d994..5d8e9bfb24d0 100644
--- a/drivers/gnss/serial.c
+++ b/drivers/gnss/serial.c
@@ -165,7 +165,7 @@ void gnss_serial_free(struct gnss_serial *gserial)
{
gnss_put_device(gserial->gdev);
kfree(gserial);
-};
+}
EXPORT_SYMBOL_GPL(gnss_serial_free);
int gnss_serial_register(struct gnss_serial *gserial)
diff --git a/drivers/gnss/sirf.c b/drivers/gnss/sirf.c
index 2ecb1d3e8eeb..bcb53ccfee4d 100644
--- a/drivers/gnss/sirf.c
+++ b/drivers/gnss/sirf.c
@@ -551,7 +551,7 @@ static void sirf_remove(struct serdev_device *serdev)
regulator_disable(data->vcc);
gnss_put_device(data->gdev);
-};
+}
#ifdef CONFIG_OF
static const struct of_device_id sirf_of_match[] = {
diff --git a/drivers/gnss/ubx.c b/drivers/gnss/ubx.c
index 7b05bc40532e..c951be202ca2 100644
--- a/drivers/gnss/ubx.c
+++ b/drivers/gnss/ubx.c
@@ -126,7 +126,7 @@ static void ubx_remove(struct serdev_device *serdev)
if (data->v_bckp)
regulator_disable(data->v_bckp);
gnss_serial_free(gserial);
-};
+}
#ifdef CONFIG_OF
static const struct of_device_id ubx_of_match[] = {
diff --git a/drivers/gnss/usb.c b/drivers/gnss/usb.c
new file mode 100644
index 000000000000..028ce56b20ea
--- /dev/null
+++ b/drivers/gnss/usb.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic USB GNSS receiver driver
+ *
+ * Copyright (C) 2021 Johan Hovold <johan@kernel.org>
+ */
+
+#include <linux/errno.h>
+#include <linux/gnss.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+
+#define GNSS_USB_READ_BUF_LEN 512
+#define GNSS_USB_WRITE_TIMEOUT 1000
+
+static const struct usb_device_id gnss_usb_id_table[] = {
+ { USB_DEVICE(0x1199, 0xb000) }, /* Sierra Wireless XM1210 */
+ { }
+};
+MODULE_DEVICE_TABLE(usb, gnss_usb_id_table);
+
+struct gnss_usb {
+ struct usb_device *udev;
+ struct usb_interface *intf;
+ struct gnss_device *gdev;
+ struct urb *read_urb;
+ unsigned int write_pipe;
+};
+
+static void gnss_usb_rx_complete(struct urb *urb)
+{
+ struct gnss_usb *gusb = urb->context;
+ struct gnss_device *gdev = gusb->gdev;
+ int status = urb->status;
+ int len;
+ int ret;
+
+ switch (status) {
+ case 0:
+ break;
+ case -ENOENT:
+ case -ECONNRESET:
+ case -ESHUTDOWN:
+ dev_dbg(&gdev->dev, "urb stopped: %d\n", status);
+ return;
+ case -EPIPE:
+ dev_err(&gdev->dev, "urb stopped: %d\n", status);
+ return;
+ default:
+ dev_dbg(&gdev->dev, "nonzero urb status: %d\n", status);
+ goto resubmit;
+ }
+
+ len = urb->actual_length;
+ if (len == 0)
+ goto resubmit;
+
+ ret = gnss_insert_raw(gdev, urb->transfer_buffer, len);
+ if (ret < len)
+ dev_dbg(&gdev->dev, "dropped %d bytes\n", len - ret);
+resubmit:
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (ret && ret != -EPERM && ret != -ENODEV)
+ dev_err(&gdev->dev, "failed to resubmit urb: %d\n", ret);
+}
+
+static int gnss_usb_open(struct gnss_device *gdev)
+{
+ struct gnss_usb *gusb = gnss_get_drvdata(gdev);
+ int ret;
+
+ ret = usb_submit_urb(gusb->read_urb, GFP_KERNEL);
+ if (ret) {
+ if (ret != -EPERM && ret != -ENODEV)
+ dev_err(&gdev->dev, "failed to submit urb: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void gnss_usb_close(struct gnss_device *gdev)
+{
+ struct gnss_usb *gusb = gnss_get_drvdata(gdev);
+
+ usb_kill_urb(gusb->read_urb);
+}
+
+static int gnss_usb_write_raw(struct gnss_device *gdev,
+ const unsigned char *buf, size_t count)
+{
+ struct gnss_usb *gusb = gnss_get_drvdata(gdev);
+ void *tbuf;
+ int ret;
+
+ tbuf = kmemdup(buf, count, GFP_KERNEL);
+ if (!tbuf)
+ return -ENOMEM;
+
+ ret = usb_bulk_msg(gusb->udev, gusb->write_pipe, tbuf, count, NULL,
+ GNSS_USB_WRITE_TIMEOUT);
+ kfree(tbuf);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static const struct gnss_operations gnss_usb_gnss_ops = {
+ .open = gnss_usb_open,
+ .close = gnss_usb_close,
+ .write_raw = gnss_usb_write_raw,
+};
+
+static int gnss_usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
+{
+ struct usb_device *udev = interface_to_usbdev(intf);
+ struct usb_endpoint_descriptor *in, *out;
+ struct gnss_device *gdev;
+ struct gnss_usb *gusb;
+ struct urb *urb;
+ size_t buf_len;
+ void *buf;
+ int ret;
+
+ ret = usb_find_common_endpoints(intf->cur_altsetting, &in, &out, NULL,
+ NULL);
+ if (ret)
+ return ret;
+
+ gusb = kzalloc(sizeof(*gusb), GFP_KERNEL);
+ if (!gusb)
+ return -ENOMEM;
+
+ gdev = gnss_allocate_device(&intf->dev);
+ if (!gdev) {
+ ret = -ENOMEM;
+ goto err_free_gusb;
+ }
+
+ gdev->ops = &gnss_usb_gnss_ops;
+ gdev->type = GNSS_TYPE_NMEA;
+ gnss_set_drvdata(gdev, gusb);
+
+ urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!urb) {
+ ret = -ENOMEM;
+ goto err_put_gdev;
+ }
+
+ buf_len = max(usb_endpoint_maxp(in), GNSS_USB_READ_BUF_LEN);
+
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto err_free_urb;
+ }
+
+ usb_fill_bulk_urb(urb, udev,
+ usb_rcvbulkpipe(udev, usb_endpoint_num(in)),
+ buf, buf_len, gnss_usb_rx_complete, gusb);
+
+ gusb->intf = intf;
+ gusb->udev = udev;
+ gusb->gdev = gdev;
+ gusb->read_urb = urb;
+ gusb->write_pipe = usb_sndbulkpipe(udev, usb_endpoint_num(out));
+
+ ret = gnss_register_device(gdev);
+ if (ret)
+ goto err_free_buf;
+
+ usb_set_intfdata(intf, gusb);
+
+ return 0;
+
+err_free_buf:
+ kfree(buf);
+err_free_urb:
+ usb_free_urb(urb);
+err_put_gdev:
+ gnss_put_device(gdev);
+err_free_gusb:
+ kfree(gusb);
+
+ return ret;
+}
+
+static void gnss_usb_disconnect(struct usb_interface *intf)
+{
+ struct gnss_usb *gusb = usb_get_intfdata(intf);
+
+ gnss_deregister_device(gusb->gdev);
+
+ kfree(gusb->read_urb->transfer_buffer);
+ usb_free_urb(gusb->read_urb);
+ gnss_put_device(gusb->gdev);
+ kfree(gusb);
+}
+
+static struct usb_driver gnss_usb_driver = {
+ .name = "gnss-usb",
+ .probe = gnss_usb_probe,
+ .disconnect = gnss_usb_disconnect,
+ .id_table = gnss_usb_id_table,
+};
+module_usb_driver(gnss_usb_driver);
+
+MODULE_AUTHOR("Johan Hovold <johan@kernel.org>");
+MODULE_DESCRIPTION("Generic USB GNSS receiver driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpio/gpio-idt3243x.c b/drivers/gpio/gpio-idt3243x.c
index 50003ad2e589..52b8b72ded77 100644
--- a/drivers/gpio/gpio-idt3243x.c
+++ b/drivers/gpio/gpio-idt3243x.c
@@ -132,7 +132,7 @@ static int idt_gpio_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct gpio_irq_chip *girq;
struct idt_gpio_ctrl *ctrl;
- unsigned int parent_irq;
+ int parent_irq;
int ngpios;
int ret;
@@ -164,8 +164,8 @@ static int idt_gpio_probe(struct platform_device *pdev)
return PTR_ERR(ctrl->pic);
parent_irq = platform_get_irq(pdev, 0);
- if (!parent_irq)
- return -EINVAL;
+ if (parent_irq < 0)
+ return parent_irq;
girq = &ctrl->gc.irq;
girq->chip = &idt_gpio_irqchip;
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 70d6ae20b1da..a964e25ea620 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -47,7 +47,7 @@ struct mpc8xxx_gpio_chip {
unsigned offset, int value);
struct irq_domain *irq;
- unsigned int irqn;
+ int irqn;
};
/*
@@ -388,8 +388,8 @@ static int mpc8xxx_probe(struct platform_device *pdev)
}
mpc8xxx_gc->irqn = platform_get_irq(pdev, 0);
- if (!mpc8xxx_gc->irqn)
- return 0;
+ if (mpc8xxx_gc->irqn < 0)
+ return mpc8xxx_gc->irqn;
mpc8xxx_gc->irq = irq_domain_create_linear(fwnode,
MPC8XXX_GPIO_PINS,
diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c
index 9f4941bc5760..fcc5e8c08973 100644
--- a/drivers/gpio/gpio-virtio.c
+++ b/drivers/gpio/gpio-virtio.c
@@ -450,7 +450,7 @@ static void virtio_gpio_request_vq(struct virtqueue *vq)
static void virtio_gpio_free_vqs(struct virtio_device *vdev)
{
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 776a947b45df..6ca1db3c243f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -514,13 +514,6 @@ out_put:
return r;
}
-uint64_t amdgpu_amdkfd_get_vram_usage(struct amdgpu_device *adev)
-{
- struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
-
- return amdgpu_vram_mgr_usage(vram_man);
-}
-
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
struct amdgpu_device *src)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 61f899e54fd5..ac841ae8f5cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -223,7 +223,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
uint32_t *flags);
-uint64_t amdgpu_amdkfd_get_vram_usage(struct amdgpu_device *adev);
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
struct amdgpu_device *src);
int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 0311d799a010..06d07502a1f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -298,7 +298,6 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
{
s64 time_us, increment_us;
u64 free_vram, total_vram, used_vram;
- struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
* throttling.
*
@@ -315,7 +314,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
}
total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
- used_vram = amdgpu_vram_mgr_usage(vram_man);
+ used_vram = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
spin_lock(&adev->mm_stats.lock);
@@ -362,7 +361,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
- amdgpu_vram_mgr_vis_usage(vram_man);
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
if (used_vis_vram < total_vis_vram) {
u64 free_vis_vram = total_vis_vram - used_vis_vram;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a8b08a72b71b..ed077de426d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -552,7 +552,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
}
/**
- * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
+ * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
*
* this function is invoked only the debugfs register access
*/
@@ -567,6 +567,8 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
adev->gfx.rlc.funcs->is_rlcg_access_range) {
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0);
+ } else if ((reg * 4) >= adev->rmmio_size) {
+ adev->pcie_wreg(adev, reg * 4, v);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}
@@ -1448,7 +1450,7 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
break;
default:
- return -EINVAL;
+ break;
}
return 0;
@@ -2352,7 +2354,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
if (amdgpu_sriov_vf(adev))
- amdgpu_virt_exchange_data(adev);
+ amdgpu_virt_init_data_exchange(adev);
r = amdgpu_ib_pool_init(adev);
if (r) {
@@ -3496,9 +3498,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
- r = amdgpu_device_init_apu_flags(adev);
- if (r)
- return r;
+ amdgpu_device_init_apu_flags(adev);
r = amdgpu_device_check_arguments(adev);
if (r)
@@ -3833,6 +3833,7 @@ failed:
static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
{
+
/* Clear all CPU mappings pointing to this device */
unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
@@ -3913,6 +3914,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
+ int idx;
+
amdgpu_fence_driver_sw_fini(adev);
amdgpu_device_ip_fini(adev);
release_firmware(adev->firmware.gpu_info_fw);
@@ -3937,6 +3940,14 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
vga_client_unregister(adev->pdev);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ iounmap(adev->rmmio);
+ adev->rmmio = NULL;
+ amdgpu_device_doorbell_fini(adev);
+ drm_dev_exit(idx);
+ }
+
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
if (adev->mman.discovery_bin)
@@ -3957,8 +3968,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
*/
static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
{
- /* No need to evict vram on APUs for suspend to ram */
- if (adev->in_s3 && (adev->flags & AMD_IS_APU))
+ /* No need to evict vram on APUs for suspend to ram or s2idle */
+ if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
return;
if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
@@ -4005,16 +4016,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
if (!adev->in_s0ix)
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
- /* First evict vram memory */
amdgpu_device_evict_resources(adev);
amdgpu_fence_driver_hw_fini(adev);
amdgpu_device_ip_suspend_phase2(adev);
- /* This second call to evict device resources is to evict
- * the gart page table using the CPU.
- */
- amdgpu_device_evict_resources(adev);
return 0;
}
@@ -4359,8 +4365,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
goto error;
amdgpu_virt_init_data_exchange(adev);
- /* we need recover gart prior to run SMC/CP/SDMA resume */
- amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
r = amdgpu_device_fw_loading(adev);
if (r)
@@ -4446,33 +4450,24 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
if (amdgpu_gpu_recovery == -1) {
switch (adev->asic_type) {
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_TOPAZ:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_VEGA20:
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_RAVEN:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_VANGOGH:
- case CHIP_ALDEBARAN:
- break;
- default:
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_CYAN_SKILLFISH:
goto disabled;
+ default:
+ break;
}
}
@@ -4680,10 +4675,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
amdgpu_inc_vram_lost(tmp_adev);
}
- r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
- if (r)
- goto out;
-
r = amdgpu_device_fw_loading(tmp_adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 028190d42bb2..81bfee978b74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -243,6 +243,30 @@ static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary)
return (le32_to_cpu(bhdr->binary_signature) == BINARY_SIGNATURE);
}
+static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
+{
+ /*
+ * So far, apply this quirk only on those Navy Flounder boards which
+ * have a bad harvest table of VCN config.
+ */
+ if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) &&
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2))) {
+ switch (adev->pdev->revision) {
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC5:
+ case 0xC7:
+ case 0xCF:
+ case 0xDF:
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
@@ -548,10 +572,9 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
break;
}
}
- /* some IP discovery tables on Navy Flounder don't have this set correctly */
- if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) &&
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2)))
- adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+
+ amdgpu_discovery_harvest_config_quirk(adev);
+
if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b63ed1ddf713..b21bcdc97460 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1930,11 +1930,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
- if (flags == 0) {
- DRM_INFO("Unsupported asic. Remove me when IP discovery init is in place.\n");
- return -ENODEV;
- }
-
if (amdgpu_virtual_display ||
amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
supports_atomic = true;
@@ -2194,9 +2189,9 @@ static int amdgpu_pmops_suspend(struct device *dev)
if (amdgpu_acpi_is_s0ix_active(adev))
adev->in_s0ix = true;
- adev->in_s3 = true;
+ else
+ adev->in_s3 = true;
r = amdgpu_device_suspend(drm_dev, true);
- adev->in_s3 = false;
if (r)
return r;
if (!adev->in_s0ix)
@@ -2217,6 +2212,8 @@ static int amdgpu_pmops_resume(struct device *dev)
r = amdgpu_device_resume(drm_dev, true);
if (amdgpu_acpi_is_s0ix_active(adev))
adev->in_s0ix = false;
+ else
+ adev->in_s3 = false;
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index d3e4203f6217..645950a653a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -114,80 +114,12 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
*/
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
- int r;
-
- if (adev->gart.bo == NULL) {
- struct amdgpu_bo_param bp;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = adev->gart.table_size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-
- r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
- if (r) {
- return r;
- }
- }
- return 0;
-}
-
-/**
- * amdgpu_gart_table_vram_pin - pin gart page table in vram
- *
- * @adev: amdgpu_device pointer
- *
- * Pin the GART page table in vram so it will not be moved
- * by the memory manager (pcie r4xx, r5xx+). These asics require the
- * gart table to be in video memory.
- * Returns 0 for success, error for failure.
- */
-int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gart.bo, false);
- if (unlikely(r != 0))
- return r;
- r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
- if (r) {
- amdgpu_bo_unreserve(adev->gart.bo);
- return r;
- }
- r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
- if (r)
- amdgpu_bo_unpin(adev->gart.bo);
- amdgpu_bo_unreserve(adev->gart.bo);
- return r;
-}
-
-/**
- * amdgpu_gart_table_vram_unpin - unpin gart page table in vram
- *
- * @adev: amdgpu_device pointer
- *
- * Unpin the GART page table in vram (pcie r4xx, r5xx+).
- * These asics require the gart table to be in video memory.
- */
-void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
+ if (adev->gart.bo != NULL)
+ return 0;
- if (adev->gart.bo == NULL) {
- return;
- }
- r = amdgpu_bo_reserve(adev->gart.bo, true);
- if (likely(r == 0)) {
- amdgpu_bo_kunmap(adev->gart.bo);
- amdgpu_bo_unpin(adev->gart.bo);
- amdgpu_bo_unreserve(adev->gart.bo);
- adev->gart.ptr = NULL;
- }
+ return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
+ NULL, (void *)&adev->gart.ptr);
}
/**
@@ -201,11 +133,7 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
*/
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{
- if (adev->gart.bo == NULL) {
- return;
- }
- amdgpu_bo_unref(&adev->gart.bo);
- adev->gart.ptr = NULL;
+ amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr);
}
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 9a6507af1670..c0d8f40a5b45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -264,9 +264,6 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
vma->vm_flags &= ~VM_MAYWRITE;
- if (bo->kfd_bo)
- vma->vm_flags |= VM_DONTCOPY;
-
return drm_gem_ttm_mmap(obj, vma);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 675a72ef305d..72022df264f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -77,10 +77,8 @@ static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man;
- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
- return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man));
+ return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr));
}
static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
@@ -206,30 +204,27 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
/**
* amdgpu_gtt_mgr_usage - return usage of GTT domain
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_gtt_mgr pointer
*
* Return how many bytes are used in the GTT domain
*/
-uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
+uint64_t amdgpu_gtt_mgr_usage(struct amdgpu_gtt_mgr *mgr)
{
- struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
-
return atomic64_read(&mgr->used) * PAGE_SIZE;
}
/**
* amdgpu_gtt_mgr_recover - re-init gart
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_gtt_mgr pointer
*
* Re-init the gart for each known BO in the GTT.
*/
-int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
+int amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
{
- struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
- struct amdgpu_device *adev;
struct amdgpu_gtt_node *node;
struct drm_mm_node *mm_node;
+ struct amdgpu_device *adev;
int r = 0;
adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 09ad17944eb2..1ebb91db2274 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -678,13 +678,13 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VRAM_USAGE:
- ui64 = amdgpu_vram_mgr_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM));
+ ui64 = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VIS_VRAM_USAGE:
- ui64 = amdgpu_vram_mgr_vis_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM));
+ ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_GTT_USAGE:
- ui64 = amdgpu_gtt_mgr_usage(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
+ ui64 = amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_GDS_CONFIG: {
struct drm_amdgpu_info_gds gds_info;
@@ -715,8 +715,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
case AMDGPU_INFO_MEMORY: {
struct drm_amdgpu_memory_info mem;
- struct ttm_resource_manager *vram_man =
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
struct ttm_resource_manager *gtt_man =
ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
memset(&mem, 0, sizeof(mem));
@@ -725,7 +723,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
atomic64_read(&adev->vram_pin_size) -
AMDGPU_VM_RESERVED_VRAM;
mem.vram.heap_usage =
- amdgpu_vram_mgr_usage(vram_man);
+ amdgpu_vram_mgr_usage(&adev->mman.vram_mgr);
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
@@ -735,7 +733,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
atomic64_read(&adev->visible_pin_size),
mem.vram.usable_heap_size);
mem.cpu_accessible_vram.heap_usage =
- amdgpu_vram_mgr_vis_usage(vram_man);
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
mem.cpu_accessible_vram.max_allocation =
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
@@ -744,7 +742,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
atomic64_read(&adev->gart_pin_size);
mem.gtt.heap_usage =
- amdgpu_gtt_mgr_usage(gtt_man);
+ amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr);
mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
return copy_to_user(out, &mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 3a7b56e57cec..5661b82d84d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <linux/dma-buf.h>
+#include <drm/drm_drv.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
@@ -1061,7 +1062,18 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
*/
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
+ int idx;
+
amdgpu_ttm_fini(adev);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ if (!adev->gmc.xgmi.connected_to_cpu) {
+ arch_phys_wc_del(adev->gmc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
+ }
+ drm_dev_exit(idx);
+ }
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 91e6e87562ac..8f47c14ecbc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1592,6 +1592,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
/* Let IP handle its data, maybe we need get the output
* from the callback to udpate the error type/count, etc
*/
+ memset(&err_data, 0, sizeof(err_data));
ret = data->cb(obj->adev, &err_data, &entry);
/* ue will trigger an interrupt, and in that case
* we need do a reset to recovery the whole system.
@@ -1838,8 +1839,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
.size = AMDGPU_GPU_PAGE_SIZE,
.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
};
- status = amdgpu_vram_mgr_query_page_status(
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+ status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
data->bps[i].retired_page);
if (status == -EBUSY)
(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
@@ -1940,8 +1940,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
goto out;
}
- amdgpu_vram_mgr_reserve_range(
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+ amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fb0d8bffdce2..5c3f24069f2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
#include <linux/sizes.h>
#include <linux/module.h>
+#include <drm/drm_drv.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
@@ -1804,6 +1805,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*/
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
+ int idx;
if (!adev->mman.initialized)
return;
@@ -1818,6 +1820,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ if (adev->mman.aper_base_kaddr)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
+
+ drm_dev_exit(idx);
+ }
+
amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 7346ecff4438..f8f48be16d80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -114,8 +114,8 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
-uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man);
-int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
+uint64_t amdgpu_gtt_mgr_usage(struct amdgpu_gtt_mgr *mgr);
+int amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
@@ -129,11 +129,11 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
void amdgpu_vram_mgr_free_sgt(struct device *dev,
enum dma_data_direction dir,
struct sg_table *sgt);
-uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man);
-uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man);
-int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+uint64_t amdgpu_vram_mgr_usage(struct amdgpu_vram_mgr *mgr);
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr);
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
uint64_t start, uint64_t size);
-int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
int amdgpu_ttm_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f8e574cc0e22..07bc0f504713 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -553,7 +553,6 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
{
struct amd_sriov_msg_vf2pf_info *vf2pf_info;
- struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
@@ -576,8 +575,8 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->driver_cert = 0;
vf2pf_info->os_info.all = 0;
- vf2pf_info->fb_usage = amdgpu_vram_mgr_usage(vram_man) >> 20;
- vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(vram_man) >> 20;
+ vf2pf_info->fb_usage = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr) >> 20;
+ vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20;
@@ -626,20 +625,20 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
- if (adev->bios != NULL) {
- adev->virt.vf2pf_update_interval_ms = 2000;
+ if (adev->mman.fw_vram_usage_va != NULL) {
+ /* go through this logic in ip_init and reset to init workqueue*/
+ amdgpu_virt_exchange_data(adev);
+ INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+ schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+ } else if (adev->bios != NULL) {
+ /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
amdgpu_virt_read_pf2vf_data(adev);
}
-
- if (adev->virt.vf2pf_update_interval_ms != 0) {
- INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
- schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
- }
}
@@ -675,12 +674,6 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
if (adev->virt.ras_init_done)
amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
}
- } else if (adev->bios != NULL) {
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
-
- amdgpu_virt_read_pf2vf_data(adev);
}
}
@@ -727,6 +720,10 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
vi_set_virt_ops(adev);
break;
case CHIP_VEGA10:
+ soc15_set_virt_ops(adev);
+ /* send a dummy GPU_INIT_DATA request to host on vega10 */
+ amdgpu_virt_request_init_data(adev);
+ break;
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 2dcc68e04e84..d99c8779b51e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -144,15 +144,16 @@ static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc,
static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc,
struct drm_atomic_state *state)
{
+ unsigned long flags;
if (crtc->state->event) {
- spin_lock(&crtc->dev->event_lock);
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
if (drm_crtc_vblank_get(crtc) != 0)
drm_crtc_send_vblank_event(crtc, crtc->state->event);
else
drm_crtc_arm_vblank_event(crtc, crtc->state->event);
- spin_unlock(&crtc->dev->event_lock);
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
crtc->state->event = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 7b2b0980ec41..7a2b487db57c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -96,10 +96,9 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man;
- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
- return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man));
+ return sysfs_emit(buf, "%llu\n",
+ amdgpu_vram_mgr_usage(&adev->mman.vram_mgr));
}
/**
@@ -116,10 +115,9 @@ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man;
- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
- return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man));
+ return sysfs_emit(buf, "%llu\n",
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr));
}
/**
@@ -263,16 +261,15 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
/**
* amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_vram_mgr pointer
* @start: start address of the range in VRAM
* @size: size of the range
*
- * Reserve memory from start addess with the specified size in VRAM
+ * Reserve memory from start address with the specified size in VRAM
*/
-int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
uint64_t start, uint64_t size)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_vram_reservation *rsv;
rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
@@ -285,7 +282,7 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
spin_lock(&mgr->lock);
list_add_tail(&mgr->reservations_pending, &rsv->node);
- amdgpu_vram_mgr_do_reserve(man);
+ amdgpu_vram_mgr_do_reserve(&mgr->manager);
spin_unlock(&mgr->lock);
return 0;
@@ -294,7 +291,7 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
/**
* amdgpu_vram_mgr_query_page_status - query the reservation status
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_vram_mgr pointer
* @start: start address of a page in VRAM
*
* Returns:
@@ -302,10 +299,9 @@ int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
* 0: the page has been reserved
* -ENOENT: the input page is not a reservation
*/
-int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_vram_reservation *rsv;
int ret;
@@ -632,28 +628,24 @@ void amdgpu_vram_mgr_free_sgt(struct device *dev,
/**
* amdgpu_vram_mgr_usage - how many bytes are used in this domain
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_vram_mgr pointer
*
* Returns how many bytes are used in this domain.
*/
-uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man)
+uint64_t amdgpu_vram_mgr_usage(struct amdgpu_vram_mgr *mgr)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
-
return atomic64_read(&mgr->usage);
}
/**
* amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
*
- * @man: TTM memory type manager
+ * @mgr: amdgpu_vram_mgr pointer
*
* Returns how many bytes are used in the visible part of VRAM
*/
-uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man)
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
{
- struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
-
return atomic64_read(&mgr->vis_usage);
}
@@ -675,8 +667,8 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
spin_unlock(&mgr->lock);
drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
- man->size, amdgpu_vram_mgr_usage(man) >> 20,
- amdgpu_vram_mgr_vis_usage(man) >> 20);
+ man->size, amdgpu_vram_mgr_usage(mgr) >> 20,
+ amdgpu_vram_mgr_vis_usage(mgr) >> 20);
}
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index a38c6a747fa4..e8b8f28c2f72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -208,6 +208,7 @@ static struct attribute *amdgpu_xgmi_hive_attrs[] = {
&amdgpu_xgmi_hive_id,
NULL
};
+ATTRIBUTE_GROUPS(amdgpu_xgmi_hive);
static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj,
struct attribute *attr, char *buf)
@@ -237,7 +238,7 @@ static const struct sysfs_ops amdgpu_xgmi_hive_ops = {
struct kobj_type amdgpu_xgmi_hive_type = {
.release = amdgpu_xgmi_hive_release,
.sysfs_ops = &amdgpu_xgmi_hive_ops,
- .default_attrs = amdgpu_xgmi_hive_attrs,
+ .default_groups = amdgpu_xgmi_hive_groups,
};
static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 54f28c075f21..f10ce740a29c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1428,6 +1428,10 @@ static int cik_asic_reset(struct amdgpu_device *adev)
{
int r;
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
dev_info(adev->dev, "BACO reset\n");
r = amdgpu_dpm_baco_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3d5d47a799e3..38bb42727715 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -989,7 +989,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
goto skip_pin_bo;
- r = amdgpu_gart_table_vram_pin(adev);
+ r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;
@@ -1060,7 +1060,6 @@ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
adev->gfxhub.funcs->gart_disable(adev);
adev->mmhub.funcs->gart_disable(adev);
- amdgpu_gart_table_vram_unpin(adev);
}
static int gmc_v10_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 0fe714f54cca..cd6c38e083d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -476,7 +476,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
+ r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;
@@ -608,7 +608,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL3,
VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
(0UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
- amdgpu_gart_table_vram_unpin(adev);
}
static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 0a50fdaced7e..ab8adbff9e2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -620,7 +620,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
+ r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;
@@ -758,7 +758,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(mmVM_L2_CNTL, tmp);
WREG32(mmVM_L2_CNTL2, 0);
- amdgpu_gart_table_vram_unpin(adev);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 63b890f1e8af..054733838292 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -844,7 +844,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
- r = amdgpu_gart_table_vram_pin(adev);
+ r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;
@@ -999,7 +999,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(mmVM_L2_CNTL, tmp);
WREG32(mmVM_L2_CNTL2, 0);
- amdgpu_gart_table_vram_unpin(adev);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 57f2729a7bd0..88c1eb9ad068 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -72,6 +72,9 @@
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
+
static const char *gfxhub_client_ids[] = {
"CB",
@@ -1134,6 +1137,8 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
unsigned size;
+ /* TODO move to DC so GMC doesn't need to hard-code DCN registers */
+
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = AMDGPU_VBIOS_VGA_ALLOCATION;
} else {
@@ -1142,7 +1147,6 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
switch (adev->ip_versions[DCE_HWIP][0]) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
- case IP_VERSION(2, 1, 0):
viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
size = (REG_GET_FIELD(viewport,
HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
@@ -1150,6 +1154,14 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
4);
break;
+ case IP_VERSION(2, 1, 0):
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+ 4);
+ break;
default:
viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
@@ -1743,7 +1755,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
goto skip_pin_bo;
- r = amdgpu_gart_table_vram_pin(adev);
+ r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;
@@ -1821,7 +1833,6 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
adev->gfxhub.funcs->gart_disable(adev);
adev->mmhub.funcs->gart_disable(adev);
- amdgpu_gart_table_vram_unpin(adev);
}
static int gmc_v9_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 0077e738db31..56da5ab82987 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -180,6 +180,11 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
}
+ } else if (req == IDH_REQ_GPU_INIT_DATA){
+ /* Dummy REQ_GPU_INIT_DATA handling */
+ r = xgpu_ai_poll_msg(adev, IDH_REQ_GPU_INIT_DATA_READY);
+ /* version set to 0 since dummy */
+ adev->virt.req_init_data_ver = 0;
}
return 0;
@@ -381,10 +386,16 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
+static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
+{
+ return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+}
+
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.req_full_gpu = xgpu_ai_request_full_gpu_access,
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
.reset_gpu = xgpu_ai_request_reset,
.wait_reset = NULL,
.trans_msg = xgpu_ai_mailbox_trans_msg,
+ .req_init_data = xgpu_ai_request_init_data,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index f9aa4d0bb638..fa7e13e0459e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -35,6 +35,7 @@ enum idh_request {
IDH_REQ_GPU_FINI_ACCESS,
IDH_REL_GPU_FINI_ACCESS,
IDH_REQ_GPU_RESET_ACCESS,
+ IDH_REQ_GPU_INIT_DATA,
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
@@ -48,6 +49,7 @@ enum idh_event {
IDH_SUCCESS,
IDH_FAIL,
IDH_QUERY_ALIVE,
+ IDH_REQ_GPU_INIT_DATA_READY,
IDH_TEXT_MESSAGE = 255,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index fe9a7cc8d9eb..6645ebbd2696 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -956,6 +956,10 @@ static int vi_asic_reset(struct amdgpu_device *adev)
{
int r;
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
dev_info(adev->dev, "BACO reset\n");
r = amdgpu_dpm_baco_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index f187596faf66..9624bbe8b501 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1060,6 +1060,9 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
return -ENODEV;
/* same everything but the other direction */
props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+ if (!props2)
+ return -ENOMEM;
+
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 127d41d0e4f0..2b65d0acae2c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -68,20 +68,20 @@ static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
case IP_VERSION(4, 0, 1):/* VEGA12 */
case IP_VERSION(4, 1, 0):/* RAVEN */
case IP_VERSION(4, 1, 1):/* RAVEN */
- case IP_VERSION(4, 1, 2):/* RENIOR */
+ case IP_VERSION(4, 1, 2):/* RENOIR */
case IP_VERSION(5, 2, 1):/* VANGOGH */
case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
kfd->device_info.num_sdma_queues_per_engine = 2;
break;
case IP_VERSION(4, 2, 0):/* VEGA20 */
- case IP_VERSION(4, 2, 2):/* ARCTUTUS */
+ case IP_VERSION(4, 2, 2):/* ARCTURUS */
case IP_VERSION(4, 4, 0):/* ALDEBARAN */
case IP_VERSION(5, 0, 0):/* NAVI10 */
case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
case IP_VERSION(5, 0, 2):/* NAVI14 */
case IP_VERSION(5, 0, 5):/* NAVI12 */
case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
- case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
+ case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
kfd->device_info.num_sdma_queues_per_engine = 8;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 19890e350107..4b6814949aad 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1004,14 +1004,17 @@ static void uninitialize(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm)
{
+ int r = 0;
+
pr_info("SW scheduler is used");
init_interrupts(dqm);
if (dqm->dev->adev->asic_type == CHIP_HAWAII)
- return pm_init(&dqm->packet_mgr, dqm);
- dqm->sched_running = true;
+ r = pm_init(&dqm->packet_mgr, dqm);
+ if (!r)
+ dqm->sched_running = true;
- return 0;
+ return r;
}
static int stop_nocpsch(struct device_queue_manager *dqm)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index b8ac28fb1231..e8bc28009c22 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -197,6 +197,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
*/
return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
source_id == SOC15_INTSRC_SDMA_TRAP ||
+ source_id == SOC15_INTSRC_SDMA_ECC ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
((client_id == SOC15_IH_CLIENTID_VMC ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index f1930ff2c74a..d1145da5348f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -461,6 +461,7 @@ static struct attribute *procfs_queue_attrs[] = {
&attr_queue_gpuid,
NULL
};
+ATTRIBUTE_GROUPS(procfs_queue);
static const struct sysfs_ops procfs_queue_ops = {
.show = kfd_procfs_queue_show,
@@ -468,7 +469,7 @@ static const struct sysfs_ops procfs_queue_ops = {
static struct kobj_type procfs_queue_type = {
.sysfs_ops = &procfs_queue_ops,
- .default_attrs = procfs_queue_attrs,
+ .default_groups = procfs_queue_groups,
};
static const struct sysfs_ops procfs_stats_ops = {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index aa5ee91cd595..f2805ba74c80 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -107,7 +107,7 @@ static void svm_range_add_to_svms(struct svm_range *prange)
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
prange, prange->start, prange->last);
- list_add_tail(&prange->list, &prange->svms->list);
+ list_move_tail(&prange->list, &prange->svms->list);
prange->it_node.start = prange->start;
prange->it_node.last = prange->last;
interval_tree_insert(&prange->it_node, &prange->svms->objects);
@@ -295,8 +295,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
prange->last = last;
INIT_LIST_HEAD(&prange->list);
INIT_LIST_HEAD(&prange->update_list);
- INIT_LIST_HEAD(&prange->remove_list);
- INIT_LIST_HEAD(&prange->insert_list);
INIT_LIST_HEAD(&prange->svm_bo_list);
INIT_LIST_HEAD(&prange->deferred_list);
INIT_LIST_HEAD(&prange->child_list);
@@ -1018,7 +1016,7 @@ svm_range_split_tail(struct svm_range *prange,
int r = svm_range_split(prange, prange->start, new_last, &tail);
if (!r)
- list_add(&tail->insert_list, insert_list);
+ list_add(&tail->list, insert_list);
return r;
}
@@ -1030,7 +1028,7 @@ svm_range_split_head(struct svm_range *prange,
int r = svm_range_split(prange, new_start, prange->last, &head);
if (!r)
- list_add(&head->insert_list, insert_list);
+ list_add(&head->list, insert_list);
return r;
}
@@ -1898,8 +1896,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
goto out;
}
- list_add(&old->remove_list, remove_list);
- list_add(&prange->insert_list, insert_list);
+ list_add(&old->update_list, remove_list);
+ list_add(&prange->list, insert_list);
list_add(&prange->update_list, update_list);
if (node->start < start) {
@@ -1931,7 +1929,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
goto out;
}
- list_add(&prange->insert_list, insert_list);
+ list_add(&prange->list, insert_list);
list_add(&prange->update_list, update_list);
}
@@ -1946,13 +1944,13 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
r = -ENOMEM;
goto out;
}
- list_add(&prange->insert_list, insert_list);
+ list_add(&prange->list, insert_list);
list_add(&prange->update_list, update_list);
}
out:
if (r)
- list_for_each_entry_safe(prange, tmp, insert_list, insert_list)
+ list_for_each_entry_safe(prange, tmp, insert_list, list)
svm_range_free(prange);
return r;
@@ -3236,7 +3234,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
goto out;
}
/* Apply changes as a transaction */
- list_for_each_entry_safe(prange, next, &insert_list, insert_list) {
+ list_for_each_entry_safe(prange, next, &insert_list, list) {
svm_range_add_to_svms(prange);
svm_range_add_notifier_locked(mm, prange);
}
@@ -3244,8 +3242,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
svm_range_apply_attrs(p, prange, nattr, attrs);
/* TODO: unmap ranges from GPU that lost access */
}
- list_for_each_entry_safe(prange, next, &remove_list,
- remove_list) {
+ list_for_each_entry_safe(prange, next, &remove_list, update_list) {
pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",
prange->svms, prange, prange->start,
prange->last);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 2f8a95e86dcb..949b477e2f4c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -76,8 +76,6 @@ struct svm_work_list_item {
* aligned, page size is (last - start + 1)
* @list: link list node, used to scan all ranges of svms
* @update_list:link list node used to add to update_list
- * @remove_list:link list node used to add to remove list
- * @insert_list:link list node used to add to insert list
* @mapping: bo_va mapping structure to create and update GPU page table
* @npages: number of pages
* @dma_addr: dma mapping address on each GPU for system memory physical page
@@ -113,8 +111,6 @@ struct svm_range {
struct interval_tree_node it_node;
struct list_head list;
struct list_head update_list;
- struct list_head remove_list;
- struct list_head insert_list;
uint64_t npages;
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
struct ttm_resource *ttm_res;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2f0b14f8f833..7f9773f8dab6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -658,7 +658,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
struct drm_connector_list_iter iter;
struct dc_link *link;
uint8_t link_index = 0;
- struct drm_device *dev = adev->dm.ddev;
+ struct drm_device *dev;
if (adev == NULL)
return;
@@ -675,6 +675,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
link_index = notify->link_index;
link = adev->dm.dc->links[link_index];
+ dev = adev->dm.ddev;
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
@@ -1161,6 +1162,32 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
return 0;
}
+static void dm_dmub_hw_resume(struct amdgpu_device *adev)
+{
+ struct dmub_srv *dmub_srv = adev->dm.dmub_srv;
+ enum dmub_status status;
+ bool init;
+
+ if (!dmub_srv) {
+ /* DMUB isn't supported on the ASIC. */
+ return;
+ }
+
+ status = dmub_srv_is_hw_init(dmub_srv, &init);
+ if (status != DMUB_STATUS_OK)
+ DRM_WARN("DMUB hardware init check failed: %d\n", status);
+
+ if (status == DMUB_STATUS_OK && init) {
+ /* Wait for firmware load to finish. */
+ status = dmub_srv_wait_for_auto_load(dmub_srv, 100000);
+ if (status != DMUB_STATUS_OK)
+ DRM_WARN("Wait for DMUB auto-load failed: %d\n", status);
+ } else {
+ /* Perform the full hardware initialization. */
+ dm_dmub_hw_init(adev);
+ }
+}
+
#if defined(CONFIG_DRM_AMD_DC_DCN)
static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_addr_space_config *pa_config)
{
@@ -2637,9 +2664,7 @@ static int dm_resume(void *handle)
amdgpu_dm_outbox_init(adev);
/* Before powering on DC we need to re-initialize DMUB. */
- r = dm_dmub_hw_init(adev);
- if (r)
- DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+ dm_dmub_hw_resume(adev);
/* power on hardware */
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
@@ -6073,6 +6098,7 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector,
struct dsc_dec_dpcd_caps *dsc_caps)
{
stream->timing.flags.DSC = 0;
+ dsc_caps->is_dsc_supported = false;
if (aconnector->dc_link && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
sink->sink_signal == SIGNAL_TYPE_EDP)) {
@@ -10737,6 +10763,8 @@ static int dm_update_plane_state(struct dc *dc,
dm_new_plane_state->dc_state = dc_new_plane_state;
+ dm_new_crtc_state->mpo_requested |= (plane->type == DRM_PLANE_TYPE_OVERLAY);
+
/* Tell DC to do a full surface update every time there
* is a plane change. Inefficient, but works for now.
*/
@@ -10889,7 +10917,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
enum dc_status status;
int ret, i;
bool lock_and_validation_needed = false;
- struct dm_crtc_state *dm_old_crtc_state;
+ struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
#if defined(CONFIG_DRM_AMD_DC_DCN)
struct dsc_mst_fairness_vars vars[MAX_PIPES];
struct drm_dp_mst_topology_state *mst_state;
@@ -11071,6 +11099,12 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
goto fail;
}
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ if (dm_new_crtc_state->mpo_requested)
+ DRM_DEBUG_DRIVER("MPO enablement requested on crtc:[%p]\n", crtc);
+ }
+
/* Check cursor planes scaling */
for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
ret = dm_check_crtc_cursor(state, crtc, new_crtc_state);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index c98e402eab0c..b9a69b0cef23 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -626,6 +626,8 @@ struct dm_crtc_state {
bool cm_has_degamma;
bool cm_is_degamma_srgb;
+ bool mpo_requested;
+
int update_type;
int active_planes;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index 9f35f2e8f971..cac80ba69072 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -38,7 +38,6 @@
#include "clk/clk_11_0_0_offset.h"
#include "clk/clk_11_0_0_sh_mask.h"
-#include "irq/dcn20/irq_service_dcn20.h"
#undef FN
#define FN(reg_name, field_name) \
@@ -223,8 +222,6 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
bool force_reset = false;
bool p_state_change_support;
int total_plane_count;
- int irq_src;
- uint32_t hpd_state;
if (dc->work_arounds.skip_clock_update)
return;
@@ -242,13 +239,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
if (dc->res_pool->pp_smu)
pp_smu = &dc->res_pool->pp_smu->nv_funcs;
- for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD6; irq_src++) {
- hpd_state = dc_get_hpd_state_dcn20(dc->res_pool->irqs, irq_src);
- if (hpd_state)
- break;
- }
-
- if (display_count == 0 && !hpd_state)
+ if (display_count == 0)
enter_display_off = true;
if (enter_display_off == safe_to_lower) {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index fbda42313bfe..f4dee0e48a67 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -42,7 +42,6 @@
#include "clk/clk_10_0_2_sh_mask.h"
#include "renoir_ip_offset.h"
-#include "irq/dcn21/irq_service_dcn21.h"
/* Constants */
@@ -129,11 +128,9 @@ static void rn_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
struct dc *dc = clk_mgr_base->ctx->dc;
int display_count;
- int irq_src;
bool update_dppclk = false;
bool update_dispclk = false;
bool dpp_clock_lowered = false;
- uint32_t hpd_state;
struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
@@ -150,14 +147,8 @@ static void rn_update_clocks(struct clk_mgr *clk_mgr_base,
display_count = rn_get_active_display_cnt_wa(dc, context);
- for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD5; irq_src++) {
- hpd_state = dc_get_hpd_state_dcn21(dc->res_pool->irqs, irq_src);
- if (hpd_state)
- break;
- }
-
/* if we can go lower, go lower */
- if (display_count == 0 && !hpd_state) {
+ if (display_count == 0) {
rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER);
/* update power state */
clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
index b7ace235a2d5..a1011f3273f3 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
@@ -119,6 +119,12 @@ static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
+ if (result == VBIOSSMC_Result_Failed) {
+ ASSERT(0);
+ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
+ return -1;
+ }
+
if (IS_SMU_TIMEOUT(result)) {
ASSERT(0);
dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index dc1380b6c5e0..b5e570d33ca9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3971,102 +3971,73 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off)
{
struct cp_psp *cp_psp = &pipe_ctx->stream->ctx->cp_psp;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
struct link_encoder *link_enc = NULL;
-#endif
+ struct cp_psp_stream_config config = {0};
+ enum dp_panel_mode panel_mode =
+ dp_get_panel_mode(pipe_ctx->stream->link);
- if (cp_psp && cp_psp->funcs.update_stream_config) {
- struct cp_psp_stream_config config = {0};
- enum dp_panel_mode panel_mode =
- dp_get_panel_mode(pipe_ctx->stream->link);
+ if (cp_psp == NULL || cp_psp->funcs.update_stream_config == NULL)
+ return;
- config.otg_inst = (uint8_t) pipe_ctx->stream_res.tg->inst;
- /*stream_enc_inst*/
- config.dig_fe = (uint8_t) pipe_ctx->stream_res.stream_enc->stream_enc_inst;
- config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - ENGINE_ID_DIGA;
-
- if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY ||
- pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
- if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY)
- link_enc = pipe_ctx->stream->link->link_enc;
- else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
- if (pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign) {
- link_enc = link_enc_cfg_get_link_enc_used_by_stream(
- pipe_ctx->stream->ctx->dc,
- pipe_ctx->stream);
- }
- ASSERT(link_enc);
+ if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY)
+ link_enc = pipe_ctx->stream->link->link_enc;
+ else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign)
+ link_enc = link_enc_cfg_get_link_enc_used_by_stream(
+ pipe_ctx->stream->ctx->dc,
+ pipe_ctx->stream);
+ ASSERT(link_enc);
+ if (link_enc == NULL)
+ return;
- // Initialize PHY ID with ABCDE - 01234 mapping except when it is B0
- config.phy_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
+ /* otg instance */
+ config.otg_inst = (uint8_t) pipe_ctx->stream_res.tg->inst;
- // Add flag to guard new A0 DIG mapping
- if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true &&
- pipe_ctx->stream->link->dc->ctx->dce_version == DCN_VERSION_3_1) {
- config.dig_be = link_enc->preferred_engine;
- config.dio_output_type = pipe_ctx->stream->link->ep_type;
- config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
- } else {
- config.dio_output_type = 0;
- config.dio_output_idx = 0;
- }
+ /* dig front end */
+ config.dig_fe = (uint8_t) pipe_ctx->stream_res.stream_enc->stream_enc_inst;
- // Add flag to guard B0 implementation
- if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true &&
- link_enc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
- if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
- // enum ID 1-4 maps to DPIA PHY ID 0-3
- config.phy_idx = pipe_ctx->stream->link->link_id.enum_id - ENUM_ID_1;
- } else { // for non DPIA mode over B0, ABCDE maps to 01564
-
- switch (link_enc->transmitter) {
- case TRANSMITTER_UNIPHY_A:
- config.phy_idx = 0;
- break;
- case TRANSMITTER_UNIPHY_B:
- config.phy_idx = 1;
- break;
- case TRANSMITTER_UNIPHY_C:
- config.phy_idx = 5;
- break;
- case TRANSMITTER_UNIPHY_D:
- config.phy_idx = 6;
- break;
- case TRANSMITTER_UNIPHY_E:
- config.phy_idx = 4;
- break;
- default:
- config.phy_idx = 0;
- break;
- }
+ /* stream encoder index */
+ config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - ENGINE_ID_DIGA;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+ if (is_dp_128b_132b_signal(pipe_ctx))
+ config.stream_enc_idx =
+ pipe_ctx->stream_res.hpo_dp_stream_enc->id - ENGINE_ID_HPO_DP_0;
+#endif
- }
- }
- } else if (pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign) {
- link_enc = link_enc_cfg_get_link_enc_used_by_stream(
- pipe_ctx->stream->ctx->dc,
- pipe_ctx->stream);
- config.phy_idx = 0; /* Clear phy_idx for non-physical display endpoints. */
- }
- ASSERT(link_enc);
- if (link_enc)
- config.link_enc_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- config.stream_enc_idx = pipe_ctx->stream_res.hpo_dp_stream_enc->id - ENGINE_ID_HPO_DP_0;
+ /* dig back end */
+ config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst;
- config.link_enc_idx = pipe_ctx->link_res.hpo_dp_link_enc->inst;
- config.dp2_enabled = 1;
- }
+ /* link encoder index */
+ config.link_enc_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+ if (is_dp_128b_132b_signal(pipe_ctx))
+ config.link_enc_idx = pipe_ctx->link_res.hpo_dp_link_enc->inst;
#endif
- config.dpms_off = dpms_off;
- config.dm_stream_ctx = pipe_ctx->stream->dm_stream_context;
- config.assr_enabled = (panel_mode == DP_PANEL_MODE_EDP);
- config.mst_enabled = (pipe_ctx->stream->signal ==
- SIGNAL_TYPE_DISPLAY_PORT_MST);
- cp_psp->funcs.update_stream_config(cp_psp->handle, &config);
- }
+ /* dio output index */
+ config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
+
+ /* phy index */
+ config.phy_idx = resource_transmitter_to_phy_idx(
+ pipe_ctx->stream->link->dc, link_enc->transmitter);
+ if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+ /* USB4 DPIA doesn't use PHY in our soc, initialize it to 0 */
+ config.phy_idx = 0;
+
+ /* stream properties */
+ config.assr_enabled = (panel_mode == DP_PANEL_MODE_EDP) ? 1 : 0;
+ config.mst_enabled = (pipe_ctx->stream->signal ==
+ SIGNAL_TYPE_DISPLAY_PORT_MST) ? 1 : 0;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+ config.dp2_enabled = is_dp_128b_132b_signal(pipe_ctx) ? 1 : 0;
+#endif
+ config.usb4_enabled = (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ?
+ 1 : 0;
+ config.dpms_off = dpms_off;
+
+ /* dm stream context */
+ config.dm_stream_ctx = pipe_ctx->stream->dm_stream_context;
+
+ cp_psp->funcs.update_stream_config(cp_psp->handle, &config);
}
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index de5c7d1e0267..d4ff6cc6b8d9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -3216,3 +3216,36 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt(
return hpo_dp_link_enc;
}
#endif
+
+uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter)
+{
+ /* TODO - get transmitter to phy idx mapping from DMUB */
+ uint8_t phy_idx = transmitter - TRANSMITTER_UNIPHY_A;
+
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+ if (dc->ctx->dce_version == DCN_VERSION_3_1 &&
+ dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
+ switch (transmitter) {
+ case TRANSMITTER_UNIPHY_A:
+ phy_idx = 0;
+ break;
+ case TRANSMITTER_UNIPHY_B:
+ phy_idx = 1;
+ break;
+ case TRANSMITTER_UNIPHY_C:
+ phy_idx = 5;
+ break;
+ case TRANSMITTER_UNIPHY_D:
+ phy_idx = 6;
+ break;
+ case TRANSMITTER_UNIPHY_E:
+ phy_idx = 4;
+ break;
+ default:
+ phy_idx = 0;
+ break;
+ }
+ }
+#endif
+ return phy_idx;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index f19015413ce3..530a72e3eefe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -1365,7 +1365,12 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
uint32_t opp_id_src1 = OPP_ID_INVALID;
// Step 1: To find out which OPTC is running & OPTC DSC is ON
- for (i = 0; i < dc->res_pool->res_cap->num_timing_generator; i++) {
+ // We can't use res_pool->res_cap->num_timing_generator to check
+ // Because it records display pipes default setting built in driver,
+ // not display pipes of the current chip.
+ // Some ASICs would be fused display pipes less than the default setting.
+ // In dcnxx_resource_construct function, driver would obatin real information.
+ for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
uint32_t optc_dsc_state = 0;
struct timing_generator *tg = dc->res_pool->timing_generators[i];
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
index 71c359f9cdd2..8b9b1a5309ba 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
@@ -100,6 +100,35 @@ static uint8_t phy_id_from_transmitter(enum transmitter t)
return phy_id;
}
+static bool has_query_dp_alt(struct link_encoder *enc)
+{
+ struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv;
+
+ /* Supports development firmware and firmware >= 4.0.11 */
+ return dc_dmub_srv &&
+ !(dc_dmub_srv->dmub->fw_version >= DMUB_FW_VERSION(4, 0, 0) &&
+ dc_dmub_srv->dmub->fw_version <= DMUB_FW_VERSION(4, 0, 10));
+}
+
+static bool query_dp_alt_from_dmub(struct link_encoder *enc,
+ union dmub_rb_cmd *cmd)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->query_dp_alt.header.type = DMUB_CMD__VBIOS;
+ cmd->query_dp_alt.header.sub_type =
+ DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT;
+ cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data);
+ cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter);
+
+ if (!dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, cmd))
+ return false;
+
+ return true;
+}
+
void dcn31_link_encoder_set_dio_phy_mux(
struct link_encoder *enc,
enum encoder_type_select sel,
@@ -569,45 +598,90 @@ void dcn31_link_encoder_disable_output(
bool dcn31_link_encoder_is_in_alt_mode(struct link_encoder *enc)
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
- struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv;
union dmub_rb_cmd cmd;
- bool is_usb_c_alt_mode = false;
+ uint32_t dp_alt_mode_disable;
- if (enc->features.flags.bits.DP_IS_USB_C && dc_dmub_srv) {
- memset(&cmd, 0, sizeof(cmd));
- cmd.query_dp_alt.header.type = DMUB_CMD__VBIOS;
- cmd.query_dp_alt.header.sub_type = DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT;
- cmd.query_dp_alt.header.payload_bytes = sizeof(cmd.panel_cntl.data);
- cmd.query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter);
+ /* Only applicable to USB-C PHY. */
+ if (!enc->features.flags.bits.DP_IS_USB_C)
+ return false;
- if (!dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, &cmd))
+ /*
+ * Use the new interface from DMCUB if available.
+ * Avoids hanging the RDCPSPIPE if DMCUB wasn't already running.
+ */
+ if (has_query_dp_alt(enc)) {
+ if (!query_dp_alt_from_dmub(enc, &cmd))
return false;
- is_usb_c_alt_mode = (cmd.query_dp_alt.data.is_dp_alt_disable == 0);
+ return (cmd.query_dp_alt.data.is_dp_alt_disable == 0);
}
- return is_usb_c_alt_mode;
+ /* Legacy path, avoid if possible. */
+ if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) {
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE,
+ &dp_alt_mode_disable);
+ } else {
+ /*
+ * B0 phys use a new set of registers to check whether alt mode is disabled.
+ * if value == 1 alt mode is disabled, otherwise it is enabled.
+ */
+ if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) ||
+ (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) ||
+ (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) {
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE,
+ &dp_alt_mode_disable);
+ } else {
+ REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE,
+ &dp_alt_mode_disable);
+ }
+ }
+
+ return (dp_alt_mode_disable == 0);
}
void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, struct dc_link_settings *link_settings)
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
- struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv;
union dmub_rb_cmd cmd;
+ uint32_t is_in_usb_c_dp4_mode = 0;
dcn10_link_encoder_get_max_link_cap(enc, link_settings);
- if (enc->features.flags.bits.DP_IS_USB_C && dc_dmub_srv) {
- memset(&cmd, 0, sizeof(cmd));
- cmd.query_dp_alt.header.type = DMUB_CMD__VBIOS;
- cmd.query_dp_alt.header.sub_type = DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT;
- cmd.query_dp_alt.header.payload_bytes = sizeof(cmd.panel_cntl.data);
- cmd.query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter);
+ /* Take the link cap directly if not USB */
+ if (!enc->features.flags.bits.DP_IS_USB_C)
+ return;
- if (!dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, &cmd))
+ /*
+ * Use the new interface from DMCUB if available.
+ * Avoids hanging the RDCPSPIPE if DMCUB wasn't already running.
+ */
+ if (has_query_dp_alt(enc)) {
+ if (!query_dp_alt_from_dmub(enc, &cmd))
return;
- if (cmd.query_dp_alt.data.is_usb && cmd.query_dp_alt.data.is_dp4 == 0)
+ if (cmd.query_dp_alt.data.is_usb &&
+ cmd.query_dp_alt.data.is_dp4 == 0)
link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
+
+ return;
}
+
+ /* Legacy path, avoid if possible. */
+ if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) {
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4,
+ &is_in_usb_c_dp4_mode);
+ } else {
+ if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) ||
+ (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) ||
+ (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) {
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4,
+ &is_in_usb_c_dp4_mode);
+ } else {
+ REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DP4,
+ &is_in_usb_c_dp4_mode);
+ }
+ }
+
+ if (!is_in_usb_c_dp4_mode)
+ link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 4d9c64d982d7..42ed47e8133d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -1984,7 +1984,7 @@ static void dcn31_calculate_wm_and_dlg_fp(
pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- if (dc->config.forced_clocks) {
+ if (dc->config.forced_clocks || dc->debug.max_disp_clk) {
pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
index 511f9e1159c7..4229369c57f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
@@ -34,12 +34,12 @@ struct cp_psp_stream_config {
uint8_t dig_fe;
uint8_t link_enc_idx;
uint8_t stream_enc_idx;
- uint8_t phy_idx;
uint8_t dio_output_idx;
- uint8_t dio_output_type;
+ uint8_t phy_idx;
uint8_t assr_enabled;
uint8_t mst_enabled;
uint8_t dp2_enabled;
+ uint8_t usb4_enabled;
void *dm_stream_ctx;
bool dpms_off;
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index e589cbe67307..4249bf306e09 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -208,4 +208,6 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt(
const struct dc_link *link);
#endif
+uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter);
+
#endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
index 9ccafe007b23..c4b067d01895 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
@@ -132,31 +132,6 @@ enum dc_irq_source to_dal_irq_source_dcn20(
}
}
-uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum dc_irq_source source)
-{
- const struct irq_source_info *info;
- uint32_t addr;
- uint32_t value;
- uint32_t current_status;
-
- info = find_irq_source_info(irq_service, source);
- if (!info)
- return 0;
-
- addr = info->status_reg;
- if (!addr)
- return 0;
-
- value = dm_read_reg(irq_service->ctx, addr);
- current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE);
-
- return current_status;
-}
-
static bool hpd_ack(
struct irq_service *irq_service,
const struct irq_source_info *info)
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
index 4d69ab24ca25..aee4b37999f1 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
@@ -31,6 +31,4 @@
struct irq_service *dal_irq_service_dcn20_create(
struct irq_service_init_data *init_data);
-uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum dc_irq_source source);
-
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
index 235294534c43..0f15bcada4e9 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
@@ -134,31 +134,6 @@ static enum dc_irq_source to_dal_irq_source_dcn21(struct irq_service *irq_servic
return DC_IRQ_SOURCE_INVALID;
}
-uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum dc_irq_source source)
-{
- const struct irq_source_info *info;
- uint32_t addr;
- uint32_t value;
- uint32_t current_status;
-
- info = find_irq_source_info(irq_service, source);
- if (!info)
- return 0;
-
- addr = info->status_reg;
- if (!addr)
- return 0;
-
- value = dm_read_reg(irq_service->ctx, addr);
- current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE);
-
- return current_status;
-}
-
static bool hpd_ack(
struct irq_service *irq_service,
const struct irq_source_info *info)
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
index 616470e32380..da2bd0e93d7a 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
@@ -31,6 +31,4 @@
struct irq_service *dal_irq_service_dcn21_create(
struct irq_service_init_data *init_data);
-uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum dc_irq_source source);
-
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index 4db1133e4466..a2a4fbeb83f8 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -79,7 +79,7 @@ void dal_irq_service_destroy(struct irq_service **irq_service)
*irq_service = NULL;
}
-const struct irq_source_info *find_irq_source_info(
+static const struct irq_source_info *find_irq_source_info(
struct irq_service *irq_service,
enum dc_irq_source source)
{
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
index e60b82480093..dbfcb096eedd 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
@@ -69,10 +69,6 @@ struct irq_service {
const struct irq_service_funcs *funcs;
};
-const struct irq_source_info *find_irq_source_info(
- struct irq_service *irq_service,
- enum dc_irq_source source);
-
void dal_irq_service_construct(
struct irq_service *irq_service,
struct irq_service_init_data *init_data);
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
index 6d648c889866..f7420c3f5672 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
@@ -104,6 +104,7 @@ struct mod_hdcp_displayport {
uint8_t rev;
uint8_t assr_enabled;
uint8_t mst_enabled;
+ uint8_t usb4_enabled;
};
struct mod_hdcp_hdmi {
@@ -249,7 +250,6 @@ struct mod_hdcp_link {
uint8_t ddc_line;
uint8_t link_enc_idx;
uint8_t phy_idx;
- uint8_t dio_output_type;
uint8_t dio_output_id;
uint8_t hdcp_supported_informational;
union {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 380811b91350..4885c4ae78b7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1625,10 +1625,18 @@ static int aldebaran_set_df_cstate(struct smu_context *smu,
static int aldebaran_allow_xgmi_power_down(struct smu_context *smu, bool en)
{
- return smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_GmiPwrDnControl,
- en ? 0 : 1,
- NULL);
+ struct amdgpu_device *adev = smu->adev;
+
+ /* The message only works on master die and NACK will be sent
+ back for other dies, only send it on master die */
+ if (!adev->smuio.funcs->get_socket_id(adev) &&
+ !adev->smuio.funcs->get_die_id(adev))
+ return smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_GmiPwrDnControl,
+ en ? 0 : 1,
+ NULL);
+ else
+ return 0;
}
static const struct throttling_logging_label {
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index aef2fbd676e5..9603193d2fa1 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -828,8 +828,8 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
}
if (!crtc_state->enable && !can_update_disabled) {
- drm_dbg_kms(plane_state->crtc->dev,
- "Cannot update plane of a disabled CRTC.\n");
+ drm_dbg_kms(plane_state->plane->dev,
+ "Cannot update plane of a disabled CRTC.\n");
return -EINVAL;
}
@@ -839,8 +839,8 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale);
vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale);
if (hscale < 0 || vscale < 0) {
- drm_dbg_kms(plane_state->crtc->dev,
- "Invalid scaling of plane\n");
+ drm_dbg_kms(plane_state->plane->dev,
+ "Invalid scaling of plane\n");
drm_rect_debug_print("src: ", &plane_state->src, true);
drm_rect_debug_print("dst: ", &plane_state->dst, false);
return -ERANGE;
@@ -864,8 +864,8 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
return 0;
if (!can_position && !drm_rect_equals(dst, &clip)) {
- drm_dbg_kms(plane_state->crtc->dev,
- "Plane must cover entire CRTC\n");
+ drm_dbg_kms(plane_state->plane->dev,
+ "Plane must cover entire CRTC\n");
drm_rect_debug_print("dst: ", dst, false);
drm_rect_debug_print("clip: ", &clip, false);
return -EINVAL;
@@ -1016,7 +1016,7 @@ crtc_needs_disable(struct drm_crtc_state *old_state,
* it's in self refresh mode and needs to be fully disabled.
*/
return old_state->active ||
- (old_state->self_refresh_active && !new_state->enable) ||
+ (old_state->self_refresh_active && !new_state->active) ||
new_state->self_refresh_active;
}
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index f3d79eda94bb..8b3822142fed 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -5511,6 +5511,7 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
mutex_init(&mgr->probe_lock);
#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
mutex_init(&mgr->topology_ref_history_lock);
+ stack_depot_init();
#endif
INIT_LIST_HEAD(&mgr->tx_msg_downq);
INIT_LIST_HEAD(&mgr->destroy_port_list);
diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
index ded8968b3e8a..0327d595e028 100644
--- a/drivers/gpu/drm/drm_mipi_dbi.c
+++ b/drivers/gpu/drm/drm_mipi_dbi.c
@@ -209,11 +209,11 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb,
ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
if (ret)
return ret;
- src = data[0].vaddr; /* TODO: Use mapping abstraction properly */
ret = drm_gem_fb_vmap(fb, map, data);
if (ret)
goto out_drm_gem_fb_end_cpu_access;
+ src = data[0].vaddr; /* TODO: Use mapping abstraction properly */
switch (fb->format->format) {
case DRM_FORMAT_RGB565:
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 7d1c578388d3..8257f9d4f619 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -980,6 +980,10 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size)
add_hole(&mm->head_node);
mm->scan_active = 0;
+
+#ifdef CONFIG_DRM_DEBUG_MM
+ stack_depot_init();
+#endif
}
EXPORT_SYMBOL(drm_mm_init);
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index c97323365675..918065982db4 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -107,6 +107,11 @@ static void __drm_stack_depot_print(depot_stack_handle_t stack_depot)
kfree(buf);
}
+
+static void __drm_stack_depot_init(void)
+{
+ stack_depot_init();
+}
#else /* CONFIG_DRM_DEBUG_MODESET_LOCK */
static depot_stack_handle_t __drm_stack_depot_save(void)
{
@@ -115,6 +120,9 @@ static depot_stack_handle_t __drm_stack_depot_save(void)
static void __drm_stack_depot_print(depot_stack_handle_t stack_depot)
{
}
+static void __drm_stack_depot_init(void)
+{
+}
#endif /* CONFIG_DRM_DEBUG_MODESET_LOCK */
/**
@@ -359,6 +367,7 @@ void drm_modeset_lock_init(struct drm_modeset_lock *lock)
{
ww_mutex_init(&lock->mutex, &crtc_ww_class);
INIT_LIST_HEAD(&lock->head);
+ __drm_stack_depot_init();
}
EXPORT_SYMBOL(drm_modeset_lock_init);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index ba5fd012a40a..37018bc55810 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1047,7 +1047,7 @@ pm_put:
void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
{
- unsigned int i = 0;
+ unsigned int i;
dev_err(gpu->dev, "recover hung GPU!\n");
@@ -1060,7 +1060,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
/* complete all events, the GPU won't do it after the reset */
spin_lock(&gpu->event_spinlock);
- for_each_set_bit_from(i, gpu->event_bitmap, ETNA_NR_EVENTS)
+ for_each_set_bit(i, gpu->event_bitmap, ETNA_NR_EVENTS)
complete(&gpu->event_free);
bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS);
spin_unlock(&gpu->event_spinlock);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 9c9d574f0b8c..cab505277595 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -1298,6 +1298,28 @@ static void tgl_dkl_phy_set_signal_levels(struct intel_encoder *encoder,
intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port),
DKL_TX_DP20BITMODE, 0);
+
+ if (IS_ALDERLAKE_P(dev_priv)) {
+ u32 val;
+
+ if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
+ if (ln == 0) {
+ val = DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(0);
+ val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(2);
+ } else {
+ val = DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(3);
+ val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(3);
+ }
+ } else {
+ val = DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(0);
+ val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(0);
+ }
+
+ intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port),
+ DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK |
+ DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK,
+ val);
+ }
}
}
diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
index 1e689d573512..e2dfb93a82bd 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
@@ -477,14 +477,14 @@ static const struct intel_ddi_buf_trans icl_combo_phy_trans_hdmi = {
static const union intel_ddi_buf_trans_entry _ehl_combo_phy_trans_dp[] = {
/* NT mV Trans mV db */
{ .icl = { 0xA, 0x33, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */
- { .icl = { 0xA, 0x47, 0x36, 0x00, 0x09 } }, /* 350 500 3.1 */
- { .icl = { 0xC, 0x64, 0x34, 0x00, 0x0B } }, /* 350 700 6.0 */
- { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 350 900 8.2 */
+ { .icl = { 0xA, 0x47, 0x38, 0x00, 0x07 } }, /* 350 500 3.1 */
+ { .icl = { 0xC, 0x64, 0x33, 0x00, 0x0C } }, /* 350 700 6.0 */
+ { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 350 900 8.2 */
{ .icl = { 0xA, 0x46, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */
- { .icl = { 0xC, 0x64, 0x38, 0x00, 0x07 } }, /* 500 700 2.9 */
+ { .icl = { 0xC, 0x64, 0x37, 0x00, 0x08 } }, /* 500 700 2.9 */
{ .icl = { 0x6, 0x7F, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */
{ .icl = { 0xC, 0x61, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */
- { .icl = { 0x6, 0x7F, 0x38, 0x00, 0x07 } }, /* 600 900 3.5 */
+ { .icl = { 0x6, 0x7F, 0x37, 0x00, 0x08 } }, /* 600 900 3.5 */
{ .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */
};
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index aaf970c37aa2..1478c02a82cb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -538,6 +538,9 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
{
struct i915_mmap_offset *mmo, *mn;
+ if (obj->ops->unmap_virtual)
+ obj->ops->unmap_virtual(obj);
+
spin_lock(&obj->mmo.lock);
rbtree_postorder_for_each_entry_safe(mmo, mn,
&obj->mmo.offsets, offset) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index f9f7e44099fe..4b4829eb16c2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -67,6 +67,7 @@ struct drm_i915_gem_object_ops {
int (*pwrite)(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *arg);
u64 (*mmap_offset)(struct drm_i915_gem_object *obj);
+ void (*unmap_virtual)(struct drm_i915_gem_object *obj);
int (*dmabuf_export)(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 89b70f5cde7a..9f429ed6e78a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -161,7 +161,6 @@ retry:
/* Immediately discard the backing storage */
int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
- drm_gem_free_mmap_offset(&obj->base);
if (obj->ops->truncate)
return obj->ops->truncate(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 923cc7ad8d70..de3fe79b665a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -556,6 +556,20 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
}
+static int i915_ttm_truncate(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ int err;
+
+ WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED);
+
+ err = i915_ttm_move_notify(bo);
+ if (err)
+ return err;
+
+ return i915_ttm_purge(obj);
+}
+
static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
@@ -883,6 +897,11 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
if (ret)
return ret;
+ if (obj->mm.madv != I915_MADV_WILLNEED) {
+ dma_resv_unlock(bo->base.resv);
+ return VM_FAULT_SIGBUS;
+ }
+
if (drm_dev_enter(dev, &idx)) {
ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
TTM_BO_VM_NUM_PREFAULT);
@@ -945,6 +964,11 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
return drm_vma_node_offset_addr(&obj->base.vma_node);
}
+static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj)
+{
+ ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
+}
+
static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
.name = "i915_gem_object_ttm",
.flags = I915_GEM_OBJECT_IS_SHRINKABLE |
@@ -952,7 +976,7 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
.get_pages = i915_ttm_get_pages,
.put_pages = i915_ttm_put_pages,
- .truncate = i915_ttm_purge,
+ .truncate = i915_ttm_truncate,
.shrinker_release_pages = i915_ttm_shrinker_release_pages,
.adjust_lru = i915_ttm_adjust_lru,
@@ -960,6 +984,7 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
.migrate = i915_ttm_migrate,
.mmap_offset = i915_ttm_mmap_offset,
+ .unmap_virtual = i915_ttm_unmap_virtual,
.mmap_ops = &vm_ops_ttm,
};
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 743e6ab2c40b..c6291429b00c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -1368,20 +1368,10 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
}
}
- if (!obj->ops->mmap_ops) {
- err = check_absent(addr, obj->base.size);
- if (err) {
- pr_err("%s: was not absent\n", obj->mm.region->name);
- goto out_unmap;
- }
- } else {
- /* ttm allows access to evicted regions by design */
-
- err = check_present(addr, obj->base.size);
- if (err) {
- pr_err("%s: was not present\n", obj->mm.region->name);
- goto out_unmap;
- }
+ err = check_absent(addr, obj->base.size);
+ if (err) {
+ pr_err("%s: was not absent\n", obj->mm.region->name);
+ goto out_unmap;
}
out_unmap:
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 170bba913c30..e27f3b7cf094 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -4273,26 +4273,6 @@ static struct ctl_table oa_table[] = {
{}
};
-static struct ctl_table i915_root[] = {
- {
- .procname = "i915",
- .maxlen = 0,
- .mode = 0555,
- .child = oa_table,
- },
- {}
-};
-
-static struct ctl_table dev_root[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = i915_root,
- },
- {}
-};
-
static void oa_init_supported_formats(struct i915_perf *perf)
{
struct drm_i915_private *i915 = perf->i915;
@@ -4488,7 +4468,7 @@ static int destroy_config(int id, void *p, void *data)
int i915_perf_sysctl_register(void)
{
- sysctl_header = register_sysctl_table(dev_root);
+ sysctl_header = register_sysctl("dev/i915", oa_table);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 4c28dadf8d69..971d601fe751 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -11166,8 +11166,12 @@ enum skl_power_gate {
_DKL_PHY2_BASE) + \
_DKL_TX_DPCNTL1)
-#define _DKL_TX_DPCNTL2 0x2C8
-#define DKL_TX_DP20BITMODE (1 << 2)
+#define _DKL_TX_DPCNTL2 0x2C8
+#define DKL_TX_DP20BITMODE REG_BIT(2)
+#define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK REG_GENMASK(4, 3)
+#define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(val) REG_FIELD_PREP(DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK, (val))
+#define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK REG_GENMASK(6, 5)
+#define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(val) REG_FIELD_PREP(DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK, (val))
#define DKL_TX_DPCNTL2(tc_port) _MMIO(_PORT(tc_port, \
_DKL_PHY1_BASE, \
_DKL_PHY2_BASE) + \
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 22dab36afcb6..53f1ccb78849 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -68,6 +68,9 @@ static noinline depot_stack_handle_t __save_depot_stack(void)
static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
{
spin_lock_init(&rpm->debug.lock);
+
+ if (rpm->available)
+ stack_depot_init();
}
static noinline depot_stack_handle_t
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
index 195b2323ec00..4b6f5655fab5 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
@@ -107,9 +107,12 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev,
static void i915_pxp_tee_component_unbind(struct device *i915_kdev,
struct device *tee_kdev, void *data)
{
+ struct drm_i915_private *i915 = kdev_to_i915(i915_kdev);
struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev);
+ intel_wakeref_t wakeref;
- intel_pxp_fini_hw(pxp);
+ with_intel_runtime_pm_if_in_use(&i915->runtime_pm, wakeref)
+ intel_pxp_fini_hw(pxp);
mutex_lock(&pxp->tee_mutex);
pxp->pxp_component = NULL;
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index e2488559cc9f..11ad210919c8 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -666,18 +666,18 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
if (unlikely(!fpriv)) {
r = -ENOMEM;
- goto out_suspend;
+ goto err_suspend;
}
if (rdev->accel_working) {
vm = &fpriv->vm;
r = radeon_vm_init(rdev, vm);
if (r)
- goto out_fpriv;
+ goto err_fpriv;
r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
if (r)
- goto out_vm_fini;
+ goto err_vm_fini;
/* map the ib pool buffer read only into
* virtual address space */
@@ -685,7 +685,7 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
rdev->ring_tmp_bo.bo);
if (!vm->ib_bo_va) {
r = -ENOMEM;
- goto out_vm_fini;
+ goto err_vm_fini;
}
r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va,
@@ -693,19 +693,21 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
RADEON_VM_PAGE_READABLE |
RADEON_VM_PAGE_SNOOPED);
if (r)
- goto out_vm_fini;
+ goto err_vm_fini;
}
file_priv->driver_priv = fpriv;
}
- if (!r)
- goto out_suspend;
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+ return 0;
-out_vm_fini:
+err_vm_fini:
radeon_vm_fini(rdev, vm);
-out_fpriv:
+err_fpriv:
kfree(fpriv);
-out_suspend:
+
+err_suspend:
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return r;
diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
index b64d93da651d..5e2b0175df36 100644
--- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
+++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
@@ -658,8 +658,10 @@ int sun8i_hdmi_phy_get(struct sun8i_dw_hdmi *hdmi, struct device_node *node)
return -EPROBE_DEFER;
phy = platform_get_drvdata(pdev);
- if (!phy)
+ if (!phy) {
+ put_device(&pdev->dev);
return -EPROBE_DEFER;
+ }
hdmi->phy = phy;
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index 0037eefe3239..a3ad7c9736ec 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -68,9 +68,11 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp)
#if defined(__i386__) || defined(__x86_64__)
if (caching == ttm_write_combined)
tmp = pgprot_writecombine(tmp);
+#ifndef CONFIG_UML
else if (boot_cpu_data.x86 > 3)
tmp = pgprot_noncached(tmp);
-#endif
+#endif /* CONFIG_UML */
+#endif /* __i386__ || __x86_64__ */
#if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
defined(__powerpc__) || defined(__mips__)
if (caching == ttm_write_combined)
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 21f410901694..3313b92db531 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -279,7 +279,7 @@ void virtio_gpu_deinit(struct drm_device *dev)
flush_work(&vgdev->ctrlq.dequeue_work);
flush_work(&vgdev->cursorq.dequeue_work);
flush_work(&vgdev->config_changed_work);
- vgdev->vdev->config->reset(vgdev->vdev);
+ virtio_reset_device(vgdev->vdev);
vgdev->vdev->config->del_vqs(vgdev->vdev);
}
diff --git a/drivers/greybus/es2.c b/drivers/greybus/es2.c
index 15661c7f3633..e89cca015095 100644
--- a/drivers/greybus/es2.c
+++ b/drivers/greybus/es2.c
@@ -78,7 +78,7 @@ struct es2_cport_in {
* @hd: pointer to our gb_host_device structure
*
* @cport_in: endpoint, urbs and buffer for cport in messages
- * @cport_out_endpoint: endpoint for for cport out messages
+ * @cport_out_endpoint: endpoint for cport out messages
* @cport_out_urb: array of urbs for the CPort out messages
* @cport_out_urb_busy: array of flags to see if the @cport_out_urb is busy or
* not.
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 26cee452ec44..85975031389b 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -400,6 +400,7 @@
#define USB_DEVICE_ID_HP_X2 0x074d
#define USB_DEVICE_ID_HP_X2_10_COVER 0x0755
#define I2C_DEVICE_ID_HP_ENVY_X360_15 0x2d05
+#define I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100 0x29CF
#define I2C_DEVICE_ID_HP_SPECTRE_X360_15 0x2817
#define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544
#define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 1ce75e8b49d5..112901d2d8d2 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -330,6 +330,8 @@ static const struct hid_device_id hid_battery_quirks[] = {
HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15),
HID_BATTERY_QUIRK_IGNORE },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100),
+ HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15),
HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN),
diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c
index 72957a9f7117..efa6140915f4 100644
--- a/drivers/hid/hid-vivaldi.c
+++ b/drivers/hid/hid-vivaldi.c
@@ -6,16 +6,17 @@
* Author: Sean O'Brien <seobrien@chromium.org>
*/
+#include <linux/device.h>
#include <linux/hid.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/sysfs.h>
#define MIN_FN_ROW_KEY 1
#define MAX_FN_ROW_KEY 24
#define HID_VD_FN_ROW_PHYSMAP 0x00000001
#define HID_USAGE_FN_ROW_PHYSMAP (HID_UP_GOOGLEVENDOR | HID_VD_FN_ROW_PHYSMAP)
-static struct hid_driver hid_vivaldi;
-
struct vivaldi_data {
u32 function_row_physmap[MAX_FN_ROW_KEY - MIN_FN_ROW_KEY + 1];
int max_function_row_key;
@@ -40,7 +41,7 @@ static ssize_t function_row_physmap_show(struct device *dev,
return size;
}
-DEVICE_ATTR_RO(function_row_physmap);
+static DEVICE_ATTR_RO(function_row_physmap);
static struct attribute *sysfs_attrs[] = {
&dev_attr_function_row_physmap.attr,
NULL
@@ -74,10 +75,11 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
struct hid_usage *usage)
{
struct vivaldi_data *drvdata = hid_get_drvdata(hdev);
+ struct hid_report *report = field->report;
int fn_key;
int ret;
u32 report_len;
- u8 *buf;
+ u8 *report_data, *buf;
if (field->logical != HID_USAGE_FN_ROW_PHYSMAP ||
(usage->hid & HID_USAGE_PAGE) != HID_UP_ORDINAL)
@@ -89,12 +91,24 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
if (fn_key > drvdata->max_function_row_key)
drvdata->max_function_row_key = fn_key;
- buf = hid_alloc_report_buf(field->report, GFP_KERNEL);
- if (!buf)
+ report_data = buf = hid_alloc_report_buf(report, GFP_KERNEL);
+ if (!report_data)
return;
- report_len = hid_report_len(field->report);
- ret = hid_hw_raw_request(hdev, field->report->id, buf,
+ report_len = hid_report_len(report);
+ if (!report->id) {
+ /*
+ * hid_hw_raw_request() will stuff report ID (which will be 0)
+ * into the first byte of the buffer even for unnumbered
+ * reports, so we need to account for this to avoid getting
+ * -EOVERFLOW in return.
+ * Note that hid_alloc_report_buf() adds 7 bytes to the size
+ * so we can safely say that we have space for an extra byte.
+ */
+ report_len++;
+ }
+
+ ret = hid_hw_raw_request(hdev, report->id, report_data,
report_len, HID_FEATURE_REPORT,
HID_REQ_GET_REPORT);
if (ret < 0) {
@@ -103,7 +117,16 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
goto out;
}
- ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf,
+ if (!report->id) {
+ /*
+ * Undo the damage from hid_hw_raw_request() for unnumbered
+ * reports.
+ */
+ report_data++;
+ report_len--;
+ }
+
+ ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, report_data,
report_len, 0);
if (ret) {
dev_warn(&hdev->dev, "failed to report feature %d\n",
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index 8fe3efcb8327..614adb510dbd 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -28,11 +28,22 @@
struct uhid_device {
struct mutex devlock;
+
+ /* This flag tracks whether the HID device is usable for commands from
+ * userspace. The flag is already set before hid_add_device(), which
+ * runs in workqueue context, to allow hid_add_device() to communicate
+ * with userspace.
+ * However, if hid_add_device() fails, the flag is cleared without
+ * holding devlock.
+ * We guarantee that if @running changes from true to false while you're
+ * holding @devlock, it's still fine to access @hid.
+ */
bool running;
__u8 *rd_data;
uint rd_size;
+ /* When this is NULL, userspace may use UHID_CREATE/UHID_CREATE2. */
struct hid_device *hid;
struct uhid_event input_buf;
@@ -63,9 +74,18 @@ static void uhid_device_add_worker(struct work_struct *work)
if (ret) {
hid_err(uhid->hid, "Cannot register HID device: error %d\n", ret);
- hid_destroy_device(uhid->hid);
- uhid->hid = NULL;
- uhid->running = false;
+ /* We used to call hid_destroy_device() here, but that's really
+ * messy to get right because we have to coordinate with
+ * concurrent writes from userspace that might be in the middle
+ * of using uhid->hid.
+ * Just leave uhid->hid as-is for now, and clean it up when
+ * userspace tries to close or reinitialize the uhid instance.
+ *
+ * However, we do have to clear the ->running flag and do a
+ * wakeup to make sure userspace knows that the device is gone.
+ */
+ WRITE_ONCE(uhid->running, false);
+ wake_up_interruptible(&uhid->report_wait);
}
}
@@ -174,9 +194,9 @@ static int __uhid_report_queue_and_wait(struct uhid_device *uhid,
spin_unlock_irqrestore(&uhid->qlock, flags);
ret = wait_event_interruptible_timeout(uhid->report_wait,
- !uhid->report_running || !uhid->running,
+ !uhid->report_running || !READ_ONCE(uhid->running),
5 * HZ);
- if (!ret || !uhid->running || uhid->report_running)
+ if (!ret || !READ_ONCE(uhid->running) || uhid->report_running)
ret = -EIO;
else if (ret < 0)
ret = -ERESTARTSYS;
@@ -217,7 +237,7 @@ static int uhid_hid_get_report(struct hid_device *hid, unsigned char rnum,
struct uhid_event *ev;
int ret;
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EIO;
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -259,7 +279,7 @@ static int uhid_hid_set_report(struct hid_device *hid, unsigned char rnum,
struct uhid_event *ev;
int ret;
- if (!uhid->running || count > UHID_DATA_MAX)
+ if (!READ_ONCE(uhid->running) || count > UHID_DATA_MAX)
return -EIO;
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -474,7 +494,7 @@ static int uhid_dev_create2(struct uhid_device *uhid,
void *rd_data;
int ret;
- if (uhid->running)
+ if (uhid->hid)
return -EALREADY;
rd_size = ev->u.create2.rd_size;
@@ -556,15 +576,16 @@ static int uhid_dev_create(struct uhid_device *uhid,
static int uhid_dev_destroy(struct uhid_device *uhid)
{
- if (!uhid->running)
+ if (!uhid->hid)
return -EINVAL;
- uhid->running = false;
+ WRITE_ONCE(uhid->running, false);
wake_up_interruptible(&uhid->report_wait);
cancel_work_sync(&uhid->worker);
hid_destroy_device(uhid->hid);
+ uhid->hid = NULL;
kfree(uhid->rd_data);
return 0;
@@ -572,7 +593,7 @@ static int uhid_dev_destroy(struct uhid_device *uhid)
static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev)
{
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EINVAL;
hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input.data,
@@ -583,7 +604,7 @@ static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev)
static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev)
{
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EINVAL;
hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input2.data,
@@ -595,7 +616,7 @@ static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev)
static int uhid_dev_get_report_reply(struct uhid_device *uhid,
struct uhid_event *ev)
{
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EINVAL;
uhid_report_wake_up(uhid, ev->u.get_report_reply.id, ev);
@@ -605,7 +626,7 @@ static int uhid_dev_get_report_reply(struct uhid_device *uhid,
static int uhid_dev_set_report_reply(struct uhid_device *uhid,
struct uhid_event *ev)
{
- if (!uhid->running)
+ if (!READ_ONCE(uhid->running))
return -EINVAL;
uhid_report_wake_up(uhid, ev->u.set_report_reply.id, ev);
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 2a4cc39962e7..a7176fc0635d 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2588,6 +2588,24 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac,
}
}
+static bool wacom_wac_slot_is_active(struct input_dev *dev, int key)
+{
+ struct input_mt *mt = dev->mt;
+ struct input_mt_slot *s;
+
+ if (!mt)
+ return false;
+
+ for (s = mt->slots; s != mt->slots + mt->num_slots; s++) {
+ if (s->key == key &&
+ input_mt_get_value(s, ABS_MT_TRACKING_ID) >= 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
static void wacom_wac_finger_event(struct hid_device *hdev,
struct hid_field *field, struct hid_usage *usage, __s32 value)
{
@@ -2638,9 +2656,14 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
}
if (usage->usage_index + 1 == field->report_count) {
- if (equivalent_usage == wacom_wac->hid_data.last_slot_field &&
- wacom_wac->hid_data.confidence)
- wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
+ if (equivalent_usage == wacom_wac->hid_data.last_slot_field) {
+ bool touch_removed = wacom_wac_slot_is_active(wacom_wac->touch_input,
+ wacom_wac->hid_data.id) && !wacom_wac->hid_data.tipswitch;
+
+ if (wacom_wac->hid_data.confidence || touch_removed) {
+ wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
+ }
+ }
}
}
@@ -2659,6 +2682,10 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
hid_data->confidence = true;
+ hid_data->cc_report = 0;
+ hid_data->cc_index = -1;
+ hid_data->cc_value_index = -1;
+
for (i = 0; i < report->maxfield; i++) {
struct hid_field *field = report->field[i];
int j;
@@ -2692,11 +2719,14 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
hid_data->cc_index >= 0) {
struct hid_field *field = report->field[hid_data->cc_index];
int value = field->value[hid_data->cc_value_index];
- if (value)
+ if (value) {
hid_data->num_expected = value;
+ hid_data->num_received = 0;
+ }
}
else {
hid_data->num_expected = wacom_wac->features.touch_max;
+ hid_data->num_received = 0;
}
}
@@ -2724,6 +2754,7 @@ static void wacom_wac_finger_report(struct hid_device *hdev,
input_sync(input);
wacom_wac->hid_data.num_received = 0;
+ wacom_wac->hid_data.num_expected = 0;
/* keep touch state for pen event */
wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(wacom_wac);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 2829575fd9b7..60375879612f 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -1554,7 +1554,7 @@ int vmbus_request_offers(void)
struct vmbus_channel_msginfo *msginfo;
int ret;
- msginfo = kmalloc(sizeof(*msginfo) +
+ msginfo = kzalloc(sizeof(*msginfo) +
sizeof(struct vmbus_channel_message_header),
GFP_KERNEL);
if (!msginfo)
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 7be173a99f27..181d16bbf49d 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -44,10 +44,10 @@ EXPORT_SYMBOL_GPL(hv_vp_index);
u32 hv_max_vp_index;
EXPORT_SYMBOL_GPL(hv_max_vp_index);
-void __percpu **hyperv_pcpu_input_arg;
+void * __percpu *hyperv_pcpu_input_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
-void __percpu **hyperv_pcpu_output_arg;
+void * __percpu *hyperv_pcpu_output_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
/*
@@ -295,3 +295,14 @@ u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_s
return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
+
+void __weak *hv_map_memory(void *addr, unsigned long size)
+{
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(hv_map_memory);
+
+void __weak hv_unmap_memory(void *addr)
+{
+}
+EXPORT_SYMBOL_GPL(hv_unmap_memory);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 7ae04ccb1043..17bf55fe3169 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -33,6 +33,7 @@
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/syscore_ops.h>
+#include <linux/dma-map-ops.h>
#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"
@@ -2078,6 +2079,7 @@ struct hv_device *vmbus_device_create(const guid_t *type,
return child_device_obj;
}
+static u64 vmbus_dma_mask = DMA_BIT_MASK(64);
/*
* vmbus_device_register - Register the child device
*/
@@ -2118,6 +2120,8 @@ int vmbus_device_register(struct hv_device *child_device_obj)
}
hv_debug_add_dev_dir(child_device_obj);
+ child_device_obj->device.dma_mask = &vmbus_dma_mask;
+ child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
return 0;
err_kset_unregister:
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index d401f9acf450..9949eeb79378 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -451,7 +451,7 @@ static int i8k_get_power_status(void)
static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
{
- struct dell_smm_data *data = PDE_DATA(file_inode(fp));
+ struct dell_smm_data *data = pde_data(file_inode(fp));
int __user *argp = (int __user *)arg;
int speed, err;
int val = 0;
@@ -585,7 +585,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset)
static int i8k_open_fs(struct inode *inode, struct file *file)
{
- return single_open(file, i8k_proc_show, PDE_DATA(inode));
+ return single_open(file, i8k_proc_show, pde_data(inode));
}
static const struct proc_ops i8k_proc_ops = {
diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c
index 2a4bed0ab226..7352d2b3c756 100644
--- a/drivers/hwmon/ltc2992.c
+++ b/drivers/hwmon/ltc2992.c
@@ -248,8 +248,7 @@ static int ltc2992_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask
gpio_status = reg;
- gpio_nr = 0;
- for_each_set_bit_from(gpio_nr, mask, LTC2992_GPIO_NR) {
+ for_each_set_bit(gpio_nr, mask, LTC2992_GPIO_NR) {
if (test_bit(LTC2992_GPIO_BIT(gpio_nr), &gpio_status))
set_bit(gpio_nr, bits);
}
diff --git a/drivers/hwspinlock/stm32_hwspinlock.c b/drivers/hwspinlock/stm32_hwspinlock.c
index 3ad0ce0da4d9..5bd11a7fab65 100644
--- a/drivers/hwspinlock/stm32_hwspinlock.c
+++ b/drivers/hwspinlock/stm32_hwspinlock.c
@@ -54,8 +54,23 @@ static const struct hwspinlock_ops stm32_hwspinlock_ops = {
.relax = stm32_hwspinlock_relax,
};
+static void stm32_hwspinlock_disable_clk(void *data)
+{
+ struct platform_device *pdev = data;
+ struct stm32_hwspinlock *hw = platform_get_drvdata(pdev);
+ struct device *dev = &pdev->dev;
+
+ pm_runtime_get_sync(dev);
+ pm_runtime_disable(dev);
+ pm_runtime_set_suspended(dev);
+ pm_runtime_put_noidle(dev);
+
+ clk_disable_unprepare(hw->clk);
+}
+
static int stm32_hwspinlock_probe(struct platform_device *pdev)
{
+ struct device *dev = &pdev->dev;
struct stm32_hwspinlock *hw;
void __iomem *io_base;
size_t array_size;
@@ -66,41 +81,43 @@ static int stm32_hwspinlock_probe(struct platform_device *pdev)
return PTR_ERR(io_base);
array_size = STM32_MUTEX_NUM_LOCKS * sizeof(struct hwspinlock);
- hw = devm_kzalloc(&pdev->dev, sizeof(*hw) + array_size, GFP_KERNEL);
+ hw = devm_kzalloc(dev, sizeof(*hw) + array_size, GFP_KERNEL);
if (!hw)
return -ENOMEM;
- hw->clk = devm_clk_get(&pdev->dev, "hsem");
+ hw->clk = devm_clk_get(dev, "hsem");
if (IS_ERR(hw->clk))
return PTR_ERR(hw->clk);
- for (i = 0; i < STM32_MUTEX_NUM_LOCKS; i++)
- hw->bank.lock[i].priv = io_base + i * sizeof(u32);
+ ret = clk_prepare_enable(hw->clk);
+ if (ret) {
+ dev_err(dev, "Failed to prepare_enable clock\n");
+ return ret;
+ }
platform_set_drvdata(pdev, hw);
- pm_runtime_enable(&pdev->dev);
- ret = hwspin_lock_register(&hw->bank, &pdev->dev, &stm32_hwspinlock_ops,
- 0, STM32_MUTEX_NUM_LOCKS);
+ pm_runtime_get_noresume(dev);
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+ pm_runtime_put(dev);
- if (ret)
- pm_runtime_disable(&pdev->dev);
+ ret = devm_add_action_or_reset(dev, stm32_hwspinlock_disable_clk, pdev);
+ if (ret) {
+ dev_err(dev, "Failed to register action\n");
+ return ret;
+ }
- return ret;
-}
+ for (i = 0; i < STM32_MUTEX_NUM_LOCKS; i++)
+ hw->bank.lock[i].priv = io_base + i * sizeof(u32);
-static int stm32_hwspinlock_remove(struct platform_device *pdev)
-{
- struct stm32_hwspinlock *hw = platform_get_drvdata(pdev);
- int ret;
+ ret = devm_hwspin_lock_register(dev, &hw->bank, &stm32_hwspinlock_ops,
+ 0, STM32_MUTEX_NUM_LOCKS);
- ret = hwspin_lock_unregister(&hw->bank);
if (ret)
- dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret);
-
- pm_runtime_disable(&pdev->dev);
+ dev_err(dev, "Failed to register hwspinlock\n");
- return 0;
+ return ret;
}
static int __maybe_unused stm32_hwspinlock_runtime_suspend(struct device *dev)
@@ -135,7 +152,6 @@ MODULE_DEVICE_TABLE(of, stm32_hwpinlock_ids);
static struct platform_driver stm32_hwspinlock_driver = {
.probe = stm32_hwspinlock_probe,
- .remove = stm32_hwspinlock_remove,
.driver = {
.name = "stm32_hwspinlock",
.of_match_table = stm32_hwpinlock_ids,
diff --git a/drivers/hwtracing/coresight/coresight-cfg-preload.c b/drivers/hwtracing/coresight/coresight-cfg-preload.c
index 751af3710d56..e237a4edfa09 100644
--- a/drivers/hwtracing/coresight/coresight-cfg-preload.c
+++ b/drivers/hwtracing/coresight/coresight-cfg-preload.c
@@ -24,8 +24,13 @@ static struct cscfg_config_desc *preload_cfgs[] = {
NULL
};
+static struct cscfg_load_owner_info preload_owner = {
+ .type = CSCFG_OWNER_PRELOAD,
+};
+
/* preload called on initialisation */
-int cscfg_preload(void)
+int cscfg_preload(void *owner_handle)
{
- return cscfg_load_config_sets(preload_cfgs, preload_feats);
+ preload_owner.owner_handle = owner_handle;
+ return cscfg_load_config_sets(preload_cfgs, preload_feats, &preload_owner);
}
diff --git a/drivers/hwtracing/coresight/coresight-config.h b/drivers/hwtracing/coresight/coresight-config.h
index 25eb6c632692..9bd44b940add 100644
--- a/drivers/hwtracing/coresight/coresight-config.h
+++ b/drivers/hwtracing/coresight/coresight-config.h
@@ -97,6 +97,8 @@ struct cscfg_regval_desc {
* @params_desc: array of parameters used.
* @nr_regs: number of registers used.
* @regs_desc: array of registers used.
+ * @load_owner: handle to load owner for dynamic load and unload of features.
+ * @fs_group: reference to configfs group for dynamic unload.
*/
struct cscfg_feature_desc {
const char *name;
@@ -107,6 +109,8 @@ struct cscfg_feature_desc {
struct cscfg_parameter_desc *params_desc;
int nr_regs;
struct cscfg_regval_desc *regs_desc;
+ void *load_owner;
+ struct config_group *fs_group;
};
/**
@@ -128,7 +132,8 @@ struct cscfg_feature_desc {
* @presets: Array of preset values.
* @event_ea: Extended attribute for perf event value
* @active_cnt: ref count for activate on this configuration.
- *
+ * @load_owner: handle to load owner for dynamic load and unload of configs.
+ * @fs_group: reference to configfs group for dynamic unload.
*/
struct cscfg_config_desc {
const char *name;
@@ -141,6 +146,8 @@ struct cscfg_config_desc {
const u64 *presets; /* nr_presets * nr_total_params */
struct dev_ext_attribute *event_ea;
atomic_t active_cnt;
+ void *load_owner;
+ struct config_group *fs_group;
};
/**
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index 8a18c71df37a..88653d1c06a4 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -729,7 +729,7 @@ static inline void coresight_put_ref(struct coresight_device *csdev)
* coresight_grab_device - Power up this device and any of the helper
* devices connected to it for trace operation. Since the helper devices
* don't appear on the trace path, they should be handled along with the
- * the master device.
+ * master device.
*/
static int coresight_grab_device(struct coresight_device *csdev)
{
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 86a313857b58..bf18128cf5de 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -722,7 +722,16 @@ static int etm4_enable_sysfs(struct coresight_device *csdev)
{
struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
struct etm4_enable_arg arg = { };
- int ret;
+ unsigned long cfg_hash;
+ int ret, preset;
+
+ /* enable any config activated by configfs */
+ cscfg_config_sysfs_get_active_cfg(&cfg_hash, &preset);
+ if (cfg_hash) {
+ ret = cscfg_csdev_enable_active_config(csdev, cfg_hash, preset);
+ if (ret)
+ return ret;
+ }
spin_lock(&drvdata->spinlock);
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index 58062a5a8238..bb14a3a8a921 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -856,13 +856,11 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id)
{
int ret;
void __iomem *base;
- unsigned long *guaranteed;
struct device *dev = &adev->dev;
struct coresight_platform_data *pdata = NULL;
struct stm_drvdata *drvdata;
struct resource *res = &adev->res;
struct resource ch_res;
- size_t bitmap_size;
struct coresight_desc desc = { 0 };
desc.name = coresight_alloc_device_name(&stm_devs, dev);
@@ -904,12 +902,10 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id)
else
drvdata->numsp = stm_num_stimulus_port(drvdata);
- bitmap_size = BITS_TO_LONGS(drvdata->numsp) * sizeof(long);
-
- guaranteed = devm_kzalloc(dev, bitmap_size, GFP_KERNEL);
- if (!guaranteed)
+ drvdata->chs.guaranteed = devm_bitmap_zalloc(dev, drvdata->numsp,
+ GFP_KERNEL);
+ if (!drvdata->chs.guaranteed)
return -ENOMEM;
- drvdata->chs.guaranteed = guaranteed;
spin_lock_init(&drvdata->spinlock);
diff --git a/drivers/hwtracing/coresight/coresight-syscfg-configfs.c b/drivers/hwtracing/coresight/coresight-syscfg-configfs.c
index c547816b9000..433ede94dd63 100644
--- a/drivers/hwtracing/coresight/coresight-syscfg-configfs.c
+++ b/drivers/hwtracing/coresight/coresight-syscfg-configfs.c
@@ -6,6 +6,7 @@
#include <linux/configfs.h>
+#include "coresight-config.h"
#include "coresight-syscfg-configfs.h"
/* create a default ci_type. */
@@ -87,9 +88,75 @@ static ssize_t cscfg_cfg_values_show(struct config_item *item, char *page)
}
CONFIGFS_ATTR_RO(cscfg_cfg_, values);
+static ssize_t cscfg_cfg_enable_show(struct config_item *item, char *page)
+{
+ struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+ struct cscfg_fs_config, group);
+
+ return scnprintf(page, PAGE_SIZE, "%d\n", fs_config->active);
+}
+
+static ssize_t cscfg_cfg_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+ struct cscfg_fs_config, group);
+ int err;
+ bool val;
+
+ err = kstrtobool(page, &val);
+ if (!err)
+ err = cscfg_config_sysfs_activate(fs_config->config_desc, val);
+ if (!err) {
+ fs_config->active = val;
+ if (val)
+ cscfg_config_sysfs_set_preset(fs_config->preset);
+ }
+ return err ? err : count;
+}
+CONFIGFS_ATTR(cscfg_cfg_, enable);
+
+static ssize_t cscfg_cfg_preset_show(struct config_item *item, char *page)
+{
+ struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+ struct cscfg_fs_config, group);
+
+ return scnprintf(page, PAGE_SIZE, "%d\n", fs_config->preset);
+}
+
+static ssize_t cscfg_cfg_preset_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+ struct cscfg_fs_config, group);
+ int preset, err;
+
+ err = kstrtoint(page, 0, &preset);
+ if (!err) {
+ /*
+ * presets start at 1, and go up to max (15),
+ * but the config may provide fewer.
+ */
+ if ((preset < 1) || (preset > fs_config->config_desc->nr_presets))
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ /* set new value */
+ fs_config->preset = preset;
+ /* set on system if active */
+ if (fs_config->active)
+ cscfg_config_sysfs_set_preset(fs_config->preset);
+ }
+ return err ? err : count;
+}
+CONFIGFS_ATTR(cscfg_cfg_, preset);
+
static struct configfs_attribute *cscfg_config_view_attrs[] = {
&cscfg_cfg_attr_description,
&cscfg_cfg_attr_feature_refs,
+ &cscfg_cfg_attr_enable,
+ &cscfg_cfg_attr_preset,
NULL,
};
@@ -334,9 +401,19 @@ int cscfg_configfs_add_config(struct cscfg_config_desc *config_desc)
if (IS_ERR(new_group))
return PTR_ERR(new_group);
err = configfs_register_group(&cscfg_configs_grp, new_group);
+ if (!err)
+ config_desc->fs_group = new_group;
return err;
}
+void cscfg_configfs_del_config(struct cscfg_config_desc *config_desc)
+{
+ if (config_desc->fs_group) {
+ configfs_unregister_group(config_desc->fs_group);
+ config_desc->fs_group = NULL;
+ }
+}
+
static struct config_item_type cscfg_features_type = {
.ct_owner = THIS_MODULE,
};
@@ -358,9 +435,19 @@ int cscfg_configfs_add_feature(struct cscfg_feature_desc *feat_desc)
if (IS_ERR(new_group))
return PTR_ERR(new_group);
err = configfs_register_group(&cscfg_features_grp, new_group);
+ if (!err)
+ feat_desc->fs_group = new_group;
return err;
}
+void cscfg_configfs_del_feature(struct cscfg_feature_desc *feat_desc)
+{
+ if (feat_desc->fs_group) {
+ configfs_unregister_group(feat_desc->fs_group);
+ feat_desc->fs_group = NULL;
+ }
+}
+
int cscfg_configfs_init(struct cscfg_manager *cscfg_mgr)
{
struct configfs_subsystem *subsys;
diff --git a/drivers/hwtracing/coresight/coresight-syscfg-configfs.h b/drivers/hwtracing/coresight/coresight-syscfg-configfs.h
index 7d6ffe35ca4c..373d84d43268 100644
--- a/drivers/hwtracing/coresight/coresight-syscfg-configfs.h
+++ b/drivers/hwtracing/coresight/coresight-syscfg-configfs.h
@@ -15,6 +15,8 @@
struct cscfg_fs_config {
struct cscfg_config_desc *config_desc;
struct config_group group;
+ bool active;
+ int preset;
};
/* container for feature view */
@@ -41,5 +43,7 @@ int cscfg_configfs_init(struct cscfg_manager *cscfg_mgr);
void cscfg_configfs_release(struct cscfg_manager *cscfg_mgr);
int cscfg_configfs_add_config(struct cscfg_config_desc *config_desc);
int cscfg_configfs_add_feature(struct cscfg_feature_desc *feat_desc);
+void cscfg_configfs_del_config(struct cscfg_config_desc *config_desc);
+void cscfg_configfs_del_feature(struct cscfg_feature_desc *feat_desc);
#endif /* CORESIGHT_SYSCFG_CONFIGFS_H */
diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c
index 43054568430f..098fc34c4829 100644
--- a/drivers/hwtracing/coresight/coresight-syscfg.c
+++ b/drivers/hwtracing/coresight/coresight-syscfg.c
@@ -250,6 +250,13 @@ static int cscfg_check_feat_for_cfg(struct cscfg_config_desc *config_desc)
static int cscfg_load_feat(struct cscfg_feature_desc *feat_desc)
{
int err;
+ struct cscfg_feature_desc *feat_desc_exist;
+
+ /* new feature must have unique name */
+ list_for_each_entry(feat_desc_exist, &cscfg_mgr->feat_desc_list, item) {
+ if (!strcmp(feat_desc_exist->name, feat_desc->name))
+ return -EEXIST;
+ }
/* add feature to any matching registered devices */
err = cscfg_add_feat_to_csdevs(feat_desc);
@@ -267,6 +274,13 @@ static int cscfg_load_feat(struct cscfg_feature_desc *feat_desc)
static int cscfg_load_config(struct cscfg_config_desc *config_desc)
{
int err;
+ struct cscfg_config_desc *config_desc_exist;
+
+ /* new configuration must have a unique name */
+ list_for_each_entry(config_desc_exist, &cscfg_mgr->config_desc_list, item) {
+ if (!strcmp(config_desc_exist->name, config_desc->name))
+ return -EEXIST;
+ }
/* validate features are present */
err = cscfg_check_feat_for_cfg(config_desc);
@@ -354,6 +368,92 @@ unlock_exit:
return err;
}
+/*
+ * Conditionally up reference count on owner to prevent unload.
+ *
+ * module loaded configs need to be locked in to prevent premature unload.
+ */
+static int cscfg_owner_get(struct cscfg_load_owner_info *owner_info)
+{
+ if ((owner_info->type == CSCFG_OWNER_MODULE) &&
+ (!try_module_get(owner_info->owner_handle)))
+ return -EINVAL;
+ return 0;
+}
+
+/* conditionally lower ref count on an owner */
+static void cscfg_owner_put(struct cscfg_load_owner_info *owner_info)
+{
+ if (owner_info->type == CSCFG_OWNER_MODULE)
+ module_put(owner_info->owner_handle);
+}
+
+static void cscfg_remove_owned_csdev_configs(struct coresight_device *csdev, void *load_owner)
+{
+ struct cscfg_config_csdev *config_csdev, *tmp;
+
+ if (list_empty(&csdev->config_csdev_list))
+ return;
+
+ list_for_each_entry_safe(config_csdev, tmp, &csdev->config_csdev_list, node) {
+ if (config_csdev->config_desc->load_owner == load_owner)
+ list_del(&config_csdev->node);
+ }
+}
+
+static void cscfg_remove_owned_csdev_features(struct coresight_device *csdev, void *load_owner)
+{
+ struct cscfg_feature_csdev *feat_csdev, *tmp;
+
+ if (list_empty(&csdev->feature_csdev_list))
+ return;
+
+ list_for_each_entry_safe(feat_csdev, tmp, &csdev->feature_csdev_list, node) {
+ if (feat_csdev->feat_desc->load_owner == load_owner)
+ list_del(&feat_csdev->node);
+ }
+}
+
+/*
+ * removal is relatively easy - just remove from all lists, anything that
+ * matches the owner. Memory for the descriptors will be managed by the owner,
+ * memory for the csdev items is devm_ allocated with the individual csdev
+ * devices.
+ */
+static void cscfg_unload_owned_cfgs_feats(void *load_owner)
+{
+ struct cscfg_config_desc *config_desc, *cfg_tmp;
+ struct cscfg_feature_desc *feat_desc, *feat_tmp;
+ struct cscfg_registered_csdev *csdev_item;
+
+ /* remove from each csdev instance feature and config lists */
+ list_for_each_entry(csdev_item, &cscfg_mgr->csdev_desc_list, item) {
+ /*
+ * for each csdev, check the loaded lists and remove if
+ * referenced descriptor is owned
+ */
+ cscfg_remove_owned_csdev_configs(csdev_item->csdev, load_owner);
+ cscfg_remove_owned_csdev_features(csdev_item->csdev, load_owner);
+ }
+
+ /* remove from the config descriptor lists */
+ list_for_each_entry_safe(config_desc, cfg_tmp, &cscfg_mgr->config_desc_list, item) {
+ if (config_desc->load_owner == load_owner) {
+ cscfg_configfs_del_config(config_desc);
+ etm_perf_del_symlink_cscfg(config_desc);
+ list_del(&config_desc->item);
+ }
+ }
+
+ /* remove from the feature descriptor lists */
+ list_for_each_entry_safe(feat_desc, feat_tmp, &cscfg_mgr->feat_desc_list, item) {
+ if (feat_desc->load_owner == load_owner) {
+ cscfg_configfs_del_feature(feat_desc);
+ list_del(&feat_desc->item);
+ }
+ }
+}
+
/**
* cscfg_load_config_sets - API function to load feature and config sets.
*
@@ -361,13 +461,22 @@ unlock_exit:
* descriptors and load into the system.
* Features are loaded first to ensure configuration dependencies can be met.
*
+ * To facilitate dynamic loading and unloading, features and configurations
+ * have a "load_owner", to allow later unload by the same owner. An owner may
+ * be a loadable module or configuration dynamically created via configfs.
+ * As later loaded configurations can use earlier loaded features, creating load
+ * dependencies, a load order list is maintained. Unload is strictly in the
+ * reverse order to load.
+ *
* @config_descs: 0 terminated array of configuration descriptors.
* @feat_descs: 0 terminated array of feature descriptors.
+ * @owner_info: Information on the owner of this set.
*/
int cscfg_load_config_sets(struct cscfg_config_desc **config_descs,
- struct cscfg_feature_desc **feat_descs)
+ struct cscfg_feature_desc **feat_descs,
+ struct cscfg_load_owner_info *owner_info)
{
- int err, i = 0;
+ int err = 0, i = 0;
mutex_lock(&cscfg_mutex);
@@ -380,8 +489,10 @@ int cscfg_load_config_sets(struct cscfg_config_desc **config_descs,
if (err) {
pr_err("coresight-syscfg: Failed to load feature %s\n",
feat_descs[i]->name);
+ cscfg_unload_owned_cfgs_feats(owner_info);
goto exit_unlock;
}
+ feat_descs[i]->load_owner = owner_info;
i++;
}
}
@@ -396,18 +507,86 @@ int cscfg_load_config_sets(struct cscfg_config_desc **config_descs,
if (err) {
pr_err("coresight-syscfg: Failed to load configuration %s\n",
config_descs[i]->name);
+ cscfg_unload_owned_cfgs_feats(owner_info);
goto exit_unlock;
}
+ config_descs[i]->load_owner = owner_info;
i++;
}
}
+ /* add the load owner to the load order list */
+ list_add_tail(&owner_info->item, &cscfg_mgr->load_order_list);
+ if (!list_is_singular(&cscfg_mgr->load_order_list)) {
+ /* lock previous item in load order list */
+ err = cscfg_owner_get(list_prev_entry(owner_info, item));
+ if (err) {
+ cscfg_unload_owned_cfgs_feats(owner_info);
+ list_del(&owner_info->item);
+ }
+ }
+
exit_unlock:
mutex_unlock(&cscfg_mutex);
return err;
}
EXPORT_SYMBOL_GPL(cscfg_load_config_sets);
+/**
+ * cscfg_unload_config_sets - unload a set of configurations by owner.
+ *
+ * Dynamic unload of configuration and feature sets is done on the basis of
+ * the load owner of that set. Later loaded configurations can depend on
+ * features loaded earlier.
+ *
+ * Therefore, unload is only possible if:-
+ * 1) no configurations are active.
+ * 2) the set being unloaded was the last to be loaded to maintain dependencies.
+ *
+ * @owner_info: Information on owner for set being unloaded.
+ */
+int cscfg_unload_config_sets(struct cscfg_load_owner_info *owner_info)
+{
+ int err = 0;
+ struct cscfg_load_owner_info *load_list_item = NULL;
+
+ mutex_lock(&cscfg_mutex);
+
+ /* cannot unload if anything is active */
+ if (atomic_read(&cscfg_mgr->sys_active_cnt)) {
+ err = -EBUSY;
+ goto exit_unlock;
+ }
+
+ /* cannot unload if not last loaded in load order */
+ if (!list_empty(&cscfg_mgr->load_order_list)) {
+ load_list_item = list_last_entry(&cscfg_mgr->load_order_list,
+ struct cscfg_load_owner_info, item);
+ if (load_list_item != owner_info)
+ load_list_item = NULL;
+ }
+
+ if (!load_list_item) {
+ err = -EINVAL;
+ goto exit_unlock;
+ }
+
+ /* unload all belonging to load_owner */
+ cscfg_unload_owned_cfgs_feats(owner_info);
+
+ /* remove from load order list */
+ if (!list_is_singular(&cscfg_mgr->load_order_list)) {
+ /* unlock previous item in load order list */
+ cscfg_owner_put(list_prev_entry(owner_info, item));
+ }
+ list_del(&owner_info->item);
+
+exit_unlock:
+ mutex_unlock(&cscfg_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(cscfg_unload_config_sets);
+
/* Handle coresight device registration and add configs and features to devices */
/* iterate through config lists and load matching configs to device */
@@ -566,32 +745,26 @@ unlock_exit:
}
EXPORT_SYMBOL_GPL(cscfg_csdev_reset_feats);
-/**
- * cscfg_activate_config - Mark a configuration descriptor as active.
- *
- * This will be seen when csdev devices are enabled in the system.
- * Only activated configurations can be enabled on individual devices.
- * Activation protects the configuration from alteration or removal while
- * active.
- *
- * Selection by hash value - generated from the configuration name when it
- * was loaded and added to the cs_etm/configurations file system for selection
- * by perf.
+/*
+ * This activate configuration for either perf or sysfs. Perf can have multiple
+ * active configs, selected per event, sysfs is limited to one.
*
* Increments the configuration descriptor active count and the global active
* count.
*
* @cfg_hash: Hash value of the selected configuration name.
*/
-int cscfg_activate_config(unsigned long cfg_hash)
+static int _cscfg_activate_config(unsigned long cfg_hash)
{
struct cscfg_config_desc *config_desc;
int err = -EINVAL;
- mutex_lock(&cscfg_mutex);
-
list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) {
if ((unsigned long)config_desc->event_ea->var == cfg_hash) {
+ /* must ensure that config cannot be unloaded in use */
+ err = cscfg_owner_get(config_desc->load_owner);
+ if (err)
+ break;
/*
* increment the global active count - control changes to
* active configurations
@@ -609,6 +782,101 @@ int cscfg_activate_config(unsigned long cfg_hash)
break;
}
}
+ return err;
+}
+
+static void _cscfg_deactivate_config(unsigned long cfg_hash)
+{
+ struct cscfg_config_desc *config_desc;
+
+ list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) {
+ if ((unsigned long)config_desc->event_ea->var == cfg_hash) {
+ atomic_dec(&config_desc->active_cnt);
+ atomic_dec(&cscfg_mgr->sys_active_cnt);
+ cscfg_owner_put(config_desc->load_owner);
+ dev_dbg(cscfg_device(), "Deactivate config %s.\n", config_desc->name);
+ break;
+ }
+ }
+}
+
+/*
+ * called from configfs to set/clear the active configuration for use when
+ * using sysfs to control trace.
+ */
+int cscfg_config_sysfs_activate(struct cscfg_config_desc *config_desc, bool activate)
+{
+ unsigned long cfg_hash;
+ int err = 0;
+
+ mutex_lock(&cscfg_mutex);
+
+ cfg_hash = (unsigned long)config_desc->event_ea->var;
+
+ if (activate) {
+ /* cannot be a current active value to activate this */
+ if (cscfg_mgr->sysfs_active_config) {
+ err = -EBUSY;
+ goto exit_unlock;
+ }
+ err = _cscfg_activate_config(cfg_hash);
+ if (!err)
+ cscfg_mgr->sysfs_active_config = cfg_hash;
+ } else {
+ /* disable if matching current value */
+ if (cscfg_mgr->sysfs_active_config == cfg_hash) {
+ _cscfg_deactivate_config(cfg_hash);
+ cscfg_mgr->sysfs_active_config = 0;
+ } else
+ err = -EINVAL;
+ }
+
+exit_unlock:
+ mutex_unlock(&cscfg_mutex);
+ return err;
+}
+
+/* set the sysfs preset value */
+void cscfg_config_sysfs_set_preset(int preset)
+{
+ mutex_lock(&cscfg_mutex);
+ cscfg_mgr->sysfs_active_preset = preset;
+ mutex_unlock(&cscfg_mutex);
+}
+
+/*
+ * Used by a device to get the config and preset selected as active in configfs,
+ * when using sysfs to control trace.
+ */
+void cscfg_config_sysfs_get_active_cfg(unsigned long *cfg_hash, int *preset)
+{
+ mutex_lock(&cscfg_mutex);
+ *preset = cscfg_mgr->sysfs_active_preset;
+ *cfg_hash = cscfg_mgr->sysfs_active_config;
+ mutex_unlock(&cscfg_mutex);
+}
+EXPORT_SYMBOL_GPL(cscfg_config_sysfs_get_active_cfg);
+
+/**
+ * cscfg_activate_config - Mark a configuration descriptor as active.
+ *
+ * This will be seen when csdev devices are enabled in the system.
+ * Only activated configurations can be enabled on individual devices.
+ * Activation protects the configuration from alteration or removal while
+ * active.
+ *
+ * Selection by hash value - generated from the configuration name when it
+ * was loaded and added to the cs_etm/configurations file system for selection
+ * by perf.
+ *
+ * @cfg_hash: Hash value of the selected configuration name.
+ */
+int cscfg_activate_config(unsigned long cfg_hash)
+{
+ int err = 0;
+
+ mutex_lock(&cscfg_mutex);
+ err = _cscfg_activate_config(cfg_hash);
mutex_unlock(&cscfg_mutex);
return err;
@@ -624,18 +892,8 @@ EXPORT_SYMBOL_GPL(cscfg_activate_config);
*/
void cscfg_deactivate_config(unsigned long cfg_hash)
{
- struct cscfg_config_desc *config_desc;
-
mutex_lock(&cscfg_mutex);
-
- list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) {
- if ((unsigned long)config_desc->event_ea->var == cfg_hash) {
- atomic_dec(&config_desc->active_cnt);
- atomic_dec(&cscfg_mgr->sys_active_cnt);
- dev_dbg(cscfg_device(), "Deactivate config %s.\n", config_desc->name);
- break;
- }
- }
+ _cscfg_deactivate_config(cfg_hash);
mutex_unlock(&cscfg_mutex);
}
EXPORT_SYMBOL_GPL(cscfg_deactivate_config);
@@ -827,10 +1085,11 @@ int __init cscfg_init(void)
INIT_LIST_HEAD(&cscfg_mgr->csdev_desc_list);
INIT_LIST_HEAD(&cscfg_mgr->feat_desc_list);
INIT_LIST_HEAD(&cscfg_mgr->config_desc_list);
+ INIT_LIST_HEAD(&cscfg_mgr->load_order_list);
atomic_set(&cscfg_mgr->sys_active_cnt, 0);
/* preload built-in configurations */
- err = cscfg_preload();
+ err = cscfg_preload(THIS_MODULE);
if (err)
goto exit_err;
diff --git a/drivers/hwtracing/coresight/coresight-syscfg.h b/drivers/hwtracing/coresight/coresight-syscfg.h
index 8d018efd6ead..9106ffab4833 100644
--- a/drivers/hwtracing/coresight/coresight-syscfg.h
+++ b/drivers/hwtracing/coresight/coresight-syscfg.h
@@ -25,16 +25,22 @@
* @csdev_desc_list: List of coresight devices registered with the configuration manager.
* @feat_desc_list: List of feature descriptors to load into registered devices.
* @config_desc_list: List of system configuration descriptors to load into registered devices.
+ * @load_order_list: Ordered list of owners for dynamically loaded configurations.
* @sys_active_cnt: Total number of active config descriptor references.
* @cfgfs_subsys: configfs subsystem used to manage configurations.
+ * @sysfs_active_config:Active config hash used if CoreSight controlled from sysfs.
+ * @sysfs_active_preset:Active preset index used if CoreSight controlled from sysfs.
*/
struct cscfg_manager {
struct device dev;
struct list_head csdev_desc_list;
struct list_head feat_desc_list;
struct list_head config_desc_list;
+ struct list_head load_order_list;
atomic_t sys_active_cnt;
struct configfs_subsystem cfgfs_subsys;
+ u32 sysfs_active_config;
+ int sysfs_active_preset;
};
/* get reference to dev in cscfg_manager */
@@ -56,18 +62,44 @@ struct cscfg_registered_csdev {
struct list_head item;
};
+/* owner types for loading and unloading of config and feature sets */
+enum cscfg_load_owner_type {
+ CSCFG_OWNER_PRELOAD,
+ CSCFG_OWNER_MODULE,
+};
+
+/**
+ * Load item - item to add to the load order list allowing dynamic load and
+ * unload of configurations and features. Caller loading a config
+ * set provides a context handle for unload. API ensures that
+ * items unloaded strictly in reverse order from load to ensure
+ * dependencies are respected.
+ *
+ * @item: list entry for load order list.
+ * @type: type of owner - allows interpretation of owner_handle.
+ * @owner_handle: load context - handle for owner of loaded configs.
+ */
+struct cscfg_load_owner_info {
+ struct list_head item;
+ int type;
+ void *owner_handle;
+};
+
/* internal core operations for cscfg */
int __init cscfg_init(void);
void cscfg_exit(void);
-int cscfg_preload(void);
+int cscfg_preload(void *owner_handle);
const struct cscfg_feature_desc *cscfg_get_named_feat_desc(const char *name);
int cscfg_update_feat_param_val(struct cscfg_feature_desc *feat_desc,
int param_idx, u64 value);
-
+int cscfg_config_sysfs_activate(struct cscfg_config_desc *cfg_desc, bool activate);
+void cscfg_config_sysfs_set_preset(int preset);
/* syscfg manager external API */
int cscfg_load_config_sets(struct cscfg_config_desc **cfg_descs,
- struct cscfg_feature_desc **feat_descs);
+ struct cscfg_feature_desc **feat_descs,
+ struct cscfg_load_owner_info *owner_info);
+int cscfg_unload_config_sets(struct cscfg_load_owner_info *owner_info);
int cscfg_register_csdev(struct coresight_device *csdev, u32 match_flags,
struct cscfg_csdev_feat_ops *ops);
void cscfg_unregister_csdev(struct coresight_device *csdev);
@@ -77,5 +109,6 @@ void cscfg_csdev_reset_feats(struct coresight_device *csdev);
int cscfg_csdev_enable_active_config(struct coresight_device *csdev,
unsigned long cfg_hash, int preset);
void cscfg_csdev_disable_active_config(struct coresight_device *csdev);
+void cscfg_config_sysfs_get_active_cfg(unsigned long *cfg_hash, int *preset);
#endif /* CORESIGHT_SYSCFG_H */
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index c6b854a9e476..42da31c1ab70 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -617,7 +617,7 @@ config I2C_EXYNOS5
help
High-speed I2C controller on Samsung Exynos5 and newer Samsung SoCs:
Exynos5250, Exynos5260, Exynos5410, Exynos542x, Exynos5800,
- Exynos5433 and Exynos7.
+ Exynos5433, Exynos7, Exynos850 and ExynosAutoV9.
Choose Y here only if you build for such Samsung SoC.
config I2C_GPIO
@@ -1153,22 +1153,12 @@ config I2C_XILINX
This driver can also be built as a module. If so, the module
will be called xilinx_i2c.
-config I2C_XLR
- tristate "Netlogic XLR I2C support"
- depends on CPU_XLR || COMPILE_TEST
- help
- This driver enables support for the on-chip I2C interface of
- the Netlogic XLR/XLS MIPS processors and Sigma Designs SOCs.
-
- This driver can also be built as a module. If so, the module
- will be called i2c-xlr.
-
config I2C_XLP9XX
- tristate "XLP9XX I2C support"
- depends on CPU_XLP || ARCH_THUNDER2 || COMPILE_TEST
+ tristate "Cavium ThunderX2 I2C support"
+ depends on ARCH_THUNDER2 || COMPILE_TEST
help
This driver enables support for the on-chip I2C interface of
- the Broadcom XLP9xx/XLP5xx MIPS and Vulcan ARM64 processors.
+ the Cavium ThunderX2 processors. (Originally on Netlogic XLP SoCs.)
This driver can also be built as a module. If so, the module will
be called i2c-xlp9xx.
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index d85899fef8c7..1d00dce77098 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -119,7 +119,6 @@ obj-$(CONFIG_I2C_OCTEON) += i2c-octeon.o
i2c-thunderx-objs := i2c-octeon-core.o i2c-thunderx-pcidrv.o
obj-$(CONFIG_I2C_THUNDERX) += i2c-thunderx.o
obj-$(CONFIG_I2C_XILINX) += i2c-xiic.o
-obj-$(CONFIG_I2C_XLR) += i2c-xlr.o
obj-$(CONFIG_I2C_XLP9XX) += i2c-xlp9xx.o
obj-$(CONFIG_I2C_RCAR) += i2c-rcar.o
diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 67e8b97c0c95..771e53d3d197 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -16,8 +16,6 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/irq.h>
-#include <linux/irqchip/chained_irq.h>
-#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c
index 37443edbf754..dfc534065595 100644
--- a/drivers/i2c/busses/i2c-bcm2835.c
+++ b/drivers/i2c/busses/i2c-bcm2835.c
@@ -402,7 +402,7 @@ static const struct i2c_adapter_quirks bcm2835_i2c_quirks = {
static int bcm2835_i2c_probe(struct platform_device *pdev)
{
struct bcm2835_i2c_dev *i2c_dev;
- struct resource *mem, *irq;
+ struct resource *mem;
int ret;
struct i2c_adapter *adap;
struct clk *mclk;
@@ -452,12 +452,9 @@ static int bcm2835_i2c_probe(struct platform_device *pdev)
return ret;
}
- irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (!irq) {
- dev_err(&pdev->dev, "No IRQ resource\n");
- return -ENODEV;
- }
- i2c_dev->irq = irq->start;
+ i2c_dev->irq = platform_get_irq(pdev, 0);
+ if (i2c_dev->irq < 0)
+ return i2c_dev->irq;
ret = request_irq(i2c_dev->irq, bcm2835_i2c_isr, IRQF_SHARED,
dev_name(&pdev->dev), i2c_dev);
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 60a2e750cee9..4b26cba40139 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -191,23 +191,26 @@ struct reset_control;
* @cmd_complete: tx completion indicator
* @clk: input reference clock
* @pclk: clock required to access the registers
+ * @rst: optional reset for the controller
* @slave: represent an I2C slave device
+ * @get_clk_rate_khz: callback to retrieve IP specific bus speed
* @cmd_err: run time hadware error code
* @msgs: points to an array of messages currently being transferred
* @msgs_num: the number of elements in msgs
- * @msg_write_idx: the element index of the current tx message in the msgs
- * array
+ * @msg_write_idx: the element index of the current tx message in the msgs array
* @tx_buf_len: the length of the current tx buffer
* @tx_buf: the current tx buffer
- * @msg_read_idx: the element index of the current rx message in the msgs
- * array
+ * @msg_read_idx: the element index of the current rx message in the msgs array
* @rx_buf_len: the length of the current rx buffer
* @rx_buf: the current rx buffer
* @msg_err: error status of the current transfer
* @status: i2c master status, one of STATUS_*
* @abort_source: copy of the TX_ABRT_SOURCE register
* @irq: interrupt number for the i2c master
+ * @flags: platform specific flags like type of IO accessors or model
* @adapter: i2c subsystem adapter node
+ * @functionality: I2C_FUNC_* ORed bits to reflect what controller does support
+ * @master_cfg: configuration for the master device
* @slave_cfg: configuration for the slave device
* @tx_fifo_depth: depth of the hardware tx fifo
* @rx_fifo_depth: depth of the hardware rx fifo
@@ -228,7 +231,9 @@ struct reset_control;
* @disable: function to disable the controller
* @disable_int: function to disable all interrupts
* @init: function to initialize the I2C hardware
+ * @set_sda_hold_time: callback to retrieve IP specific SDA hold timing
* @mode: operation mode - DW_IC_MASTER or DW_IC_SLAVE
+ * @rinfo: I²C GPIO recovery information
* @suspended: set to true if the controller is suspended
*
* HCNT and LCNT parameters can be used if the platform knows more accurate
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index 9b08bb5df38d..9177463c2cbb 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -701,7 +701,8 @@ static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev)
regmap_read(dev->map, DW_IC_CLR_RX_DONE, &dummy);
if (stat & DW_IC_INTR_ACTIVITY)
regmap_read(dev->map, DW_IC_CLR_ACTIVITY, &dummy);
- if (stat & DW_IC_INTR_STOP_DET)
+ if ((stat & DW_IC_INTR_STOP_DET) &&
+ ((dev->rx_outstanding == 0) || (stat & DW_IC_INTR_RX_FULL)))
regmap_read(dev->map, DW_IC_CLR_STOP_DET, &dummy);
if (stat & DW_IC_INTR_START_DET)
regmap_read(dev->map, DW_IC_CLR_START_DET, &dummy);
@@ -723,6 +724,7 @@ static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev)
if (stat & DW_IC_INTR_TX_ABRT) {
dev->cmd_err |= DW_IC_ERR_TX_ABRT;
dev->status = STATUS_IDLE;
+ dev->rx_outstanding = 0;
/*
* Anytime TX_ABRT is set, the contents of the tx/rx
@@ -745,7 +747,8 @@ static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev)
*/
tx_aborted:
- if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err)
+ if (((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err) &&
+ (dev->rx_outstanding == 0))
complete(&dev->cmd_complete);
else if (unlikely(dev->flags & ACCESS_INTR_MASK)) {
/* Workaround to trigger pending interrupt */
diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
index 0f409a4c2da0..ef4250f8852b 100644
--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
+++ b/drivers/i2c/busses/i2c-designware-pcidrv.c
@@ -38,11 +38,18 @@ enum dw_pci_ctl_id_t {
navi_amd,
};
+/*
+ * This is a legacy structure to describe the hardware counters
+ * to configure signal timings on the bus. For Device Tree platforms
+ * one should use the respective properties and for ACPI there is
+ * a set of ACPI methods that provide these counters. No new
+ * platform should use this structure.
+ */
struct dw_scl_sda_cfg {
- u32 ss_hcnt;
- u32 fs_hcnt;
- u32 ss_lcnt;
- u32 fs_lcnt;
+ u16 ss_hcnt;
+ u16 fs_hcnt;
+ u16 ss_lcnt;
+ u16 fs_lcnt;
u32 sda_hold;
};
@@ -206,8 +213,7 @@ static struct dw_pci_controller dw_pci_controllers[] = {
},
};
-#ifdef CONFIG_PM
-static int i2c_dw_pci_suspend(struct device *dev)
+static int __maybe_unused i2c_dw_pci_suspend(struct device *dev)
{
struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
@@ -217,7 +223,7 @@ static int i2c_dw_pci_suspend(struct device *dev)
return 0;
}
-static int i2c_dw_pci_resume(struct device *dev)
+static int __maybe_unused i2c_dw_pci_resume(struct device *dev)
{
struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
int ret;
@@ -227,7 +233,6 @@ static int i2c_dw_pci_resume(struct device *dev)
return ret;
}
-#endif
static UNIVERSAL_DEV_PM_OPS(i2c_dw_pm_ops, i2c_dw_pci_suspend,
i2c_dw_pci_resume, NULL);
@@ -241,28 +246,24 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
struct dw_pci_controller *controller;
struct dw_scl_sda_cfg *cfg;
- if (id->driver_data >= ARRAY_SIZE(dw_pci_controllers)) {
- dev_err(&pdev->dev, "%s: invalid driver data %ld\n", __func__,
- id->driver_data);
- return -EINVAL;
- }
+ if (id->driver_data >= ARRAY_SIZE(dw_pci_controllers))
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "Invalid driver data %ld\n",
+ id->driver_data);
controller = &dw_pci_controllers[id->driver_data];
r = pcim_enable_device(pdev);
- if (r) {
- dev_err(&pdev->dev, "Failed to enable I2C PCI device (%d)\n",
- r);
- return r;
- }
+ if (r)
+ return dev_err_probe(&pdev->dev, r,
+ "Failed to enable I2C PCI device\n");
pci_set_master(pdev);
r = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev));
- if (r) {
- dev_err(&pdev->dev, "I/O memory remapping failed\n");
- return r;
- }
+ if (r)
+ return dev_err_probe(&pdev->dev, r,
+ "I/O memory remapping failed\n");
dev = devm_kzalloc(&pdev->dev, sizeof(struct dw_i2c_dev), GFP_KERNEL);
if (!dev)
@@ -352,9 +353,6 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev)
pci_free_irq_vectors(pdev);
}
-/* work with hotplug and coldplug */
-MODULE_ALIAS("i2c_designware-pci");
-
static const struct pci_device_id i2_designware_pci_ids[] = {
/* Medfield */
{ PCI_VDEVICE(INTEL, 0x0817), medfield },
@@ -411,9 +409,10 @@ static struct pci_driver dw_i2c_driver = {
.pm = &i2c_dw_pm_ops,
},
};
-
module_pci_driver(dw_i2c_driver);
+/* Work with hotplug and coldplug */
+MODULE_ALIAS("i2c_designware-pci");
MODULE_AUTHOR("Baruch Siach <baruch@tkos.co.il>");
MODULE_DESCRIPTION("Synopsys DesignWare PCI I2C bus adapter");
MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 21113665ddea..2bd81abc86f6 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -293,6 +293,8 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
DPM_FLAG_MAY_SKIP_RESUME);
}
+ device_enable_async_suspend(&pdev->dev);
+
/* The code below assumes runtime PM to be disabled. */
WARN_ON(pm_runtime_enabled(&pdev->dev));
diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index 97d4f3ac0abd..b812d1090c0f 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -169,6 +169,7 @@
enum i2c_type_exynos {
I2C_TYPE_EXYNOS5,
I2C_TYPE_EXYNOS7,
+ I2C_TYPE_EXYNOSAUTOV9,
};
struct exynos5_i2c {
@@ -181,7 +182,8 @@ struct exynos5_i2c {
unsigned int irq;
void __iomem *regs;
- struct clk *clk;
+ struct clk *clk; /* operating clock */
+ struct clk *pclk; /* bus clock */
struct device *dev;
int state;
@@ -230,6 +232,11 @@ static const struct exynos_hsi2c_variant exynos7_hsi2c_data = {
.hw = I2C_TYPE_EXYNOS7,
};
+static const struct exynos_hsi2c_variant exynosautov9_hsi2c_data = {
+ .fifo_depth = 64,
+ .hw = I2C_TYPE_EXYNOSAUTOV9,
+};
+
static const struct of_device_id exynos5_i2c_match[] = {
{
.compatible = "samsung,exynos5-hsi2c",
@@ -243,6 +250,9 @@ static const struct of_device_id exynos5_i2c_match[] = {
}, {
.compatible = "samsung,exynos7-hsi2c",
.data = &exynos7_hsi2c_data
+ }, {
+ .compatible = "samsung,exynosautov9-hsi2c",
+ .data = &exynosautov9_hsi2c_data
}, {},
};
MODULE_DEVICE_TABLE(of, exynos5_i2c_match);
@@ -282,6 +292,31 @@ static int exynos5_i2c_set_timing(struct exynos5_i2c *i2c, bool hs_timings)
int div, clk_cycle, temp;
/*
+ * In case of HSI2C controllers in ExynosAutoV9:
+ *
+ * FSCL = IPCLK / ((CLK_DIV + 1) * 16)
+ * T_SCL_LOW = IPCLK * (CLK_DIV + 1) * (N + M)
+ * [N : number of 0's in the TSCL_H_HS]
+ * [M : number of 0's in the TSCL_L_HS]
+ * T_SCL_HIGH = IPCLK * (CLK_DIV + 1) * (N + M)
+ * [N : number of 1's in the TSCL_H_HS]
+ * [M : number of 1's in the TSCL_L_HS]
+ *
+ * Result of (N + M) is always 8.
+ * In general case, we don't need to control timing_s1 and timing_s2.
+ */
+ if (i2c->variant->hw == I2C_TYPE_EXYNOSAUTOV9) {
+ div = ((clkin / (16 * i2c->op_clock)) - 1);
+ i2c_timing_s3 = div << 16;
+ if (hs_timings)
+ writel(i2c_timing_s3, i2c->regs + HSI2C_TIMING_HS3);
+ else
+ writel(i2c_timing_s3, i2c->regs + HSI2C_TIMING_FS3);
+
+ return 0;
+ }
+
+ /*
* In case of HSI2C controller in Exynos5 series
* FPCLK / FI2C =
* (CLK_DIV + 1) * (TSCLK_L + TSCLK_H + 2) + 8 + 2 * FLT_CYCLE
@@ -422,7 +457,10 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
writel(int_status, i2c->regs + HSI2C_INT_STATUS);
/* handle interrupt related to the transfer status */
- if (i2c->variant->hw == I2C_TYPE_EXYNOS7) {
+ switch (i2c->variant->hw) {
+ case I2C_TYPE_EXYNOSAUTOV9:
+ fallthrough;
+ case I2C_TYPE_EXYNOS7:
if (int_status & HSI2C_INT_TRANS_DONE) {
i2c->trans_done = 1;
i2c->state = 0;
@@ -443,7 +481,12 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
i2c->state = -ETIMEDOUT;
goto stop;
}
- } else if (int_status & HSI2C_INT_I2C) {
+
+ break;
+ case I2C_TYPE_EXYNOS5:
+ if (!(int_status & HSI2C_INT_I2C))
+ break;
+
trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
if (trans_status & HSI2C_NO_DEV_ACK) {
dev_dbg(i2c->dev, "No ACK from device\n");
@@ -465,6 +508,8 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
i2c->trans_done = 1;
i2c->state = 0;
}
+
+ break;
}
if ((i2c->msg->flags & I2C_M_RD) && (int_status &
@@ -569,13 +614,13 @@ static void exynos5_i2c_bus_check(struct exynos5_i2c *i2c)
{
unsigned long timeout;
- if (i2c->variant->hw != I2C_TYPE_EXYNOS7)
+ if (i2c->variant->hw == I2C_TYPE_EXYNOS5)
return;
/*
- * HSI2C_MASTER_ST_LOSE state in EXYNOS7 variant before transaction
- * indicates that bus is stuck (SDA is low). In such case bus recovery
- * can be performed.
+ * HSI2C_MASTER_ST_LOSE state (in Exynos7 and ExynosAutoV9 variants)
+ * before transaction indicates that bus is stuck (SDA is low).
+ * In such case bus recovery can be performed.
*/
timeout = jiffies + msecs_to_jiffies(100);
for (;;) {
@@ -611,10 +656,10 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop)
unsigned long flags;
unsigned short trig_lvl;
- if (i2c->variant->hw == I2C_TYPE_EXYNOS7)
- int_en |= HSI2C_INT_I2C_TRANS;
- else
+ if (i2c->variant->hw == I2C_TYPE_EXYNOS5)
int_en |= HSI2C_INT_I2C;
+ else
+ int_en |= HSI2C_INT_I2C_TRANS;
i2c_ctl = readl(i2c->regs + HSI2C_CTL);
i2c_ctl &= ~(HSI2C_TXCHON | HSI2C_RXCHON);
@@ -713,10 +758,14 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap,
struct exynos5_i2c *i2c = adap->algo_data;
int i, ret;
- ret = clk_enable(i2c->clk);
+ ret = clk_enable(i2c->pclk);
if (ret)
return ret;
+ ret = clk_enable(i2c->clk);
+ if (ret)
+ goto err_pclk;
+
for (i = 0; i < num; ++i) {
ret = exynos5_i2c_xfer_msg(i2c, msgs + i, i + 1 == num);
if (ret)
@@ -724,6 +773,8 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap,
}
clk_disable(i2c->clk);
+err_pclk:
+ clk_disable(i2c->pclk);
return ret ?: num;
}
@@ -763,10 +814,20 @@ static int exynos5_i2c_probe(struct platform_device *pdev)
return -ENOENT;
}
- ret = clk_prepare_enable(i2c->clk);
+ i2c->pclk = devm_clk_get_optional(&pdev->dev, "hsi2c_pclk");
+ if (IS_ERR(i2c->pclk)) {
+ return dev_err_probe(&pdev->dev, PTR_ERR(i2c->pclk),
+ "cannot get pclk");
+ }
+
+ ret = clk_prepare_enable(i2c->pclk);
if (ret)
return ret;
+ ret = clk_prepare_enable(i2c->clk);
+ if (ret)
+ goto err_pclk;
+
i2c->regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(i2c->regs)) {
ret = PTR_ERR(i2c->regs);
@@ -809,11 +870,15 @@ static int exynos5_i2c_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, i2c);
clk_disable(i2c->clk);
+ clk_disable(i2c->pclk);
return 0;
err_clk:
clk_disable_unprepare(i2c->clk);
+
+ err_pclk:
+ clk_disable_unprepare(i2c->pclk);
return ret;
}
@@ -824,6 +889,7 @@ static int exynos5_i2c_remove(struct platform_device *pdev)
i2c_del_adapter(&i2c->adap);
clk_unprepare(i2c->clk);
+ clk_unprepare(i2c->pclk);
return 0;
}
@@ -835,6 +901,7 @@ static int exynos5_i2c_suspend_noirq(struct device *dev)
i2c_mark_adapter_suspended(&i2c->adap);
clk_unprepare(i2c->clk);
+ clk_unprepare(i2c->pclk);
return 0;
}
@@ -844,21 +911,30 @@ static int exynos5_i2c_resume_noirq(struct device *dev)
struct exynos5_i2c *i2c = dev_get_drvdata(dev);
int ret = 0;
- ret = clk_prepare_enable(i2c->clk);
+ ret = clk_prepare_enable(i2c->pclk);
if (ret)
return ret;
+ ret = clk_prepare_enable(i2c->clk);
+ if (ret)
+ goto err_pclk;
+
ret = exynos5_hsi2c_clock_setup(i2c);
- if (ret) {
- clk_disable_unprepare(i2c->clk);
- return ret;
- }
+ if (ret)
+ goto err_clk;
exynos5_i2c_init(i2c);
clk_disable(i2c->clk);
+ clk_disable(i2c->pclk);
i2c_mark_adapter_resumed(&i2c->adap);
return 0;
+
+err_clk:
+ clk_disable_unprepare(i2c->clk);
+err_pclk:
+ clk_disable_unprepare(i2c->pclk);
+ return ret;
}
#endif
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 41446f9cc52d..7428cc6af5cc 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -328,22 +328,14 @@ static int i801_check_pre(struct i801_priv *priv)
status = inb_p(SMBHSTSTS(priv));
if (status & SMBHSTSTS_HOST_BUSY) {
- dev_err(&priv->pci_dev->dev, "SMBus is busy, can't use it!\n");
+ pci_err(priv->pci_dev, "SMBus is busy, can't use it!\n");
return -EBUSY;
}
status &= STATUS_FLAGS;
if (status) {
- dev_dbg(&priv->pci_dev->dev, "Clearing status flags (%02x)\n",
- status);
+ pci_dbg(priv->pci_dev, "Clearing status flags (%02x)\n", status);
outb_p(status, SMBHSTSTS(priv));
- status = inb_p(SMBHSTSTS(priv)) & STATUS_FLAGS;
- if (status) {
- dev_err(&priv->pci_dev->dev,
- "Failed clearing status flags (%02x)\n",
- status);
- return -EBUSY;
- }
}
/*
@@ -356,27 +348,14 @@ static int i801_check_pre(struct i801_priv *priv)
if (priv->features & FEATURE_SMBUS_PEC) {
status = inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE;
if (status) {
- dev_dbg(&priv->pci_dev->dev,
- "Clearing aux status flags (%02x)\n", status);
+ pci_dbg(priv->pci_dev, "Clearing aux status flags (%02x)\n", status);
outb_p(status, SMBAUXSTS(priv));
- status = inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE;
- if (status) {
- dev_err(&priv->pci_dev->dev,
- "Failed clearing aux status flags (%02x)\n",
- status);
- return -EBUSY;
- }
}
}
return 0;
}
-/*
- * Convert the status register to an error code, and clear it.
- * Note that status only contains the bits we want to clear, not the
- * actual register value.
- */
static int i801_check_post(struct i801_priv *priv, int status)
{
int result = 0;
@@ -401,7 +380,6 @@ static int i801_check_post(struct i801_priv *priv, int status)
!(status & SMBHSTSTS_FAILED))
dev_err(&priv->pci_dev->dev,
"Failed terminating the transaction\n");
- outb_p(STATUS_FLAGS, SMBHSTSTS(priv));
return -ETIMEDOUT;
}
@@ -440,9 +418,6 @@ static int i801_check_post(struct i801_priv *priv, int status)
dev_dbg(&priv->pci_dev->dev, "Lost arbitration\n");
}
- /* Clear status flags except BYTE_DONE, to be cleared by caller */
- outb_p(status, SMBHSTSTS(priv));
-
return result;
}
@@ -523,9 +498,11 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
return -EOPNOTSUPP;
}
+ /* Set block buffer mode */
+ outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv));
+
inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
- /* Use 32-byte buffer to process this transaction */
if (read_write == I2C_SMBUS_WRITE) {
len = data->block[0];
outb_p(len, SMBHSTDAT0(priv));
@@ -760,14 +737,6 @@ exit:
return i801_check_post(priv, status);
}
-static int i801_set_block_buffer_mode(struct i801_priv *priv)
-{
- outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv));
- if ((inb_p(SMBAUXCTL(priv)) & SMBAUXCTL_E32B) == 0)
- return -EIO;
- return 0;
-}
-
/* Block transaction function */
static int i801_block_transaction(struct i801_priv *priv, union i2c_smbus_data *data,
char read_write, int command)
@@ -775,6 +744,11 @@ static int i801_block_transaction(struct i801_priv *priv, union i2c_smbus_data *
int result = 0;
unsigned char hostc;
+ if (read_write == I2C_SMBUS_READ && command == I2C_SMBUS_BLOCK_DATA)
+ data->block[0] = I2C_SMBUS_BLOCK_MAX;
+ else if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
+ return -EPROTO;
+
if (command == I2C_SMBUS_I2C_BLOCK_DATA) {
if (read_write == I2C_SMBUS_WRITE) {
/* set I2C_EN bit in configuration register */
@@ -788,22 +762,11 @@ static int i801_block_transaction(struct i801_priv *priv, union i2c_smbus_data *
}
}
- if (read_write == I2C_SMBUS_WRITE
- || command == I2C_SMBUS_I2C_BLOCK_DATA) {
- if (data->block[0] < 1)
- data->block[0] = 1;
- if (data->block[0] > I2C_SMBUS_BLOCK_MAX)
- data->block[0] = I2C_SMBUS_BLOCK_MAX;
- } else {
- data->block[0] = 32; /* max for SMBus block reads */
- }
-
/* Experience has shown that the block buffer can only be used for
SMBus (not I2C) block transactions, even though the datasheet
doesn't mention this limitation. */
- if ((priv->features & FEATURE_BLOCK_BUFFER)
- && command != I2C_SMBUS_I2C_BLOCK_DATA
- && i801_set_block_buffer_mode(priv) == 0)
+ if ((priv->features & FEATURE_BLOCK_BUFFER) &&
+ command != I2C_SMBUS_I2C_BLOCK_DATA)
result = i801_block_transaction_by_block(priv, data,
read_write,
command);
@@ -951,8 +914,11 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
}
out:
- /* Unlock the SMBus device for use by BIOS/ACPI */
- outb_p(SMBHSTSTS_INUSE_STS, SMBHSTSTS(priv));
+ /*
+ * Unlock the SMBus device for use by BIOS/ACPI,
+ * and clear status flags if not done already.
+ */
+ outb_p(SMBHSTSTS_INUSE_STS | STATUS_FLAGS, SMBHSTSTS(priv));
pm_runtime_mark_last_busy(&priv->pci_dev->dev);
pm_runtime_put_autosuspend(&priv->pci_dev->dev);
@@ -1009,66 +975,72 @@ static const struct i2c_algorithm smbus_algorithm = {
.functionality = i801_func,
};
+#define FEATURES_ICH5 (FEATURE_BLOCK_PROC | FEATURE_I2C_BLOCK_READ | \
+ FEATURE_IRQ | FEATURE_SMBUS_PEC | \
+ FEATURE_BLOCK_BUFFER | FEATURE_HOST_NOTIFY)
+#define FEATURES_ICH4 (FEATURE_SMBUS_PEC | FEATURE_BLOCK_BUFFER | \
+ FEATURE_HOST_NOTIFY)
+
static const struct pci_device_id i801_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_3) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_3) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_2) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_3) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_3) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_3) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_4) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_16) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_17) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_17) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_5) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_6) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EP80579_1) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH10_4) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH10_5) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF0) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF1) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF2) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PANTHERPOINT_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNXPOINT_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AVOTON_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS0) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS1) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS2) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COLETOCREEK_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GEMINILAKE_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WILDCATPOINT_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BAYTRAIL_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BRASWELL_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EBG_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICELAKE_N_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_H_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_V_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ALDER_LAKE_P_SMBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ALDER_LAKE_M_SMBUS) },
+ { PCI_DEVICE_DATA(INTEL, 82801AA_3, 0) },
+ { PCI_DEVICE_DATA(INTEL, 82801AB_3, 0) },
+ { PCI_DEVICE_DATA(INTEL, 82801BA_2, 0) },
+ { PCI_DEVICE_DATA(INTEL, 82801CA_3, FEATURE_HOST_NOTIFY) },
+ { PCI_DEVICE_DATA(INTEL, 82801DB_3, FEATURES_ICH4) },
+ { PCI_DEVICE_DATA(INTEL, 82801EB_3, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, ESB_4, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, ICH6_16, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, ICH7_17, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, ESB2_17, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, ICH8_5, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, ICH9_6, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, EP80579_1, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, ICH10_4, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, ICH10_5, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, 5_3400_SERIES_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, COUGARPOINT_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, PATSBURG_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, PATSBURG_SMBUS_IDF0, FEATURES_ICH5 | FEATURE_IDF) },
+ { PCI_DEVICE_DATA(INTEL, PATSBURG_SMBUS_IDF1, FEATURES_ICH5 | FEATURE_IDF) },
+ { PCI_DEVICE_DATA(INTEL, PATSBURG_SMBUS_IDF2, FEATURES_ICH5 | FEATURE_IDF) },
+ { PCI_DEVICE_DATA(INTEL, DH89XXCC_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, PANTHERPOINT_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, LYNXPOINT_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, LYNXPOINT_LP_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, AVOTON_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, WELLSBURG_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, WELLSBURG_SMBUS_MS0, FEATURES_ICH5 | FEATURE_IDF) },
+ { PCI_DEVICE_DATA(INTEL, WELLSBURG_SMBUS_MS1, FEATURES_ICH5 | FEATURE_IDF) },
+ { PCI_DEVICE_DATA(INTEL, WELLSBURG_SMBUS_MS2, FEATURES_ICH5 | FEATURE_IDF) },
+ { PCI_DEVICE_DATA(INTEL, COLETOCREEK_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, GEMINILAKE_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, WILDCATPOINT_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, WILDCATPOINT_LP_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, BAYTRAIL_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, BRASWELL_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, SUNRISEPOINT_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_SPT) },
+ { PCI_DEVICE_DATA(INTEL, SUNRISEPOINT_LP_SMBUS, FEATURES_ICH5 | FEATURE_TCO_SPT) },
+ { PCI_DEVICE_DATA(INTEL, CDF_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, DNV_SMBUS, FEATURES_ICH5 | FEATURE_TCO_SPT) },
+ { PCI_DEVICE_DATA(INTEL, EBG_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, BROXTON_SMBUS, FEATURES_ICH5) },
+ { PCI_DEVICE_DATA(INTEL, LEWISBURG_SMBUS, FEATURES_ICH5 | FEATURE_TCO_SPT) },
+ { PCI_DEVICE_DATA(INTEL, LEWISBURG_SSKU_SMBUS, FEATURES_ICH5 | FEATURE_TCO_SPT) },
+ { PCI_DEVICE_DATA(INTEL, KABYLAKE_PCH_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_SPT) },
+ { PCI_DEVICE_DATA(INTEL, CANNONLAKE_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, CANNONLAKE_LP_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, ICELAKE_LP_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, ICELAKE_N_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, COMETLAKE_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, COMETLAKE_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, COMETLAKE_V_SMBUS, FEATURES_ICH5 | FEATURE_TCO_SPT) },
+ { PCI_DEVICE_DATA(INTEL, ELKHART_LAKE_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, TIGERLAKE_LP_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, TIGERLAKE_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, JASPER_LAKE_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, ALDER_LAKE_S_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, ALDER_LAKE_P_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
+ { PCI_DEVICE_DATA(INTEL, ALDER_LAKE_M_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) },
{ 0, }
};
@@ -1493,15 +1465,14 @@ static inline unsigned int i801_get_adapter_class(struct i801_priv *priv)
}
#endif
-static const struct itco_wdt_platform_data spt_tco_platform_data = {
- .name = "Intel PCH",
- .version = 4,
-};
-
static struct platform_device *
i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
struct resource *tco_res)
{
+ static const struct itco_wdt_platform_data pldata = {
+ .name = "Intel PCH",
+ .version = 4,
+ };
struct resource *res;
unsigned int devfn;
u64 base64_addr;
@@ -1544,22 +1515,20 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
res->flags = IORESOURCE_MEM;
return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
- tco_res, 2, &spt_tco_platform_data,
- sizeof(spt_tco_platform_data));
+ tco_res, 2, &pldata, sizeof(pldata));
}
-static const struct itco_wdt_platform_data cnl_tco_platform_data = {
- .name = "Intel PCH",
- .version = 6,
-};
-
static struct platform_device *
i801_add_tco_cnl(struct i801_priv *priv, struct pci_dev *pci_dev,
struct resource *tco_res)
{
- return platform_device_register_resndata(&pci_dev->dev,
- "iTCO_wdt", -1, tco_res, 1, &cnl_tco_platform_data,
- sizeof(cnl_tco_platform_data));
+ static const struct itco_wdt_platform_data pldata = {
+ .name = "Intel PCH",
+ .version = 6,
+ };
+
+ return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
+ tco_res, 1, &pldata, sizeof(pldata));
}
static void i801_add_tco(struct i801_priv *priv)
@@ -1697,72 +1666,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
mutex_init(&priv->acpi_lock);
priv->pci_dev = dev;
- switch (dev->device) {
- case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS:
- case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS:
- case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS:
- case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS:
- case PCI_DEVICE_ID_INTEL_DNV_SMBUS:
- case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS:
- case PCI_DEVICE_ID_INTEL_COMETLAKE_V_SMBUS:
- priv->features |= FEATURE_BLOCK_PROC;
- priv->features |= FEATURE_I2C_BLOCK_READ;
- priv->features |= FEATURE_IRQ;
- priv->features |= FEATURE_SMBUS_PEC;
- priv->features |= FEATURE_BLOCK_BUFFER;
- priv->features |= FEATURE_TCO_SPT;
- priv->features |= FEATURE_HOST_NOTIFY;
- break;
-
- case PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS:
- case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS:
- case PCI_DEVICE_ID_INTEL_CDF_SMBUS:
- case PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS:
- case PCI_DEVICE_ID_INTEL_ICELAKE_N_SMBUS:
- case PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS:
- case PCI_DEVICE_ID_INTEL_COMETLAKE_H_SMBUS:
- case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS:
- case PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS:
- case PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS:
- case PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS:
- case PCI_DEVICE_ID_INTEL_EBG_SMBUS:
- case PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS:
- case PCI_DEVICE_ID_INTEL_ALDER_LAKE_P_SMBUS:
- case PCI_DEVICE_ID_INTEL_ALDER_LAKE_M_SMBUS:
- priv->features |= FEATURE_BLOCK_PROC;
- priv->features |= FEATURE_I2C_BLOCK_READ;
- priv->features |= FEATURE_IRQ;
- priv->features |= FEATURE_SMBUS_PEC;
- priv->features |= FEATURE_BLOCK_BUFFER;
- priv->features |= FEATURE_TCO_CNL;
- priv->features |= FEATURE_HOST_NOTIFY;
- break;
-
- case PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF0:
- case PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF1:
- case PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF2:
- case PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS0:
- case PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS1:
- case PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS2:
- priv->features |= FEATURE_IDF;
- fallthrough;
- default:
- priv->features |= FEATURE_BLOCK_PROC;
- priv->features |= FEATURE_I2C_BLOCK_READ;
- priv->features |= FEATURE_IRQ;
- fallthrough;
- case PCI_DEVICE_ID_INTEL_82801DB_3:
- priv->features |= FEATURE_SMBUS_PEC;
- priv->features |= FEATURE_BLOCK_BUFFER;
- fallthrough;
- case PCI_DEVICE_ID_INTEL_82801CA_3:
- priv->features |= FEATURE_HOST_NOTIFY;
- fallthrough;
- case PCI_DEVICE_ID_INTEL_82801BA_2:
- case PCI_DEVICE_ID_INTEL_82801AB_3:
- case PCI_DEVICE_ID_INTEL_82801AA_3:
- break;
- }
+ priv->features = id->driver_data;
/* Disable features on user request */
for (i = 0; i < ARRAY_SIZE(i801_feature_names); i++) {
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 3576b63a6c03..27f969b3dc07 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -37,6 +37,8 @@
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/hrtimer.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -51,6 +53,8 @@
/* This will be the driver name the kernel reports */
#define DRIVER_NAME "imx-i2c"
+#define I2C_IMX_CHECK_DELAY 30000 /* Time to check for bus idle, in NS */
+
/*
* Enable DMA if transfer byte size is bigger than this threshold.
* As the hardware request, it must bigger than 4 bytes.\
@@ -210,6 +214,10 @@ struct imx_i2c_struct {
struct imx_i2c_dma *dma;
struct i2c_client *slave;
enum i2c_slave_event last_slave_event;
+
+ /* For checking slave events. */
+ spinlock_t slave_lock;
+ struct hrtimer slave_timer;
};
static const struct imx_i2c_hwdata imx1_i2c_hwdata = {
@@ -680,7 +688,7 @@ static void i2c_imx_slave_event(struct imx_i2c_struct *i2c_imx,
static void i2c_imx_slave_finish_op(struct imx_i2c_struct *i2c_imx)
{
- u8 val;
+ u8 val = 0;
while (i2c_imx->last_slave_event != I2C_SLAVE_STOP) {
switch (i2c_imx->last_slave_event) {
@@ -701,10 +709,11 @@ static void i2c_imx_slave_finish_op(struct imx_i2c_struct *i2c_imx)
}
}
-static irqreturn_t i2c_imx_slave_isr(struct imx_i2c_struct *i2c_imx,
- unsigned int status, unsigned int ctl)
+/* Returns true if the timer should be restarted, false if not. */
+static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx,
+ unsigned int status, unsigned int ctl)
{
- u8 value;
+ u8 value = 0;
if (status & I2SR_IAL) { /* Arbitration lost */
i2c_imx_clear_irq(i2c_imx, I2SR_IAL);
@@ -712,6 +721,16 @@ static irqreturn_t i2c_imx_slave_isr(struct imx_i2c_struct *i2c_imx,
return IRQ_HANDLED;
}
+ if (!(status & I2SR_IBB)) {
+ /* No master on the bus, that could mean a stop condition. */
+ i2c_imx_slave_finish_op(i2c_imx);
+ return IRQ_HANDLED;
+ }
+
+ if (!(status & I2SR_ICF))
+ /* Data transfer still in progress, ignore this. */
+ goto out;
+
if (status & I2SR_IAAS) { /* Addressed as a slave */
i2c_imx_slave_finish_op(i2c_imx);
if (status & I2SR_SRW) { /* Master wants to read from us*/
@@ -737,16 +756,9 @@ static irqreturn_t i2c_imx_slave_isr(struct imx_i2c_struct *i2c_imx,
imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
}
} else if (!(ctl & I2CR_MTX)) { /* Receive mode */
- if (status & I2SR_IBB) { /* No STOP signal detected */
- value = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
- i2c_imx_slave_event(i2c_imx,
- I2C_SLAVE_WRITE_RECEIVED, &value);
- } else { /* STOP signal is detected */
- dev_dbg(&i2c_imx->adapter.dev,
- "STOP signal detected");
- i2c_imx_slave_event(i2c_imx,
- I2C_SLAVE_STOP, &value);
- }
+ value = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
+ i2c_imx_slave_event(i2c_imx,
+ I2C_SLAVE_WRITE_RECEIVED, &value);
} else if (!(status & I2SR_RXAK)) { /* Transmit mode received ACK */
ctl |= I2CR_MTX;
imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR);
@@ -755,15 +767,43 @@ static irqreturn_t i2c_imx_slave_isr(struct imx_i2c_struct *i2c_imx,
I2C_SLAVE_READ_PROCESSED, &value);
imx_i2c_write_reg(value, i2c_imx, IMX_I2C_I2DR);
- } else { /* Transmit mode received NAK */
+ } else { /* Transmit mode received NAK, operation is done */
ctl &= ~I2CR_MTX;
imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR);
imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
+ i2c_imx_slave_finish_op(i2c_imx);
+ return IRQ_HANDLED;
}
+out:
+ /*
+ * No need to check the return value here. If it returns 0 or
+ * 1, then everything is fine. If it returns -1, then the
+ * timer is running in the handler. This will still work,
+ * though it may be redone (or already have been done) by the
+ * timer function.
+ */
+ hrtimer_try_to_cancel(&i2c_imx->slave_timer);
+ hrtimer_forward_now(&i2c_imx->slave_timer, I2C_IMX_CHECK_DELAY);
+ hrtimer_restart(&i2c_imx->slave_timer);
return IRQ_HANDLED;
}
+static enum hrtimer_restart i2c_imx_slave_timeout(struct hrtimer *t)
+{
+ struct imx_i2c_struct *i2c_imx = container_of(t, struct imx_i2c_struct,
+ slave_timer);
+ unsigned int ctl, status;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i2c_imx->slave_lock, flags);
+ status = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2SR);
+ ctl = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
+ i2c_imx_slave_handle(i2c_imx, status, ctl);
+ spin_unlock_irqrestore(&i2c_imx->slave_lock, flags);
+ return HRTIMER_NORESTART;
+}
+
static void i2c_imx_slave_init(struct imx_i2c_struct *i2c_imx)
{
int temp;
@@ -843,7 +883,9 @@ static irqreturn_t i2c_imx_isr(int irq, void *dev_id)
{
struct imx_i2c_struct *i2c_imx = dev_id;
unsigned int ctl, status;
+ unsigned long flags;
+ spin_lock_irqsave(&i2c_imx->slave_lock, flags);
status = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2SR);
ctl = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
@@ -851,14 +893,20 @@ static irqreturn_t i2c_imx_isr(int irq, void *dev_id)
i2c_imx_clear_irq(i2c_imx, I2SR_IIF);
if (i2c_imx->slave) {
if (!(ctl & I2CR_MSTA)) {
- return i2c_imx_slave_isr(i2c_imx, status, ctl);
- } else if (i2c_imx->last_slave_event !=
- I2C_SLAVE_STOP) {
- i2c_imx_slave_finish_op(i2c_imx);
+ irqreturn_t ret;
+
+ ret = i2c_imx_slave_handle(i2c_imx,
+ status, ctl);
+ spin_unlock_irqrestore(&i2c_imx->slave_lock,
+ flags);
+ return ret;
}
+ i2c_imx_slave_finish_op(i2c_imx);
}
+ spin_unlock_irqrestore(&i2c_imx->slave_lock, flags);
return i2c_imx_master_isr(i2c_imx, status);
}
+ spin_unlock_irqrestore(&i2c_imx->slave_lock, flags);
return IRQ_NONE;
}
@@ -1378,6 +1426,10 @@ static int i2c_imx_probe(struct platform_device *pdev)
if (!i2c_imx)
return -ENOMEM;
+ spin_lock_init(&i2c_imx->slave_lock);
+ hrtimer_init(&i2c_imx->slave_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ i2c_imx->slave_timer.function = i2c_imx_slave_timeout;
+
match = device_get_match_data(&pdev->dev);
if (match)
i2c_imx->hwdata = match;
@@ -1491,6 +1543,8 @@ static int i2c_imx_remove(struct platform_device *pdev)
if (ret < 0)
return ret;
+ hrtimer_cancel(&i2c_imx->slave_timer);
+
/* remove adapter */
dev_dbg(&i2c_imx->adapter.dev, "adapter removed\n");
i2c_del_adapter(&i2c_imx->adapter);
diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c
index db26cc36e13f..6c698c10d3cd 100644
--- a/drivers/i2c/busses/i2c-mpc.c
+++ b/drivers/i2c/busses/i2c-mpc.c
@@ -119,23 +119,30 @@ static inline void writeccr(struct mpc_i2c *i2c, u32 x)
/* Sometimes 9th clock pulse isn't generated, and slave doesn't release
* the bus, because it wants to send ACK.
* Following sequence of enabling/disabling and sending start/stop generates
- * the 9 pulses, so it's all OK.
+ * the 9 pulses, each with a START then ending with STOP, so it's all OK.
*/
static void mpc_i2c_fixup(struct mpc_i2c *i2c)
{
int k;
- u32 delay_val = 1000000 / i2c->real_clk + 1;
-
- if (delay_val < 2)
- delay_val = 2;
+ unsigned long flags;
for (k = 9; k; k--) {
writeccr(i2c, 0);
- writeccr(i2c, CCR_MSTA | CCR_MTX | CCR_MEN);
+ writeb(0, i2c->base + MPC_I2C_SR); /* clear any status bits */
+ writeccr(i2c, CCR_MEN | CCR_MSTA); /* START */
+ readb(i2c->base + MPC_I2C_DR); /* init xfer */
+ udelay(15); /* let it hit the bus */
+ local_irq_save(flags); /* should not be delayed further */
+ writeccr(i2c, CCR_MEN | CCR_MSTA | CCR_RSTA); /* delay SDA */
readb(i2c->base + MPC_I2C_DR);
- writeccr(i2c, CCR_MEN);
- udelay(delay_val << 1);
+ if (k != 1)
+ udelay(5);
+ local_irq_restore(flags);
}
+ writeccr(i2c, CCR_MEN); /* Initiate STOP */
+ readb(i2c->base + MPC_I2C_DR);
+ udelay(15); /* Let STOP propagate */
+ writeccr(i2c, 0);
}
static int i2c_mpc_wait_sr(struct mpc_i2c *i2c, int mask)
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index fc13511f4562..f71c730f9838 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -367,11 +367,15 @@ static void rcar_i2c_next_msg(struct rcar_i2c_priv *priv)
rcar_i2c_prepare_msg(priv);
}
-static void rcar_i2c_dma_unmap(struct rcar_i2c_priv *priv)
+static void rcar_i2c_cleanup_dma(struct rcar_i2c_priv *priv, bool terminate)
{
struct dma_chan *chan = priv->dma_direction == DMA_FROM_DEVICE
? priv->dma_rx : priv->dma_tx;
+ /* only allowed from thread context! */
+ if (terminate)
+ dmaengine_terminate_sync(chan);
+
dma_unmap_single(chan->device->dev, sg_dma_address(&priv->sg),
sg_dma_len(&priv->sg), priv->dma_direction);
@@ -386,25 +390,13 @@ static void rcar_i2c_dma_unmap(struct rcar_i2c_priv *priv)
rcar_i2c_write(priv, ICDMAER, 0);
}
-static void rcar_i2c_cleanup_dma(struct rcar_i2c_priv *priv)
-{
- if (priv->dma_direction == DMA_NONE)
- return;
- else if (priv->dma_direction == DMA_FROM_DEVICE)
- dmaengine_terminate_all(priv->dma_rx);
- else if (priv->dma_direction == DMA_TO_DEVICE)
- dmaengine_terminate_all(priv->dma_tx);
-
- rcar_i2c_dma_unmap(priv);
-}
-
static void rcar_i2c_dma_callback(void *data)
{
struct rcar_i2c_priv *priv = data;
priv->pos += sg_dma_len(&priv->sg);
- rcar_i2c_dma_unmap(priv);
+ rcar_i2c_cleanup_dma(priv, false);
}
static bool rcar_i2c_dma(struct rcar_i2c_priv *priv)
@@ -456,7 +448,7 @@ static bool rcar_i2c_dma(struct rcar_i2c_priv *priv)
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!txdesc) {
dev_dbg(dev, "dma prep slave sg failed, using PIO\n");
- rcar_i2c_cleanup_dma(priv);
+ rcar_i2c_cleanup_dma(priv, false);
return false;
}
@@ -466,7 +458,7 @@ static bool rcar_i2c_dma(struct rcar_i2c_priv *priv)
cookie = dmaengine_submit(txdesc);
if (dma_submit_error(cookie)) {
dev_dbg(dev, "submitting dma failed, using PIO\n");
- rcar_i2c_cleanup_dma(priv);
+ rcar_i2c_cleanup_dma(priv, false);
return false;
}
@@ -846,7 +838,7 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap,
/* cleanup DMA if it couldn't complete properly due to an error */
if (priv->dma_direction != DMA_NONE)
- rcar_i2c_cleanup_dma(priv);
+ rcar_i2c_cleanup_dma(priv, true);
if (!time_left) {
rcar_i2c_init(priv);
diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c
index 78b84445ee6a..8dfd27dc6149 100644
--- a/drivers/i2c/busses/i2c-riic.c
+++ b/drivers/i2c/busses/i2c-riic.c
@@ -433,12 +433,12 @@ static int riic_i2c_probe(struct platform_device *pdev)
}
for (i = 0; i < ARRAY_SIZE(riic_irqs); i++) {
- res = platform_get_resource(pdev, IORESOURCE_IRQ, riic_irqs[i].res_num);
- if (!res)
- return -ENODEV;
+ ret = platform_get_irq(pdev, riic_irqs[i].res_num);
+ if (ret < 0)
+ return ret;
- ret = devm_request_irq(&pdev->dev, res->start, riic_irqs[i].isr,
- 0, riic_irqs[i].name, riic);
+ ret = devm_request_irq(&pdev->dev, ret, riic_irqs[i].isr,
+ 0, riic_irqs[i].name, riic);
if (ret) {
dev_err(&pdev->dev, "failed to request irq %s\n", riic_irqs[i].name);
return ret;
diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c
index 02ddb237f69a..989040a73626 100644
--- a/drivers/i2c/busses/i2c-rk3x.c
+++ b/drivers/i2c/busses/i2c-rk3x.c
@@ -1338,8 +1338,15 @@ static int rk3x_i2c_probe(struct platform_device *pdev)
goto err_pclk;
}
+ ret = clk_enable(i2c->clk);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Can't enable bus clk: %d\n", ret);
+ goto err_clk_notifier;
+ }
+
clk_rate = clk_get_rate(i2c->clk);
rk3x_i2c_adapt_div(i2c, clk_rate);
+ clk_disable(i2c->clk);
ret = i2c_add_adapter(&i2c->adap);
if (ret < 0)
diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index db8fa4186814..72f024a0c363 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -442,34 +442,26 @@ static irqreturn_t sh_mobile_i2c_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void sh_mobile_i2c_dma_unmap(struct sh_mobile_i2c_data *pd)
+static void sh_mobile_i2c_cleanup_dma(struct sh_mobile_i2c_data *pd, bool terminate)
{
struct dma_chan *chan = pd->dma_direction == DMA_FROM_DEVICE
? pd->dma_rx : pd->dma_tx;
+ /* only allowed from thread context! */
+ if (terminate)
+ dmaengine_terminate_sync(chan);
+
dma_unmap_single(chan->device->dev, sg_dma_address(&pd->sg),
pd->msg->len, pd->dma_direction);
pd->dma_direction = DMA_NONE;
}
-static void sh_mobile_i2c_cleanup_dma(struct sh_mobile_i2c_data *pd)
-{
- if (pd->dma_direction == DMA_NONE)
- return;
- else if (pd->dma_direction == DMA_FROM_DEVICE)
- dmaengine_terminate_sync(pd->dma_rx);
- else if (pd->dma_direction == DMA_TO_DEVICE)
- dmaengine_terminate_sync(pd->dma_tx);
-
- sh_mobile_i2c_dma_unmap(pd);
-}
-
static void sh_mobile_i2c_dma_callback(void *data)
{
struct sh_mobile_i2c_data *pd = data;
- sh_mobile_i2c_dma_unmap(pd);
+ sh_mobile_i2c_cleanup_dma(pd, false);
pd->pos = pd->msg->len;
pd->stop_after_dma = true;
@@ -549,7 +541,7 @@ static void sh_mobile_i2c_xfer_dma(struct sh_mobile_i2c_data *pd)
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!txdesc) {
dev_dbg(pd->dev, "dma prep slave sg failed, using PIO\n");
- sh_mobile_i2c_cleanup_dma(pd);
+ sh_mobile_i2c_cleanup_dma(pd, false);
return;
}
@@ -559,7 +551,7 @@ static void sh_mobile_i2c_xfer_dma(struct sh_mobile_i2c_data *pd)
cookie = dmaengine_submit(txdesc);
if (dma_submit_error(cookie)) {
dev_dbg(pd->dev, "submitting dma failed, using PIO\n");
- sh_mobile_i2c_cleanup_dma(pd);
+ sh_mobile_i2c_cleanup_dma(pd, false);
return;
}
@@ -698,7 +690,7 @@ static int sh_mobile_xfer(struct sh_mobile_i2c_data *pd,
if (!time_left) {
dev_err(pd->dev, "Transfer request timed out\n");
if (pd->dma_direction != DMA_NONE)
- sh_mobile_i2c_cleanup_dma(pd);
+ sh_mobile_i2c_cleanup_dma(pd, true);
err = -ETIMEDOUT;
break;
@@ -838,20 +830,38 @@ static void sh_mobile_i2c_release_dma(struct sh_mobile_i2c_data *pd)
static int sh_mobile_i2c_hook_irqs(struct platform_device *dev, struct sh_mobile_i2c_data *pd)
{
- struct resource *res;
- resource_size_t n;
+ struct device_node *np = dev_of_node(&dev->dev);
int k = 0, ret;
- while ((res = platform_get_resource(dev, IORESOURCE_IRQ, k))) {
- for (n = res->start; n <= res->end; n++) {
- ret = devm_request_irq(&dev->dev, n, sh_mobile_i2c_isr,
- 0, dev_name(&dev->dev), pd);
+ if (np) {
+ int irq;
+
+ while ((irq = platform_get_irq_optional(dev, k)) != -ENXIO) {
+ if (irq < 0)
+ return irq;
+ ret = devm_request_irq(&dev->dev, irq, sh_mobile_i2c_isr,
+ 0, dev_name(&dev->dev), pd);
if (ret) {
- dev_err(&dev->dev, "cannot request IRQ %pa\n", &n);
+ dev_err(&dev->dev, "cannot request IRQ %d\n", irq);
return ret;
}
+ k++;
+ }
+ } else {
+ struct resource *res;
+ resource_size_t n;
+
+ while ((res = platform_get_resource(dev, IORESOURCE_IRQ, k))) {
+ for (n = res->start; n <= res->end; n++) {
+ ret = devm_request_irq(&dev->dev, n, sh_mobile_i2c_isr,
+ 0, dev_name(&dev->dev), pd);
+ if (ret) {
+ dev_err(&dev->dev, "cannot request IRQ %pa\n", &n);
+ return ret;
+ }
+ }
+ k++;
}
- k++;
}
return k > 0 ? 0 : -ENOENT;
diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c
index 66145d2b9b55..6d4aa64b195d 100644
--- a/drivers/i2c/busses/i2c-stm32f7.c
+++ b/drivers/i2c/busses/i2c-stm32f7.c
@@ -828,18 +828,14 @@ static void stm32f7_i2c_smbus_reload(struct stm32f7_i2c_dev *i2c_dev)
writel_relaxed(cr2, i2c_dev->base + STM32F7_I2C_CR2);
}
-static int stm32f7_i2c_release_bus(struct i2c_adapter *i2c_adap)
+static void stm32f7_i2c_release_bus(struct i2c_adapter *i2c_adap)
{
struct stm32f7_i2c_dev *i2c_dev = i2c_get_adapdata(i2c_adap);
- dev_info(i2c_dev->dev, "Trying to recover bus\n");
-
stm32f7_i2c_clr_bits(i2c_dev->base + STM32F7_I2C_CR1,
STM32F7_I2C_CR1_PE);
stm32f7_i2c_hw_config(i2c_dev);
-
- return 0;
}
static int stm32f7_i2c_wait_free_bus(struct stm32f7_i2c_dev *i2c_dev)
@@ -854,13 +850,7 @@ static int stm32f7_i2c_wait_free_bus(struct stm32f7_i2c_dev *i2c_dev)
if (!ret)
return 0;
- dev_info(i2c_dev->dev, "bus busy\n");
-
- ret = stm32f7_i2c_release_bus(&i2c_dev->adap);
- if (ret) {
- dev_err(i2c_dev->dev, "Failed to recover the bus (%d)\n", ret);
- return ret;
- }
+ stm32f7_i2c_release_bus(&i2c_dev->adap);
return -EBUSY;
}
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index b3184c422826..03cea102ab76 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -6,6 +6,7 @@
* Author: Colin Cross <ccross@android.com>
*/
+#include <linux/acpi.h>
#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/delay.h>
@@ -245,7 +246,7 @@ struct tegra_i2c_hw_feature {
* @msg_buf: pointer to current message data
* @msg_buf_remaining: size of unsent data in the message buffer
* @msg_read: indicates that the transfer is a read access
- * @bus_clk_rate: current I2C bus clock rate
+ * @timings: i2c timings information like bus frequency
* @multimaster_mode: indicates that I2C controller is in multi-master mode
* @tx_dma_chan: DMA transmit channel
* @rx_dma_chan: DMA receive channel
@@ -272,7 +273,7 @@ struct tegra_i2c_dev {
unsigned int nclocks;
struct clk *div_clk;
- u32 bus_clk_rate;
+ struct i2c_timings timings;
struct completion msg_complete;
size_t msg_buf_remaining;
@@ -608,6 +609,8 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev)
static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev)
{
u32 val, clk_divisor, clk_multiplier, tsu_thd, tlow, thigh, non_hs_mode;
+ acpi_handle handle = ACPI_HANDLE(i2c_dev->dev);
+ struct i2c_timings *t = &i2c_dev->timings;
int err;
/*
@@ -618,7 +621,11 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev)
* emit a noisy warning on error, which won't stay unnoticed and
* won't hose machine entirely.
*/
- err = reset_control_reset(i2c_dev->rst);
+ if (handle)
+ err = acpi_evaluate_object(handle, "_RST", NULL, NULL);
+ else
+ err = reset_control_reset(i2c_dev->rst);
+
WARN_ON_ONCE(err);
if (i2c_dev->is_dvc)
@@ -636,14 +643,14 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev)
if (i2c_dev->is_vi)
tegra_i2c_vi_init(i2c_dev);
- switch (i2c_dev->bus_clk_rate) {
+ switch (t->bus_freq_hz) {
case I2C_MAX_STANDARD_MODE_FREQ + 1 ... I2C_MAX_FAST_MODE_PLUS_FREQ:
default:
tlow = i2c_dev->hw->tlow_fast_fastplus_mode;
thigh = i2c_dev->hw->thigh_fast_fastplus_mode;
tsu_thd = i2c_dev->hw->setup_hold_time_fast_fast_plus_mode;
- if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ)
+ if (t->bus_freq_hz > I2C_MAX_FAST_MODE_FREQ)
non_hs_mode = i2c_dev->hw->clk_divisor_fast_plus_mode;
else
non_hs_mode = i2c_dev->hw->clk_divisor_fast_mode;
@@ -679,7 +686,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev)
clk_multiplier = (tlow + thigh + 2) * (non_hs_mode + 1);
err = clk_set_rate(i2c_dev->div_clk,
- i2c_dev->bus_clk_rate * clk_multiplier);
+ t->bus_freq_hz * clk_multiplier);
if (err) {
dev_err(i2c_dev->dev, "failed to set div-clk rate: %d\n", err);
return err;
@@ -718,7 +725,7 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev)
* before disabling the controller so that the STOP condition has
* been delivered properly.
*/
- udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate));
+ udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->timings.bus_freq_hz));
cnfg = i2c_readl(i2c_dev, I2C_CNFG);
if (cnfg & I2C_CNFG_PACKET_MODE_EN)
@@ -1248,7 +1255,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
* Total bits = 9 bits per byte (including ACK bit) + Start & stop bits
*/
xfer_time += DIV_ROUND_CLOSEST(((xfer_size * 9) + 2) * MSEC_PER_SEC,
- i2c_dev->bus_clk_rate);
+ i2c_dev->timings.bus_freq_hz);
int_mask = I2C_INT_NO_ACK | I2C_INT_ARBITRATION_LOST;
tegra_i2c_unmask_irq(i2c_dev, int_mask);
@@ -1625,14 +1632,10 @@ static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev)
{
struct device_node *np = i2c_dev->dev->of_node;
bool multi_mode;
- int err;
- err = of_property_read_u32(np, "clock-frequency",
- &i2c_dev->bus_clk_rate);
- if (err)
- i2c_dev->bus_clk_rate = I2C_MAX_STANDARD_MODE_FREQ;
+ i2c_parse_fw_timings(i2c_dev->dev, &i2c_dev->timings, true);
- multi_mode = of_property_read_bool(np, "multi-master");
+ multi_mode = device_property_read_bool(i2c_dev->dev, "multi-master");
i2c_dev->multimaster_mode = multi_mode;
if (of_device_is_compatible(np, "nvidia,tegra20-i2c-dvc"))
@@ -1642,10 +1645,26 @@ static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev)
i2c_dev->is_vi = true;
}
+static int tegra_i2c_init_reset(struct tegra_i2c_dev *i2c_dev)
+{
+ if (ACPI_HANDLE(i2c_dev->dev))
+ return 0;
+
+ i2c_dev->rst = devm_reset_control_get_exclusive(i2c_dev->dev, "i2c");
+ if (IS_ERR(i2c_dev->rst))
+ return dev_err_probe(i2c_dev->dev, PTR_ERR(i2c_dev->rst),
+ "failed to get reset control\n");
+
+ return 0;
+}
+
static int tegra_i2c_init_clocks(struct tegra_i2c_dev *i2c_dev)
{
int err;
+ if (ACPI_HANDLE(i2c_dev->dev))
+ return 0;
+
i2c_dev->clocks[i2c_dev->nclocks++].id = "div-clk";
if (i2c_dev->hw == &tegra20_i2c_hw || i2c_dev->hw == &tegra30_i2c_hw)
@@ -1720,7 +1739,7 @@ static int tegra_i2c_probe(struct platform_device *pdev)
init_completion(&i2c_dev->msg_complete);
init_completion(&i2c_dev->dma_complete);
- i2c_dev->hw = of_device_get_match_data(&pdev->dev);
+ i2c_dev->hw = device_get_match_data(&pdev->dev);
i2c_dev->cont_id = pdev->id;
i2c_dev->dev = &pdev->dev;
@@ -1746,15 +1765,12 @@ static int tegra_i2c_probe(struct platform_device *pdev)
if (err)
return err;
- i2c_dev->rst = devm_reset_control_get_exclusive(i2c_dev->dev, "i2c");
- if (IS_ERR(i2c_dev->rst)) {
- dev_err_probe(i2c_dev->dev, PTR_ERR(i2c_dev->rst),
- "failed to get reset control\n");
- return PTR_ERR(i2c_dev->rst);
- }
-
tegra_i2c_parse_dt(i2c_dev);
+ err = tegra_i2c_init_reset(i2c_dev);
+ if (err)
+ return err;
+
err = tegra_i2c_init_clocks(i2c_dev);
if (err)
return err;
@@ -1923,12 +1939,21 @@ static const struct dev_pm_ops tegra_i2c_pm = {
NULL)
};
+static const struct acpi_device_id tegra_i2c_acpi_match[] = {
+ {.id = "NVDA0101", .driver_data = (kernel_ulong_t)&tegra210_i2c_hw},
+ {.id = "NVDA0201", .driver_data = (kernel_ulong_t)&tegra186_i2c_hw},
+ {.id = "NVDA0301", .driver_data = (kernel_ulong_t)&tegra194_i2c_hw},
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, tegra_i2c_acpi_match);
+
static struct platform_driver tegra_i2c_driver = {
.probe = tegra_i2c_probe,
.remove = tegra_i2c_remove,
.driver = {
.name = "tegra-i2c",
.of_match_table = tegra_i2c_of_match,
+ .acpi_match_table = tegra_i2c_acpi_match,
.pm = &tegra_i2c_pm,
},
};
diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c
index 41eb0dcc3204..4b9536f50800 100644
--- a/drivers/i2c/busses/i2c-virtio.c
+++ b/drivers/i2c/busses/i2c-virtio.c
@@ -165,7 +165,7 @@ err_free:
static void virtio_i2c_del_vqs(struct virtio_device *vdev)
{
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
}
diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
index 6d24dc385522..4e3b11c0f732 100644
--- a/drivers/i2c/busses/i2c-xlp9xx.c
+++ b/drivers/i2c/busses/i2c-xlp9xx.c
@@ -572,12 +572,6 @@ static int xlp9xx_i2c_remove(struct platform_device *pdev)
return 0;
}
-static const struct of_device_id xlp9xx_i2c_of_match[] = {
- { .compatible = "netlogic,xlp980-i2c", },
- { /* sentinel */ },
-};
-MODULE_DEVICE_TABLE(of, xlp9xx_i2c_of_match);
-
#ifdef CONFIG_ACPI
static const struct acpi_device_id xlp9xx_i2c_acpi_ids[] = {
{"BRCM9007", 0},
@@ -592,7 +586,6 @@ static struct platform_driver xlp9xx_i2c_driver = {
.remove = xlp9xx_i2c_remove,
.driver = {
.name = "xlp9xx-i2c",
- .of_match_table = xlp9xx_i2c_of_match,
.acpi_match_table = ACPI_PTR(xlp9xx_i2c_acpi_ids),
},
};
diff --git a/drivers/i2c/busses/i2c-xlr.c b/drivers/i2c/busses/i2c-xlr.c
deleted file mode 100644
index 9ce20652d494..000000000000
--- a/drivers/i2c/busses/i2c-xlr.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Copyright 2011, Netlogic Microsystems Inc.
- * Copyright 2004, Matt Porter <mporter@kernel.crashing.org>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#include <linux/err.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/ioport.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/i2c.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/of_device.h>
-#include <linux/clk.h>
-#include <linux/interrupt.h>
-#include <linux/wait.h>
-
-/* XLR I2C REGISTERS */
-#define XLR_I2C_CFG 0x00
-#define XLR_I2C_CLKDIV 0x01
-#define XLR_I2C_DEVADDR 0x02
-#define XLR_I2C_ADDR 0x03
-#define XLR_I2C_DATAOUT 0x04
-#define XLR_I2C_DATAIN 0x05
-#define XLR_I2C_STATUS 0x06
-#define XLR_I2C_STARTXFR 0x07
-#define XLR_I2C_BYTECNT 0x08
-#define XLR_I2C_HDSTATIM 0x09
-
-/* Sigma Designs additional registers */
-#define XLR_I2C_INT_EN 0x09
-#define XLR_I2C_INT_STAT 0x0a
-
-/* XLR I2C REGISTERS FLAGS */
-#define XLR_I2C_BUS_BUSY 0x01
-#define XLR_I2C_SDOEMPTY 0x02
-#define XLR_I2C_RXRDY 0x04
-#define XLR_I2C_ACK_ERR 0x08
-#define XLR_I2C_ARB_STARTERR 0x30
-
-/* Register Values */
-#define XLR_I2C_CFG_ADDR 0xF8
-#define XLR_I2C_CFG_NOADDR 0xFA
-#define XLR_I2C_STARTXFR_ND 0x02 /* No Data */
-#define XLR_I2C_STARTXFR_RD 0x01 /* Read */
-#define XLR_I2C_STARTXFR_WR 0x00 /* Write */
-
-#define XLR_I2C_TIMEOUT 10 /* timeout per byte in msec */
-
-/*
- * On XLR/XLS, we need to use __raw_ IO to read the I2C registers
- * because they are in the big-endian MMIO area on the SoC.
- *
- * The readl/writel implementation on XLR/XLS byteswaps, because
- * those are for its little-endian PCI space (see arch/mips/Kconfig).
- */
-static inline void xlr_i2c_wreg(u32 __iomem *base, unsigned int reg, u32 val)
-{
- __raw_writel(val, base + reg);
-}
-
-static inline u32 xlr_i2c_rdreg(u32 __iomem *base, unsigned int reg)
-{
- return __raw_readl(base + reg);
-}
-
-#define XLR_I2C_FLAG_IRQ 1
-
-struct xlr_i2c_config {
- u32 flags; /* optional feature support */
- u32 status_busy; /* value of STATUS[0] when busy */
- u32 cfg_extra; /* extra CFG bits to set */
-};
-
-struct xlr_i2c_private {
- struct i2c_adapter adap;
- u32 __iomem *iobase;
- int irq;
- int pos;
- struct i2c_msg *msg;
- const struct xlr_i2c_config *cfg;
- wait_queue_head_t wait;
- struct clk *clk;
-};
-
-static int xlr_i2c_busy(struct xlr_i2c_private *priv, u32 status)
-{
- return (status & XLR_I2C_BUS_BUSY) == priv->cfg->status_busy;
-}
-
-static int xlr_i2c_idle(struct xlr_i2c_private *priv)
-{
- return !xlr_i2c_busy(priv, xlr_i2c_rdreg(priv->iobase, XLR_I2C_STATUS));
-}
-
-static int xlr_i2c_wait(struct xlr_i2c_private *priv, unsigned long timeout)
-{
- int status;
- int t;
-
- t = wait_event_timeout(priv->wait, xlr_i2c_idle(priv),
- msecs_to_jiffies(timeout));
- if (!t)
- return -ETIMEDOUT;
-
- status = xlr_i2c_rdreg(priv->iobase, XLR_I2C_STATUS);
-
- return status & XLR_I2C_ACK_ERR ? -EIO : 0;
-}
-
-static void xlr_i2c_tx_irq(struct xlr_i2c_private *priv, u32 status)
-{
- struct i2c_msg *msg = priv->msg;
-
- if (status & XLR_I2C_SDOEMPTY)
- xlr_i2c_wreg(priv->iobase, XLR_I2C_DATAOUT,
- msg->buf[priv->pos++]);
-}
-
-static void xlr_i2c_rx_irq(struct xlr_i2c_private *priv, u32 status)
-{
- struct i2c_msg *msg = priv->msg;
-
- if (status & XLR_I2C_RXRDY)
- msg->buf[priv->pos++] =
- xlr_i2c_rdreg(priv->iobase, XLR_I2C_DATAIN);
-}
-
-static irqreturn_t xlr_i2c_irq(int irq, void *dev_id)
-{
- struct xlr_i2c_private *priv = dev_id;
- struct i2c_msg *msg = priv->msg;
- u32 int_stat, status;
-
- int_stat = xlr_i2c_rdreg(priv->iobase, XLR_I2C_INT_STAT);
- if (!int_stat)
- return IRQ_NONE;
-
- xlr_i2c_wreg(priv->iobase, XLR_I2C_INT_STAT, int_stat);
-
- if (!msg)
- return IRQ_HANDLED;
-
- status = xlr_i2c_rdreg(priv->iobase, XLR_I2C_STATUS);
-
- if (priv->pos < msg->len) {
- if (msg->flags & I2C_M_RD)
- xlr_i2c_rx_irq(priv, status);
- else
- xlr_i2c_tx_irq(priv, status);
- }
-
- if (!xlr_i2c_busy(priv, status))
- wake_up(&priv->wait);
-
- return IRQ_HANDLED;
-}
-
-static int xlr_i2c_tx(struct xlr_i2c_private *priv, u16 len,
- u8 *buf, u16 addr)
-{
- struct i2c_adapter *adap = &priv->adap;
- unsigned long timeout, stoptime, checktime;
- u32 i2c_status;
- int pos, timedout;
- u8 offset;
- u32 xfer;
-
- offset = buf[0];
- xlr_i2c_wreg(priv->iobase, XLR_I2C_ADDR, offset);
- xlr_i2c_wreg(priv->iobase, XLR_I2C_DEVADDR, addr);
- xlr_i2c_wreg(priv->iobase, XLR_I2C_CFG,
- XLR_I2C_CFG_ADDR | priv->cfg->cfg_extra);
-
- timeout = msecs_to_jiffies(XLR_I2C_TIMEOUT);
- stoptime = jiffies + timeout;
- timedout = 0;
-
- if (len == 1) {
- xlr_i2c_wreg(priv->iobase, XLR_I2C_BYTECNT, len - 1);
- xfer = XLR_I2C_STARTXFR_ND;
- pos = 1;
- } else {
- xlr_i2c_wreg(priv->iobase, XLR_I2C_BYTECNT, len - 2);
- xlr_i2c_wreg(priv->iobase, XLR_I2C_DATAOUT, buf[1]);
- xfer = XLR_I2C_STARTXFR_WR;
- pos = 2;
- }
-
- priv->pos = pos;
-
-retry:
- /* retry can only happen on the first byte */
- xlr_i2c_wreg(priv->iobase, XLR_I2C_STARTXFR, xfer);
-
- if (priv->irq > 0)
- return xlr_i2c_wait(priv, XLR_I2C_TIMEOUT * len);
-
- while (!timedout) {
- checktime = jiffies;
- i2c_status = xlr_i2c_rdreg(priv->iobase, XLR_I2C_STATUS);
-
- if ((i2c_status & XLR_I2C_SDOEMPTY) && pos < len) {
- xlr_i2c_wreg(priv->iobase, XLR_I2C_DATAOUT, buf[pos++]);
-
- /* reset timeout on successful xmit */
- stoptime = jiffies + timeout;
- }
- timedout = time_after(checktime, stoptime);
-
- if (i2c_status & XLR_I2C_ARB_STARTERR) {
- if (timedout)
- break;
- goto retry;
- }
-
- if (i2c_status & XLR_I2C_ACK_ERR)
- return -EIO;
-
- if (!xlr_i2c_busy(priv, i2c_status) && pos >= len)
- return 0;
- }
- dev_err(&adap->dev, "I2C transmit timeout\n");
- return -ETIMEDOUT;
-}
-
-static int xlr_i2c_rx(struct xlr_i2c_private *priv, u16 len, u8 *buf, u16 addr)
-{
- struct i2c_adapter *adap = &priv->adap;
- u32 i2c_status;
- unsigned long timeout, stoptime, checktime;
- int nbytes, timedout;
-
- xlr_i2c_wreg(priv->iobase, XLR_I2C_CFG,
- XLR_I2C_CFG_NOADDR | priv->cfg->cfg_extra);
- xlr_i2c_wreg(priv->iobase, XLR_I2C_BYTECNT, len - 1);
- xlr_i2c_wreg(priv->iobase, XLR_I2C_DEVADDR, addr);
-
- priv->pos = 0;
-
- timeout = msecs_to_jiffies(XLR_I2C_TIMEOUT);
- stoptime = jiffies + timeout;
- timedout = 0;
- nbytes = 0;
-retry:
- xlr_i2c_wreg(priv->iobase, XLR_I2C_STARTXFR, XLR_I2C_STARTXFR_RD);
-
- if (priv->irq > 0)
- return xlr_i2c_wait(priv, XLR_I2C_TIMEOUT * len);
-
- while (!timedout) {
- checktime = jiffies;
- i2c_status = xlr_i2c_rdreg(priv->iobase, XLR_I2C_STATUS);
- if (i2c_status & XLR_I2C_RXRDY) {
- if (nbytes >= len)
- return -EIO; /* should not happen */
-
- buf[nbytes++] =
- xlr_i2c_rdreg(priv->iobase, XLR_I2C_DATAIN);
-
- /* reset timeout on successful read */
- stoptime = jiffies + timeout;
- }
-
- timedout = time_after(checktime, stoptime);
- if (i2c_status & XLR_I2C_ARB_STARTERR) {
- if (timedout)
- break;
- goto retry;
- }
-
- if (i2c_status & XLR_I2C_ACK_ERR)
- return -EIO;
-
- if (!xlr_i2c_busy(priv, i2c_status))
- return 0;
- }
-
- dev_err(&adap->dev, "I2C receive timeout\n");
- return -ETIMEDOUT;
-}
-
-static int xlr_i2c_xfer(struct i2c_adapter *adap,
- struct i2c_msg *msgs, int num)
-{
- struct i2c_msg *msg;
- int i;
- int ret = 0;
- struct xlr_i2c_private *priv = i2c_get_adapdata(adap);
-
- ret = clk_enable(priv->clk);
- if (ret)
- return ret;
-
- if (priv->irq)
- xlr_i2c_wreg(priv->iobase, XLR_I2C_INT_EN, 0xf);
-
-
- for (i = 0; ret == 0 && i < num; i++) {
- msg = &msgs[i];
- priv->msg = msg;
- if (msg->flags & I2C_M_RD)
- ret = xlr_i2c_rx(priv, msg->len, &msg->buf[0],
- msg->addr);
- else
- ret = xlr_i2c_tx(priv, msg->len, &msg->buf[0],
- msg->addr);
- }
-
- if (priv->irq)
- xlr_i2c_wreg(priv->iobase, XLR_I2C_INT_EN, 0);
-
- clk_disable(priv->clk);
- priv->msg = NULL;
-
- return (ret != 0) ? ret : num;
-}
-
-static u32 xlr_func(struct i2c_adapter *adap)
-{
- /* Emulate SMBUS over I2C */
- return (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK) | I2C_FUNC_I2C;
-}
-
-static const struct i2c_algorithm xlr_i2c_algo = {
- .master_xfer = xlr_i2c_xfer,
- .functionality = xlr_func,
-};
-
-static const struct i2c_adapter_quirks xlr_i2c_quirks = {
- .flags = I2C_AQ_NO_ZERO_LEN,
-};
-
-static const struct xlr_i2c_config xlr_i2c_config_default = {
- .status_busy = XLR_I2C_BUS_BUSY,
- .cfg_extra = 0,
-};
-
-static const struct xlr_i2c_config xlr_i2c_config_tangox = {
- .flags = XLR_I2C_FLAG_IRQ,
- .status_busy = 0,
- .cfg_extra = 1 << 8,
-};
-
-static const struct of_device_id xlr_i2c_dt_ids[] = {
- {
- .compatible = "sigma,smp8642-i2c",
- .data = &xlr_i2c_config_tangox,
- },
- { }
-};
-MODULE_DEVICE_TABLE(of, xlr_i2c_dt_ids);
-
-static int xlr_i2c_probe(struct platform_device *pdev)
-{
- const struct of_device_id *match;
- struct xlr_i2c_private *priv;
- struct clk *clk;
- unsigned long clk_rate;
- unsigned long clk_div;
- u32 busfreq;
- int irq;
- int ret;
-
- priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
-
- match = of_match_device(xlr_i2c_dt_ids, &pdev->dev);
- if (match)
- priv->cfg = match->data;
- else
- priv->cfg = &xlr_i2c_config_default;
-
- priv->iobase = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(priv->iobase))
- return PTR_ERR(priv->iobase);
-
- irq = platform_get_irq(pdev, 0);
-
- if (irq > 0 && (priv->cfg->flags & XLR_I2C_FLAG_IRQ)) {
- priv->irq = irq;
-
- xlr_i2c_wreg(priv->iobase, XLR_I2C_INT_EN, 0);
- xlr_i2c_wreg(priv->iobase, XLR_I2C_INT_STAT, 0xf);
-
- ret = devm_request_irq(&pdev->dev, priv->irq, xlr_i2c_irq,
- IRQF_SHARED, dev_name(&pdev->dev),
- priv);
- if (ret)
- return ret;
-
- init_waitqueue_head(&priv->wait);
- }
-
- if (of_property_read_u32(pdev->dev.of_node, "clock-frequency",
- &busfreq))
- busfreq = I2C_MAX_STANDARD_MODE_FREQ;
-
- clk = devm_clk_get(&pdev->dev, NULL);
- if (!IS_ERR(clk)) {
- ret = clk_prepare_enable(clk);
- if (ret)
- return ret;
-
- clk_rate = clk_get_rate(clk);
- clk_div = DIV_ROUND_UP(clk_rate, 2 * busfreq);
- xlr_i2c_wreg(priv->iobase, XLR_I2C_CLKDIV, clk_div);
-
- clk_disable(clk);
- priv->clk = clk;
- }
-
- priv->adap.dev.parent = &pdev->dev;
- priv->adap.dev.of_node = pdev->dev.of_node;
- priv->adap.owner = THIS_MODULE;
- priv->adap.algo_data = priv;
- priv->adap.algo = &xlr_i2c_algo;
- priv->adap.quirks = &xlr_i2c_quirks;
- priv->adap.nr = pdev->id;
- priv->adap.class = I2C_CLASS_HWMON;
- snprintf(priv->adap.name, sizeof(priv->adap.name), "xlr-i2c");
-
- i2c_set_adapdata(&priv->adap, priv);
- ret = i2c_add_numbered_adapter(&priv->adap);
- if (ret < 0)
- goto err_unprepare_clk;
-
- platform_set_drvdata(pdev, priv);
- dev_info(&priv->adap.dev, "Added I2C Bus.\n");
- return 0;
-
-err_unprepare_clk:
- clk_unprepare(clk);
- return ret;
-}
-
-static int xlr_i2c_remove(struct platform_device *pdev)
-{
- struct xlr_i2c_private *priv;
-
- priv = platform_get_drvdata(pdev);
- i2c_del_adapter(&priv->adap);
- clk_unprepare(priv->clk);
-
- return 0;
-}
-
-static struct platform_driver xlr_i2c_driver = {
- .probe = xlr_i2c_probe,
- .remove = xlr_i2c_remove,
- .driver = {
- .name = "xlr-i2cbus",
- .of_match_table = xlr_i2c_dt_ids,
- },
-};
-
-module_platform_driver(xlr_i2c_driver);
-
-MODULE_AUTHOR("Ganesan Ramalingam <ganesanr@netlogicmicro.com>");
-MODULE_DESCRIPTION("XLR/XLS SoC I2C Controller driver");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:xlr-i2cbus");
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 73253e667de1..2c59dd748a49 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -953,6 +953,7 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf
client->dev.of_node = of_node_get(info->of_node);
client->dev.fwnode = info->fwnode;
+ device_enable_async_suspend(&client->dev);
i2c_dev_set_name(adap, client, info);
if (info->swnode) {
@@ -1482,6 +1483,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
if (res)
goto out_reg;
+ device_enable_async_suspend(&adap->dev);
pm_runtime_no_callbacks(&adap->dev);
pm_suspend_ignore_children(&adap->dev, true);
pm_runtime_enable(&adap->dev);
diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c
index bac415a52b78..73a23e117ebe 100644
--- a/drivers/i2c/muxes/i2c-mux-gpio.c
+++ b/drivers/i2c/muxes/i2c-mux-gpio.c
@@ -7,6 +7,7 @@
#include <linux/i2c.h>
#include <linux/i2c-mux.h>
+#include <linux/overflow.h>
#include <linux/platform_data/i2c-mux-gpio.h>
#include <linux/platform_device.h>
#include <linux/module.h>
@@ -49,49 +50,11 @@ static int i2c_mux_gpio_deselect(struct i2c_mux_core *muxc, u32 chan)
return 0;
}
-#ifdef CONFIG_ACPI
-
-static int i2c_mux_gpio_get_acpi_adr(struct device *dev,
- struct fwnode_handle *fwdev,
- unsigned int *adr)
-
-{
- unsigned long long adr64;
- acpi_status status;
-
- status = acpi_evaluate_integer(ACPI_HANDLE_FWNODE(fwdev),
- METHOD_NAME__ADR,
- NULL, &adr64);
-
- if (!ACPI_SUCCESS(status)) {
- dev_err(dev, "Cannot get address\n");
- return -EINVAL;
- }
-
- *adr = adr64;
- if (*adr != adr64) {
- dev_err(dev, "Address out of range\n");
- return -ERANGE;
- }
-
- return 0;
-}
-
-#else
-
-static int i2c_mux_gpio_get_acpi_adr(struct device *dev,
- struct fwnode_handle *fwdev,
- unsigned int *adr)
-{
- return -EINVAL;
-}
-
-#endif
-
static int i2c_mux_gpio_probe_fw(struct gpiomux *mux,
struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ struct fwnode_handle *fwnode = dev_fwnode(dev);
struct device_node *np = dev->of_node;
struct device_node *adapter_np;
struct i2c_adapter *adapter = NULL;
@@ -99,7 +62,7 @@ static int i2c_mux_gpio_probe_fw(struct gpiomux *mux,
unsigned *values;
int rc, i = 0;
- if (is_of_node(dev->fwnode)) {
+ if (is_of_node(fwnode)) {
if (!np)
return -ENODEV;
@@ -111,7 +74,7 @@ static int i2c_mux_gpio_probe_fw(struct gpiomux *mux,
adapter = of_find_i2c_adapter_by_node(adapter_np);
of_node_put(adapter_np);
- } else if (is_acpi_node(dev->fwnode)) {
+ } else if (is_acpi_node(fwnode)) {
/*
* In ACPI land the mux should be a direct child of the i2c
* bus it muxes.
@@ -141,16 +104,16 @@ static int i2c_mux_gpio_probe_fw(struct gpiomux *mux,
fwnode_property_read_u32(child, "reg", values + i);
} else if (is_acpi_node(child)) {
- rc = i2c_mux_gpio_get_acpi_adr(dev, child, values + i);
+ rc = acpi_get_local_address(ACPI_HANDLE_FWNODE(child), values + i);
if (rc)
- return rc;
+ return dev_err_probe(dev, rc, "Cannot get address\n");
}
i++;
}
mux->data.values = values;
- if (fwnode_property_read_u32(dev->fwnode, "idle-state", &mux->data.idle))
+ if (device_property_read_u32(dev, "idle-state", &mux->data.idle))
mux->data.idle = I2C_MUX_GPIO_NO_IDLE;
return 0;
@@ -190,7 +153,7 @@ static int i2c_mux_gpio_probe(struct platform_device *pdev)
return -EPROBE_DEFER;
muxc = i2c_mux_alloc(parent, &pdev->dev, mux->data.n_values,
- ngpios * sizeof(*mux->gpios), 0,
+ array_size(ngpios, sizeof(*mux->gpios)), 0,
i2c_mux_gpio_select, NULL);
if (!muxc) {
ret = -ENOMEM;
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index c3b4c677b442..dfe18dcd008d 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -343,7 +343,8 @@ struct bus_type i3c_bus_type = {
static enum i3c_addr_slot_status
i3c_bus_get_addr_slot_status(struct i3c_bus *bus, u16 addr)
{
- int status, bitpos = addr * 2;
+ unsigned long status;
+ int bitpos = addr * 2;
if (addr > I2C_MAX_ADDR)
return I3C_ADDR_SLOT_RSVD;
diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c
index 03a368da51b9..51a8608203de 100644
--- a/drivers/i3c/master/dw-i3c-master.c
+++ b/drivers/i3c/master/dw-i3c-master.c
@@ -793,6 +793,10 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m)
return -ENOMEM;
pos = dw_i3c_master_get_free_pos(master);
+ if (pos < 0) {
+ dw_i3c_master_free_xfer(xfer);
+ return pos;
+ }
cmd = &xfer->cmds[0];
cmd->cmd_hi = 0x1;
cmd->cmd_lo = COMMAND_PORT_DEV_COUNT(master->maxdevs - pos) |
diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
index 1b73647cc3b1..8c01123dc4ed 100644
--- a/drivers/i3c/master/mipi-i3c-hci/core.c
+++ b/drivers/i3c/master/mipi-i3c-hci/core.c
@@ -662,7 +662,7 @@ static int i3c_hci_init(struct i3c_hci *hci)
/* Make sure our data ordering fits the host's */
regval = reg_read(HC_CONTROL);
- if (IS_ENABLED(CONFIG_BIG_ENDIAN)) {
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
if (!(regval & HC_CONTROL_DATA_BIG_ENDIAN)) {
regval |= HC_CONTROL_DATA_BIG_ENDIAN;
reg_write(HC_CONTROL, regval);
diff --git a/drivers/i3c/master/mipi-i3c-hci/dat_v1.c b/drivers/i3c/master/mipi-i3c-hci/dat_v1.c
index 783e551a2c85..97bb49ff5b53 100644
--- a/drivers/i3c/master/mipi-i3c-hci/dat_v1.c
+++ b/drivers/i3c/master/mipi-i3c-hci/dat_v1.c
@@ -160,9 +160,7 @@ static int hci_dat_v1_get_index(struct i3c_hci *hci, u8 dev_addr)
unsigned int dat_idx;
u32 dat_w0;
- for (dat_idx = find_first_bit(hci->DAT_data, hci->DAT_entries);
- dat_idx < hci->DAT_entries;
- dat_idx = find_next_bit(hci->DAT_data, hci->DAT_entries, dat_idx)) {
+ for_each_set_bit(dat_idx, hci->DAT_data, hci->DAT_entries) {
dat_w0 = dat_w0_read(dat_idx);
if (FIELD_GET(DAT_0_DYNAMIC_ADDRESS, dat_w0) == dev_addr)
return dat_idx;
diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c
index af873a9be050..2990ac9eaade 100644
--- a/drivers/i3c/master/mipi-i3c-hci/dma.c
+++ b/drivers/i3c/master/mipi-i3c-hci/dma.c
@@ -223,7 +223,7 @@ static int hci_dma_init(struct i3c_hci *hci)
}
if (nr_rings > XFER_RINGS)
nr_rings = XFER_RINGS;
- rings = kzalloc(sizeof(*rings) + nr_rings * sizeof(*rh), GFP_KERNEL);
+ rings = kzalloc(struct_size(rings, headers, nr_rings), GFP_KERNEL);
if (!rings)
return -ENOMEM;
hci->io_data = rings;
diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h b/drivers/i3c/master/mipi-i3c-hci/hci.h
index 80beb1d5be8f..f109923f6c3f 100644
--- a/drivers/i3c/master/mipi-i3c-hci/hci.h
+++ b/drivers/i3c/master/mipi-i3c-hci/hci.h
@@ -98,7 +98,7 @@ struct hci_xfer {
static inline struct hci_xfer *hci_alloc_xfer(unsigned int n)
{
- return kzalloc(sizeof(struct hci_xfer) * n, GFP_KERNEL);
+ return kcalloc(n, sizeof(struct hci_xfer), GFP_KERNEL);
}
static inline void hci_free_xfer(struct hci_xfer *xfer, unsigned int n)
diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c
index 879e5a64acaf..7550dad64ecf 100644
--- a/drivers/i3c/master/svc-i3c-master.c
+++ b/drivers/i3c/master/svc-i3c-master.c
@@ -17,7 +17,9 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
/* Master Mode Registers */
#define SVC_I3C_MCONFIG 0x000
@@ -119,6 +121,7 @@
#define SVC_MDYNADDR_ADDR(x) FIELD_PREP(GENMASK(7, 1), (x))
#define SVC_I3C_MAX_DEVS 32
+#define SVC_I3C_PM_TIMEOUT_MS 1000
/* This parameter depends on the implementation and may be tuned */
#define SVC_I3C_FIFO_SIZE 16
@@ -236,6 +239,40 @@ static void svc_i3c_master_disable_interrupts(struct svc_i3c_master *master)
writel(mask, master->regs + SVC_I3C_MINTCLR);
}
+static void svc_i3c_master_clear_merrwarn(struct svc_i3c_master *master)
+{
+ /* Clear pending warnings */
+ writel(readl(master->regs + SVC_I3C_MERRWARN),
+ master->regs + SVC_I3C_MERRWARN);
+}
+
+static void svc_i3c_master_flush_fifo(struct svc_i3c_master *master)
+{
+ /* Flush FIFOs */
+ writel(SVC_I3C_MDATACTRL_FLUSHTB | SVC_I3C_MDATACTRL_FLUSHRB,
+ master->regs + SVC_I3C_MDATACTRL);
+}
+
+static void svc_i3c_master_reset_fifo_trigger(struct svc_i3c_master *master)
+{
+ u32 reg;
+
+ /* Set RX and TX tigger levels, flush FIFOs */
+ reg = SVC_I3C_MDATACTRL_FLUSHTB |
+ SVC_I3C_MDATACTRL_FLUSHRB |
+ SVC_I3C_MDATACTRL_UNLOCK_TRIG |
+ SVC_I3C_MDATACTRL_TXTRIG_FIFO_NOT_FULL |
+ SVC_I3C_MDATACTRL_RXTRIG_FIFO_NOT_EMPTY;
+ writel(reg, master->regs + SVC_I3C_MDATACTRL);
+}
+
+static void svc_i3c_master_reset(struct svc_i3c_master *master)
+{
+ svc_i3c_master_clear_merrwarn(master);
+ svc_i3c_master_reset_fifo_trigger(master);
+ svc_i3c_master_disable_interrupts(master);
+}
+
static inline struct svc_i3c_master *
to_svc_i3c_master(struct i3c_master_controller *master)
{
@@ -279,12 +316,6 @@ static void svc_i3c_master_emit_stop(struct svc_i3c_master *master)
udelay(1);
}
-static void svc_i3c_master_clear_merrwarn(struct svc_i3c_master *master)
-{
- writel(readl(master->regs + SVC_I3C_MERRWARN),
- master->regs + SVC_I3C_MERRWARN);
-}
-
static int svc_i3c_master_handle_ibi(struct svc_i3c_master *master,
struct i3c_dev_desc *dev)
{
@@ -449,13 +480,23 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m)
struct i3c_device_info info = {};
unsigned long fclk_rate, fclk_period_ns;
unsigned int high_period_ns, od_low_period_ns;
- u32 ppbaud, pplow, odhpp, odbaud, i2cbaud, reg;
+ u32 ppbaud, pplow, odhpp, odbaud, odstop, i2cbaud, reg;
int ret;
+ ret = pm_runtime_resume_and_get(master->dev);
+ if (ret < 0) {
+ dev_err(master->dev,
+ "<%s> cannot resume i3c bus master, err: %d\n",
+ __func__, ret);
+ return ret;
+ }
+
/* Timings derivation */
fclk_rate = clk_get_rate(master->fclk);
- if (!fclk_rate)
- return -EINVAL;
+ if (!fclk_rate) {
+ ret = -EINVAL;
+ goto rpm_out;
+ }
fclk_period_ns = DIV_ROUND_UP(1000000000, fclk_rate);
@@ -479,6 +520,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m)
switch (bus->mode) {
case I3C_BUS_MODE_PURE:
i2cbaud = 0;
+ odstop = 0;
break;
case I3C_BUS_MODE_MIXED_FAST:
case I3C_BUS_MODE_MIXED_LIMITED:
@@ -487,6 +529,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m)
* between the high and low period does not really matter.
*/
i2cbaud = DIV_ROUND_UP(1000, od_low_period_ns) - 2;
+ odstop = 1;
break;
case I3C_BUS_MODE_MIXED_SLOW:
/*
@@ -494,15 +537,16 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m)
* constraints as the FM+ mode.
*/
i2cbaud = DIV_ROUND_UP(2500, od_low_period_ns) - 2;
+ odstop = 1;
break;
default:
- return -EINVAL;
+ goto rpm_out;
}
reg = SVC_I3C_MCONFIG_MASTER_EN |
SVC_I3C_MCONFIG_DISTO(0) |
SVC_I3C_MCONFIG_HKEEP(0) |
- SVC_I3C_MCONFIG_ODSTOP(0) |
+ SVC_I3C_MCONFIG_ODSTOP(odstop) |
SVC_I3C_MCONFIG_PPBAUD(ppbaud) |
SVC_I3C_MCONFIG_PPLOW(pplow) |
SVC_I3C_MCONFIG_ODBAUD(odbaud) |
@@ -514,7 +558,7 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m)
/* Master core's registration */
ret = i3c_master_get_free_addr(m, 0);
if (ret < 0)
- return ret;
+ goto rpm_out;
info.dyn_addr = ret;
@@ -523,21 +567,33 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m)
ret = i3c_master_set_info(&master->base, &info);
if (ret)
- return ret;
+ goto rpm_out;
- svc_i3c_master_enable_interrupts(master, SVC_I3C_MINT_SLVSTART);
+rpm_out:
+ pm_runtime_mark_last_busy(master->dev);
+ pm_runtime_put_autosuspend(master->dev);
- return 0;
+ return ret;
}
static void svc_i3c_master_bus_cleanup(struct i3c_master_controller *m)
{
struct svc_i3c_master *master = to_svc_i3c_master(m);
+ int ret;
+
+ ret = pm_runtime_resume_and_get(master->dev);
+ if (ret < 0) {
+ dev_err(master->dev, "<%s> Cannot get runtime PM.\n", __func__);
+ return;
+ }
svc_i3c_master_disable_interrupts(master);
/* Disable master */
writel(0, master->regs + SVC_I3C_MCONFIG);
+
+ pm_runtime_mark_last_busy(master->dev);
+ pm_runtime_put_autosuspend(master->dev);
}
static int svc_i3c_master_reserve_slot(struct svc_i3c_master *master)
@@ -656,8 +712,10 @@ static int svc_i3c_master_readb(struct svc_i3c_master *master, u8 *dst,
u32 reg;
for (i = 0; i < len; i++) {
- ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg,
- SVC_I3C_MSTATUS_RXPEND(reg), 0, 1000);
+ ret = readl_poll_timeout_atomic(master->regs + SVC_I3C_MSTATUS,
+ reg,
+ SVC_I3C_MSTATUS_RXPEND(reg),
+ 0, 1000);
if (ret)
return ret;
@@ -687,10 +745,11 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master,
* Either one slave will send its ID, or the assignment process
* is done.
*/
- ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg,
- SVC_I3C_MSTATUS_RXPEND(reg) |
- SVC_I3C_MSTATUS_MCTRLDONE(reg),
- 1, 1000);
+ ret = readl_poll_timeout_atomic(master->regs + SVC_I3C_MSTATUS,
+ reg,
+ SVC_I3C_MSTATUS_RXPEND(reg) |
+ SVC_I3C_MSTATUS_MCTRLDONE(reg),
+ 1, 1000);
if (ret)
return ret;
@@ -744,11 +803,12 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master,
}
/* Wait for the slave to be ready to receive its address */
- ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg,
- SVC_I3C_MSTATUS_MCTRLDONE(reg) &&
- SVC_I3C_MSTATUS_STATE_DAA(reg) &&
- SVC_I3C_MSTATUS_BETWEEN(reg),
- 0, 1000);
+ ret = readl_poll_timeout_atomic(master->regs + SVC_I3C_MSTATUS,
+ reg,
+ SVC_I3C_MSTATUS_MCTRLDONE(reg) &&
+ SVC_I3C_MSTATUS_STATE_DAA(reg) &&
+ SVC_I3C_MSTATUS_BETWEEN(reg),
+ 0, 1000);
if (ret)
return ret;
@@ -832,31 +892,36 @@ static int svc_i3c_master_do_daa(struct i3c_master_controller *m)
unsigned int dev_nb;
int ret, i;
+ ret = pm_runtime_resume_and_get(master->dev);
+ if (ret < 0) {
+ dev_err(master->dev, "<%s> Cannot get runtime PM.\n", __func__);
+ return ret;
+ }
+
spin_lock_irqsave(&master->xferqueue.lock, flags);
ret = svc_i3c_master_do_daa_locked(master, addrs, &dev_nb);
spin_unlock_irqrestore(&master->xferqueue.lock, flags);
- if (ret)
- goto emit_stop;
+ if (ret) {
+ svc_i3c_master_emit_stop(master);
+ svc_i3c_master_clear_merrwarn(master);
+ goto rpm_out;
+ }
/* Register all devices who participated to the core */
for (i = 0; i < dev_nb; i++) {
ret = i3c_master_add_i3c_dev_locked(m, addrs[i]);
if (ret)
- return ret;
+ goto rpm_out;
}
/* Configure IBI auto-rules */
ret = svc_i3c_update_ibirules(master);
- if (ret) {
+ if (ret)
dev_err(master->dev, "Cannot handle such a list of devices");
- return ret;
- }
- return 0;
-
-emit_stop:
- svc_i3c_master_emit_stop(master);
- svc_i3c_master_clear_merrwarn(master);
+rpm_out:
+ pm_runtime_mark_last_busy(master->dev);
+ pm_runtime_put_autosuspend(master->dev);
return ret;
}
@@ -864,27 +929,35 @@ emit_stop:
static int svc_i3c_master_read(struct svc_i3c_master *master,
u8 *in, unsigned int len)
{
- int offset = 0, i, ret;
- u32 mdctrl;
+ int offset = 0, i;
+ u32 mdctrl, mstatus;
+ bool completed = false;
+ unsigned int count;
+ unsigned long start = jiffies;
- while (offset < len) {
- unsigned int count;
+ while (!completed) {
+ mstatus = readl(master->regs + SVC_I3C_MSTATUS);
+ if (SVC_I3C_MSTATUS_COMPLETE(mstatus) != 0)
+ completed = true;
- ret = readl_poll_timeout(master->regs + SVC_I3C_MDATACTRL,
- mdctrl,
- !(mdctrl & SVC_I3C_MDATACTRL_RXEMPTY),
- 0, 1000);
- if (ret)
- return ret;
+ if (time_after(jiffies, start + msecs_to_jiffies(1000))) {
+ dev_dbg(master->dev, "I3C read timeout\n");
+ return -ETIMEDOUT;
+ }
+ mdctrl = readl(master->regs + SVC_I3C_MDATACTRL);
count = SVC_I3C_MDATACTRL_RXCOUNT(mdctrl);
+ if (offset + count > len) {
+ dev_err(master->dev, "I3C receive length too long!\n");
+ return -EINVAL;
+ }
for (i = 0; i < count; i++)
in[offset + i] = readl(master->regs + SVC_I3C_MRDATAB);
offset += count;
}
- return 0;
+ return offset;
}
static int svc_i3c_master_write(struct svc_i3c_master *master,
@@ -917,7 +990,7 @@ static int svc_i3c_master_write(struct svc_i3c_master *master,
static int svc_i3c_master_xfer(struct svc_i3c_master *master,
bool rnw, unsigned int xfer_type, u8 addr,
u8 *in, const u8 *out, unsigned int xfer_len,
- unsigned int read_len, bool continued)
+ unsigned int *read_len, bool continued)
{
u32 reg;
int ret;
@@ -927,7 +1000,7 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master,
SVC_I3C_MCTRL_IBIRESP_NACK |
SVC_I3C_MCTRL_DIR(rnw) |
SVC_I3C_MCTRL_ADDR(addr) |
- SVC_I3C_MCTRL_RDTERM(read_len),
+ SVC_I3C_MCTRL_RDTERM(*read_len),
master->regs + SVC_I3C_MCTRL);
ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg,
@@ -939,17 +1012,27 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master,
ret = svc_i3c_master_read(master, in, xfer_len);
else
ret = svc_i3c_master_write(master, out, xfer_len);
- if (ret)
+ if (ret < 0)
goto emit_stop;
+ if (rnw)
+ *read_len = ret;
+
ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg,
SVC_I3C_MSTATUS_COMPLETE(reg), 0, 1000);
if (ret)
goto emit_stop;
- if (!continued)
+ writel(SVC_I3C_MINT_COMPLETE, master->regs + SVC_I3C_MSTATUS);
+
+ if (!continued) {
svc_i3c_master_emit_stop(master);
+ /* Wait idle if stop is sent. */
+ readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg,
+ SVC_I3C_MSTATUS_STATE_IDLE(reg), 0, 1000);
+ }
+
return 0;
emit_stop:
@@ -1007,17 +1090,29 @@ static void svc_i3c_master_start_xfer_locked(struct svc_i3c_master *master)
if (!xfer)
return;
+ ret = pm_runtime_resume_and_get(master->dev);
+ if (ret < 0) {
+ dev_err(master->dev, "<%s> Cannot get runtime PM.\n", __func__);
+ return;
+ }
+
+ svc_i3c_master_clear_merrwarn(master);
+ svc_i3c_master_flush_fifo(master);
+
for (i = 0; i < xfer->ncmds; i++) {
struct svc_i3c_cmd *cmd = &xfer->cmds[i];
ret = svc_i3c_master_xfer(master, cmd->rnw, xfer->type,
cmd->addr, cmd->in, cmd->out,
- cmd->len, cmd->read_len,
+ cmd->len, &cmd->read_len,
cmd->continued);
if (ret)
break;
}
+ pm_runtime_mark_last_busy(master->dev);
+ pm_runtime_put_autosuspend(master->dev);
+
xfer->ret = ret;
complete(&xfer->comp);
@@ -1141,6 +1236,9 @@ static int svc_i3c_master_send_direct_ccc_cmd(struct svc_i3c_master *master,
if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000)))
svc_i3c_master_dequeue_xfer(master, xfer);
+ if (cmd->read_len != xfer_len)
+ ccc->dests[0].payload.len = cmd->read_len;
+
ret = xfer->ret;
svc_i3c_master_free_xfer(xfer);
@@ -1291,6 +1389,16 @@ static void svc_i3c_master_free_ibi(struct i3c_dev_desc *dev)
static int svc_i3c_master_enable_ibi(struct i3c_dev_desc *dev)
{
struct i3c_master_controller *m = i3c_dev_get_master(dev);
+ struct svc_i3c_master *master = to_svc_i3c_master(m);
+ int ret;
+
+ ret = pm_runtime_resume_and_get(master->dev);
+ if (ret < 0) {
+ dev_err(master->dev, "<%s> Cannot get runtime PM.\n", __func__);
+ return ret;
+ }
+
+ svc_i3c_master_enable_interrupts(master, SVC_I3C_MINT_SLVSTART);
return i3c_master_enec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR);
}
@@ -1298,8 +1406,17 @@ static int svc_i3c_master_enable_ibi(struct i3c_dev_desc *dev)
static int svc_i3c_master_disable_ibi(struct i3c_dev_desc *dev)
{
struct i3c_master_controller *m = i3c_dev_get_master(dev);
+ struct svc_i3c_master *master = to_svc_i3c_master(m);
+ int ret;
+
+ svc_i3c_master_disable_interrupts(master);
- return i3c_master_disec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR);
+ ret = i3c_master_disec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR);
+
+ pm_runtime_mark_last_busy(master->dev);
+ pm_runtime_put_autosuspend(master->dev);
+
+ return ret;
}
static void svc_i3c_master_recycle_ibi_slot(struct i3c_dev_desc *dev,
@@ -1330,23 +1447,35 @@ static const struct i3c_master_controller_ops svc_i3c_master_ops = {
.disable_ibi = svc_i3c_master_disable_ibi,
};
-static void svc_i3c_master_reset(struct svc_i3c_master *master)
+static int svc_i3c_master_prepare_clks(struct svc_i3c_master *master)
{
- u32 reg;
+ int ret = 0;
- /* Clear pending warnings */
- writel(readl(master->regs + SVC_I3C_MERRWARN),
- master->regs + SVC_I3C_MERRWARN);
+ ret = clk_prepare_enable(master->pclk);
+ if (ret)
+ return ret;
- /* Set RX and TX tigger levels, flush FIFOs */
- reg = SVC_I3C_MDATACTRL_FLUSHTB |
- SVC_I3C_MDATACTRL_FLUSHRB |
- SVC_I3C_MDATACTRL_UNLOCK_TRIG |
- SVC_I3C_MDATACTRL_TXTRIG_FIFO_NOT_FULL |
- SVC_I3C_MDATACTRL_RXTRIG_FIFO_NOT_EMPTY;
- writel(reg, master->regs + SVC_I3C_MDATACTRL);
+ ret = clk_prepare_enable(master->fclk);
+ if (ret) {
+ clk_disable_unprepare(master->pclk);
+ return ret;
+ }
- svc_i3c_master_disable_interrupts(master);
+ ret = clk_prepare_enable(master->sclk);
+ if (ret) {
+ clk_disable_unprepare(master->pclk);
+ clk_disable_unprepare(master->fclk);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void svc_i3c_master_unprepare_clks(struct svc_i3c_master *master)
+{
+ clk_disable_unprepare(master->pclk);
+ clk_disable_unprepare(master->fclk);
+ clk_disable_unprepare(master->sclk);
}
static int svc_i3c_master_probe(struct platform_device *pdev)
@@ -1381,26 +1510,16 @@ static int svc_i3c_master_probe(struct platform_device *pdev)
master->dev = dev;
- svc_i3c_master_reset(master);
-
- ret = clk_prepare_enable(master->pclk);
+ ret = svc_i3c_master_prepare_clks(master);
if (ret)
return ret;
- ret = clk_prepare_enable(master->fclk);
- if (ret)
- goto err_disable_pclk;
-
- ret = clk_prepare_enable(master->sclk);
- if (ret)
- goto err_disable_fclk;
-
INIT_WORK(&master->hj_work, svc_i3c_master_hj_work);
INIT_WORK(&master->ibi_work, svc_i3c_master_ibi_work);
ret = devm_request_irq(dev, master->irq, svc_i3c_master_irq_handler,
IRQF_NO_SUSPEND, "svc-i3c-irq", master);
if (ret)
- goto err_disable_sclk;
+ goto err_disable_clks;
master->free_slots = GENMASK(SVC_I3C_MAX_DEVS - 1, 0);
@@ -1414,27 +1533,38 @@ static int svc_i3c_master_probe(struct platform_device *pdev)
GFP_KERNEL);
if (!master->ibi.slots) {
ret = -ENOMEM;
- goto err_disable_sclk;
+ goto err_disable_clks;
}
platform_set_drvdata(pdev, master);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, SVC_I3C_PM_TIMEOUT_MS);
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_get_noresume(&pdev->dev);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+ svc_i3c_master_reset(master);
+
/* Register the master */
ret = i3c_master_register(&master->base, &pdev->dev,
&svc_i3c_master_ops, false);
if (ret)
- goto err_disable_sclk;
+ goto rpm_disable;
- return 0;
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_put_autosuspend(&pdev->dev);
-err_disable_sclk:
- clk_disable_unprepare(master->sclk);
+ return 0;
-err_disable_fclk:
- clk_disable_unprepare(master->fclk);
+rpm_disable:
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
-err_disable_pclk:
- clk_disable_unprepare(master->pclk);
+err_disable_clks:
+ svc_i3c_master_unprepare_clks(master);
return ret;
}
@@ -1448,17 +1578,45 @@ static int svc_i3c_master_remove(struct platform_device *pdev)
if (ret)
return ret;
- clk_disable_unprepare(master->pclk);
- clk_disable_unprepare(master->fclk);
- clk_disable_unprepare(master->sclk);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
return 0;
}
+static int __maybe_unused svc_i3c_runtime_suspend(struct device *dev)
+{
+ struct svc_i3c_master *master = dev_get_drvdata(dev);
+
+ svc_i3c_master_unprepare_clks(master);
+ pinctrl_pm_select_sleep_state(dev);
+
+ return 0;
+}
+
+static int __maybe_unused svc_i3c_runtime_resume(struct device *dev)
+{
+ struct svc_i3c_master *master = dev_get_drvdata(dev);
+ int ret = 0;
+
+ pinctrl_pm_select_default_state(dev);
+ svc_i3c_master_prepare_clks(master);
+
+ return ret;
+}
+
+static const struct dev_pm_ops svc_i3c_pm_ops = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(svc_i3c_runtime_suspend,
+ svc_i3c_runtime_resume, NULL)
+};
+
static const struct of_device_id svc_i3c_master_of_match_tbl[] = {
{ .compatible = "silvaco,i3c-master" },
{ /* sentinel */ },
};
+MODULE_DEVICE_TABLE(of, svc_i3c_master_of_match_tbl);
static struct platform_driver svc_i3c_master = {
.probe = svc_i3c_master_probe,
@@ -1466,6 +1624,7 @@ static struct platform_driver svc_i3c_master = {
.driver = {
.name = "silvaco-i3c-master",
.of_match_table = svc_i3c_master_of_match_tbl,
+ .pm = &svc_i3c_pm_ops,
},
};
module_platform_driver(svc_i3c_master);
diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig
index 2334ad249b46..b190846c3dc2 100644
--- a/drivers/iio/Kconfig
+++ b/drivers/iio/Kconfig
@@ -70,6 +70,7 @@ config IIO_TRIGGERED_EVENT
source "drivers/iio/accel/Kconfig"
source "drivers/iio/adc/Kconfig"
+source "drivers/iio/addac/Kconfig"
source "drivers/iio/afe/Kconfig"
source "drivers/iio/amplifiers/Kconfig"
source "drivers/iio/cdc/Kconfig"
@@ -77,6 +78,7 @@ source "drivers/iio/chemical/Kconfig"
source "drivers/iio/common/Kconfig"
source "drivers/iio/dac/Kconfig"
source "drivers/iio/dummy/Kconfig"
+source "drivers/iio/filter/Kconfig"
source "drivers/iio/frequency/Kconfig"
source "drivers/iio/gyro/Kconfig"
source "drivers/iio/health/Kconfig"
diff --git a/drivers/iio/Makefile b/drivers/iio/Makefile
index 65e39bd4f934..3be08cdadd7e 100644
--- a/drivers/iio/Makefile
+++ b/drivers/iio/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_IIO_TRIGGERED_EVENT) += industrialio-triggered-event.o
obj-y += accel/
obj-y += adc/
+obj-y += addac/
obj-y += afe/
obj-y += amplifiers/
obj-y += buffer/
@@ -24,6 +25,7 @@ obj-y += common/
obj-y += dac/
obj-y += dummy/
obj-y += gyro/
+obj-y += filter/
obj-y += frequency/
obj-y += health/
obj-y += humidity/
diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c
index 2edfcb4819b7..d8a454c266d5 100644
--- a/drivers/iio/accel/bma180.c
+++ b/drivers/iio/accel/bma180.c
@@ -658,7 +658,7 @@ static const struct iio_chan_spec_ext_info bma023_ext_info[] = {
static const struct iio_chan_spec_ext_info bma180_ext_info[] = {
IIO_ENUM("power_mode", IIO_SHARED_BY_TYPE, &bma180_power_mode_enum),
- IIO_ENUM_AVAILABLE("power_mode", &bma180_power_mode_enum),
+ IIO_ENUM_AVAILABLE("power_mode", IIO_SHARED_BY_TYPE, &bma180_power_mode_enum),
IIO_MOUNT_MATRIX(IIO_SHARED_BY_DIR, bma180_accel_get_mount_matrix),
{ }
};
@@ -938,7 +938,7 @@ static int bma180_probe(struct i2c_client *client,
i2c_set_clientdata(client, indio_dev);
data->client = client;
if (client->dev.of_node)
- chip = (enum chip_ids)of_device_get_match_data(dev);
+ chip = (uintptr_t)of_device_get_match_data(dev);
else
chip = id->driver_data;
data->part_info = &bma180_part_info[chip];
diff --git a/drivers/iio/accel/bma220_spi.c b/drivers/iio/accel/bma220_spi.c
index bc4c626e454d..74024d7ce5ac 100644
--- a/drivers/iio/accel/bma220_spi.c
+++ b/drivers/iio/accel/bma220_spi.c
@@ -27,7 +27,6 @@
#define BMA220_CHIP_ID 0xDD
#define BMA220_READ_MASK BIT(7)
#define BMA220_RANGE_MASK GENMASK(1, 0)
-#define BMA220_DATA_SHIFT 2
#define BMA220_SUSPEND_SLEEP 0xFF
#define BMA220_SUSPEND_WAKE 0x00
@@ -45,7 +44,7 @@
.sign = 's', \
.realbits = 6, \
.storagebits = 8, \
- .shift = BMA220_DATA_SHIFT, \
+ .shift = 2, \
.endianness = IIO_CPU, \
}, \
}
@@ -125,7 +124,8 @@ static int bma220_read_raw(struct iio_dev *indio_dev,
ret = bma220_read_reg(data->spi_device, chan->address);
if (ret < 0)
return -EINVAL;
- *val = sign_extend32(ret >> BMA220_DATA_SHIFT, 5);
+ *val = sign_extend32(ret >> chan->scan_type.shift,
+ chan->scan_type.realbits - 1);
return IIO_VAL_INT;
case IIO_CHAN_INFO_SCALE:
ret = bma220_read_reg(data->spi_device, BMA220_REG_RANGE);
diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index b0678c351e82..e6081dd0a880 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -170,7 +170,7 @@ static const struct {
{1000, 0, 0x0E},
{2000, 0, 0x0F} };
-static const struct {
+static __maybe_unused const struct {
int bw_bits;
int msec;
} bmc150_accel_sample_upd_time[] = { {0x08, 64},
diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c
index 24c9387c2968..0fe570316848 100644
--- a/drivers/iio/accel/kxcjk-1013.c
+++ b/drivers/iio/accel/kxcjk-1013.c
@@ -315,7 +315,7 @@ static const char *const kxtf9_samp_freq_avail =
"25 50 100 200 400 800";
/* Refer to section 4 of the specification */
-static const struct {
+static __maybe_unused const struct {
int odr_bits;
int usec;
} odr_start_up_times[KX_MAX_CHIPS][12] = {
@@ -927,7 +927,8 @@ static int kxcjk1013_read_raw(struct iio_dev *indio_dev,
mutex_unlock(&data->mutex);
return ret;
}
- *val = sign_extend32(ret >> 4, 11);
+ *val = sign_extend32(ret >> chan->scan_type.shift,
+ chan->scan_type.realbits - 1);
ret = kxcjk1013_set_power_state(data, false);
}
mutex_unlock(&data->mutex);
diff --git a/drivers/iio/accel/mma7455_core.c b/drivers/iio/accel/mma7455_core.c
index 777c6c384b09..e6739ba74edf 100644
--- a/drivers/iio/accel/mma7455_core.c
+++ b/drivers/iio/accel/mma7455_core.c
@@ -134,7 +134,8 @@ static int mma7455_read_raw(struct iio_dev *indio_dev,
if (ret)
return ret;
- *val = sign_extend32(le16_to_cpu(data), 9);
+ *val = sign_extend32(le16_to_cpu(data),
+ chan->scan_type.realbits - 1);
return IIO_VAL_INT;
diff --git a/drivers/iio/accel/mma7660.c b/drivers/iio/accel/mma7660.c
index cd6cdf2c51b0..24b83ccdb950 100644
--- a/drivers/iio/accel/mma7660.c
+++ b/drivers/iio/accel/mma7660.c
@@ -210,10 +210,16 @@ static int mma7660_probe(struct i2c_client *client,
static int mma7660_remove(struct i2c_client *client)
{
struct iio_dev *indio_dev = i2c_get_clientdata(client);
+ int ret;
iio_device_unregister(indio_dev);
- return mma7660_set_mode(iio_priv(indio_dev), MMA7660_MODE_STANDBY);
+ ret = mma7660_set_mode(iio_priv(indio_dev), MMA7660_MODE_STANDBY);
+ if (ret)
+ dev_warn(&client->dev, "Failed to put device in stand-by mode (%pe), ignoring\n",
+ ERR_PTR(ret));
+
+ return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c
index 09c7f10fefb6..64b82b4503ad 100644
--- a/drivers/iio/accel/mma8452.c
+++ b/drivers/iio/accel/mma8452.c
@@ -1053,7 +1053,7 @@ static irqreturn_t mma8452_interrupt(int irq, void *p)
{
struct iio_dev *indio_dev = p;
struct mma8452_data *data = iio_priv(indio_dev);
- int ret = IRQ_NONE;
+ irqreturn_t ret = IRQ_NONE;
int src;
src = i2c_smbus_read_byte_data(data->client, MMA8452_INT_SRC);
diff --git a/drivers/iio/accel/mma9553.c b/drivers/iio/accel/mma9553.c
index ba3ecb3b57dc..0570ab1cc064 100644
--- a/drivers/iio/accel/mma9553.c
+++ b/drivers/iio/accel/mma9553.c
@@ -917,7 +917,7 @@ static const struct iio_enum mma9553_calibgender_enum = {
static const struct iio_chan_spec_ext_info mma9553_ext_info[] = {
IIO_ENUM("calibgender", IIO_SHARED_BY_TYPE, &mma9553_calibgender_enum),
- IIO_ENUM_AVAILABLE("calibgender", &mma9553_calibgender_enum),
+ IIO_ENUM_AVAILABLE("calibgender", IIO_SHARED_BY_TYPE, &mma9553_calibgender_enum),
{},
};
diff --git a/drivers/iio/accel/sca3000.c b/drivers/iio/accel/sca3000.c
index c6b75308148a..43ecacbdc95a 100644
--- a/drivers/iio/accel/sca3000.c
+++ b/drivers/iio/accel/sca3000.c
@@ -534,6 +534,13 @@ static const struct iio_chan_spec sca3000_channels_with_temp[] = {
BIT(IIO_CHAN_INFO_OFFSET),
/* No buffer support */
.scan_index = -1,
+ .scan_type = {
+ .sign = 'u',
+ .realbits = 9,
+ .storagebits = 16,
+ .shift = 5,
+ .endianness = IIO_BE,
+ },
},
{
.type = IIO_ACCEL,
@@ -730,8 +737,9 @@ static int sca3000_read_raw(struct iio_dev *indio_dev,
mutex_unlock(&st->lock);
return ret;
}
- *val = (be16_to_cpup((__be16 *)st->rx) >> 3) & 0x1FFF;
- *val = sign_extend32(*val, 12);
+ *val = sign_extend32(be16_to_cpup((__be16 *)st->rx) >>
+ chan->scan_type.shift,
+ chan->scan_type.realbits - 1);
} else {
/* get the temperature when available */
ret = sca3000_read_data_short(st,
@@ -741,8 +749,9 @@ static int sca3000_read_raw(struct iio_dev *indio_dev,
mutex_unlock(&st->lock);
return ret;
}
- *val = ((st->rx[0] & 0x3F) << 3) |
- ((st->rx[1] & 0xE0) >> 5);
+ *val = (be16_to_cpup((__be16 *)st->rx) >>
+ chan->scan_type.shift) &
+ GENMASK(chan->scan_type.realbits - 1, 0);
}
mutex_unlock(&st->lock);
return IIO_VAL_INT;
diff --git a/drivers/iio/accel/stk8312.c b/drivers/iio/accel/stk8312.c
index 43c621d0f11e..de0cdf8c1f94 100644
--- a/drivers/iio/accel/stk8312.c
+++ b/drivers/iio/accel/stk8312.c
@@ -355,7 +355,7 @@ static int stk8312_read_raw(struct iio_dev *indio_dev,
mutex_unlock(&data->lock);
return ret;
}
- *val = sign_extend32(ret, 7);
+ *val = sign_extend32(ret, chan->scan_type.realbits - 1);
ret = stk8312_set_mode(data,
data->mode & (~STK8312_MODE_ACTIVE));
mutex_unlock(&data->lock);
diff --git a/drivers/iio/accel/stk8ba50.c b/drivers/iio/accel/stk8ba50.c
index e137a34b5c9a..517c57ed9e94 100644
--- a/drivers/iio/accel/stk8ba50.c
+++ b/drivers/iio/accel/stk8ba50.c
@@ -227,7 +227,8 @@ static int stk8ba50_read_raw(struct iio_dev *indio_dev,
mutex_unlock(&data->lock);
return -EINVAL;
}
- *val = sign_extend32(ret >> STK8BA50_DATA_SHIFT, 9);
+ *val = sign_extend32(ret >> chan->scan_type.shift,
+ chan->scan_type.realbits - 1);
stk8ba50_set_power(data, STK8BA50_MODE_SUSPEND);
mutex_unlock(&data->lock);
return IIO_VAL_INT;
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 3363af15a43f..4fdc8bfbb407 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -1146,7 +1146,7 @@ config TI_ADS7950
config TI_ADS8344
tristate "Texas Instruments ADS8344"
- depends on SPI && OF
+ depends on SPI
help
If you say yes here you get support for Texas Instruments ADS8344
ADC chips
@@ -1156,7 +1156,7 @@ config TI_ADS8344
config TI_ADS8688
tristate "Texas Instruments ADS8688"
- depends on SPI && OF
+ depends on SPI
help
If you say yes here you get support for Texas Instruments ADS8684 and
and ADS8688 ADC chips
@@ -1166,7 +1166,7 @@ config TI_ADS8688
config TI_ADS124S08
tristate "Texas Instruments ADS124S08"
- depends on SPI && OF
+ depends on SPI
help
If you say yes here you get support for Texas Instruments ADS124S08
and ADS124S06 ADC chips
@@ -1288,4 +1288,19 @@ config XILINX_XADC
The driver can also be build as a module. If so, the module will be called
xilinx-xadc.
+config XILINX_AMS
+ tristate "Xilinx AMS driver"
+ depends on ARCH_ZYNQMP || COMPILE_TEST
+ depends on HAS_IOMEM
+ help
+ Say yes here to have support for the Xilinx AMS for Ultrascale/Ultrascale+
+ System Monitor. With this you can measure and monitor the Voltages and
+ Temperature values on the SOC.
+
+ The driver supports Voltage and Temperature monitoring on Xilinx Ultrascale
+ devices.
+
+ The driver can also be built as a module. If so, the module will be called
+ xilinx-ams.
+
endmenu
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index d3f53549720c..4a8f1833993b 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -115,4 +115,5 @@ obj-$(CONFIG_VF610_ADC) += vf610_adc.o
obj-$(CONFIG_VIPERBOARD_ADC) += viperboard_adc.o
xilinx-xadc-y := xilinx-xadc-core.o xilinx-xadc-events.o
obj-$(CONFIG_XILINX_XADC) += xilinx-xadc.o
+obj-$(CONFIG_XILINX_AMS) += xilinx-ams.o
obj-$(CONFIG_SD_ADC_MODULATOR) += sd_adc_modulator.o
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index e45c600fccc0..bc2cfa5f9592 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -347,7 +347,7 @@ static int ad7124_find_free_config_slot(struct ad7124_state *st)
{
unsigned int free_cfg_slot;
- free_cfg_slot = find_next_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS, 0);
+ free_cfg_slot = find_first_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS);
if (free_cfg_slot == AD7124_MAX_CONFIGS)
return -1;
diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
index 2121a812b0c3..cc990205f306 100644
--- a/drivers/iio/adc/ad7192.c
+++ b/drivers/iio/adc/ad7192.c
@@ -257,7 +257,8 @@ static const struct iio_chan_spec_ext_info ad7192_calibsys_ext_info[] = {
},
IIO_ENUM("sys_calibration_mode", IIO_SEPARATE,
&ad7192_syscalib_mode_enum),
- IIO_ENUM_AVAILABLE("sys_calibration_mode", &ad7192_syscalib_mode_enum),
+ IIO_ENUM_AVAILABLE("sys_calibration_mode", IIO_SHARED_BY_TYPE,
+ &ad7192_syscalib_mode_enum),
{}
};
diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index a8ec3efd659e..1d345d66742d 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -159,7 +159,8 @@ static int ad7266_read_raw(struct iio_dev *indio_dev,
*val = (*val >> 2) & 0xfff;
if (chan->scan_type.sign == 's')
- *val = sign_extend32(*val, 11);
+ *val = sign_extend32(*val,
+ chan->scan_type.realbits - 1);
return IIO_VAL_INT;
case IIO_CHAN_INFO_SCALE:
diff --git a/drivers/iio/adc/ad7606.h b/drivers/iio/adc/ad7606.h
index 9350ef1f63b5..4f82d7c9acfd 100644
--- a/drivers/iio/adc/ad7606.h
+++ b/drivers/iio/adc/ad7606.h
@@ -62,7 +62,7 @@ struct ad7606_chip_info {
* struct ad7606_state - driver instance specific data
* @dev pointer to kernel device
* @chip_info entry in the table of chips that describes this device
- * @reg regulator info for the the power supply of the device
+ * @reg regulator info for the power supply of the device
* @bops bus operations (SPI or parallel)
* @range voltage range selection, selects which scale to apply
* @oversampling oversampling selection
diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c
index 1d652d9b2f5c..cd418bd8bd87 100644
--- a/drivers/iio/adc/ad_sigma_delta.c
+++ b/drivers/iio/adc/ad_sigma_delta.c
@@ -467,9 +467,6 @@ int ad_sd_validate_trigger(struct iio_dev *indio_dev, struct iio_trigger *trig)
}
EXPORT_SYMBOL_GPL(ad_sd_validate_trigger);
-static const struct iio_trigger_ops ad_sd_trigger_ops = {
-};
-
static int devm_ad_sd_probe_trigger(struct device *dev, struct iio_dev *indio_dev)
{
struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev);
@@ -486,7 +483,6 @@ static int devm_ad_sd_probe_trigger(struct device *dev, struct iio_dev *indio_de
if (sigma_delta->trig == NULL)
return -ENOMEM;
- sigma_delta->trig->ops = &ad_sd_trigger_ops;
init_completion(&sigma_delta->completion);
sigma_delta->irq_dis = true;
diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c
index 92a57cf10fba..854b1f81d807 100644
--- a/drivers/iio/adc/at91-sama5d2_adc.c
+++ b/drivers/iio/adc/at91-sama5d2_adc.c
@@ -1662,10 +1662,9 @@ static int at91_adc_write_raw(struct iio_dev *indio_dev,
}
}
-static void at91_adc_dma_init(struct platform_device *pdev)
+static void at91_adc_dma_init(struct at91_adc_state *st)
{
- struct iio_dev *indio_dev = platform_get_drvdata(pdev);
- struct at91_adc_state *st = iio_priv(indio_dev);
+ struct device *dev = &st->indio_dev->dev;
struct dma_slave_config config = {0};
/* we have 2 bytes for each channel */
unsigned int sample_size = st->soc_info.platform->nr_channels * 2;
@@ -1680,9 +1679,9 @@ static void at91_adc_dma_init(struct platform_device *pdev)
if (st->dma_st.dma_chan)
return;
- st->dma_st.dma_chan = dma_request_chan(&pdev->dev, "rx");
+ st->dma_st.dma_chan = dma_request_chan(dev, "rx");
if (IS_ERR(st->dma_st.dma_chan)) {
- dev_info(&pdev->dev, "can't get DMA channel\n");
+ dev_info(dev, "can't get DMA channel\n");
st->dma_st.dma_chan = NULL;
goto dma_exit;
}
@@ -1692,7 +1691,7 @@ static void at91_adc_dma_init(struct platform_device *pdev)
&st->dma_st.rx_dma_buf,
GFP_KERNEL);
if (!st->dma_st.rx_buf) {
- dev_info(&pdev->dev, "can't allocate coherent DMA area\n");
+ dev_info(dev, "can't allocate coherent DMA area\n");
goto dma_chan_disable;
}
@@ -1705,11 +1704,11 @@ static void at91_adc_dma_init(struct platform_device *pdev)
config.dst_maxburst = 1;
if (dmaengine_slave_config(st->dma_st.dma_chan, &config)) {
- dev_info(&pdev->dev, "can't configure DMA slave\n");
+ dev_info(dev, "can't configure DMA slave\n");
goto dma_free_area;
}
- dev_info(&pdev->dev, "using %s for rx DMA transfers\n",
+ dev_info(dev, "using %s for rx DMA transfers\n",
dma_chan_name(st->dma_st.dma_chan));
return;
@@ -1721,13 +1720,12 @@ dma_chan_disable:
dma_release_channel(st->dma_st.dma_chan);
st->dma_st.dma_chan = NULL;
dma_exit:
- dev_info(&pdev->dev, "continuing without DMA support\n");
+ dev_info(dev, "continuing without DMA support\n");
}
-static void at91_adc_dma_disable(struct platform_device *pdev)
+static void at91_adc_dma_disable(struct at91_adc_state *st)
{
- struct iio_dev *indio_dev = platform_get_drvdata(pdev);
- struct at91_adc_state *st = iio_priv(indio_dev);
+ struct device *dev = &st->indio_dev->dev;
/* we have 2 bytes for each channel */
unsigned int sample_size = st->soc_info.platform->nr_channels * 2;
unsigned int pages = DIV_ROUND_UP(AT91_HWFIFO_MAX_SIZE *
@@ -1745,7 +1743,7 @@ static void at91_adc_dma_disable(struct platform_device *pdev)
dma_release_channel(st->dma_st.dma_chan);
st->dma_st.dma_chan = NULL;
- dev_info(&pdev->dev, "continuing without DMA support\n");
+ dev_info(dev, "continuing without DMA support\n");
}
static int at91_adc_set_watermark(struct iio_dev *indio_dev, unsigned int val)
@@ -1771,9 +1769,9 @@ static int at91_adc_set_watermark(struct iio_dev *indio_dev, unsigned int val)
*/
if (val == 1)
- at91_adc_dma_disable(to_platform_device(&indio_dev->dev));
+ at91_adc_dma_disable(st);
else if (val > 1)
- at91_adc_dma_init(to_platform_device(&indio_dev->dev));
+ at91_adc_dma_init(st);
/*
* We can start the DMA only after setting the watermark and
@@ -1781,7 +1779,7 @@ static int at91_adc_set_watermark(struct iio_dev *indio_dev, unsigned int val)
*/
ret = at91_adc_buffer_prepare(indio_dev);
if (ret)
- at91_adc_dma_disable(to_platform_device(&indio_dev->dev));
+ at91_adc_dma_disable(st);
return ret;
}
@@ -1828,7 +1826,7 @@ static void at91_adc_hw_init(struct iio_dev *indio_dev)
static ssize_t at91_adc_get_fifo_state(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct iio_dev *indio_dev = dev_to_iio_dev(dev);
struct at91_adc_state *st = iio_priv(indio_dev);
return scnprintf(buf, PAGE_SIZE, "%d\n", !!st->dma_st.dma_chan);
@@ -1837,7 +1835,7 @@ static ssize_t at91_adc_get_fifo_state(struct device *dev,
static ssize_t at91_adc_get_watermark(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct iio_dev *indio_dev = dev_to_iio_dev(dev);
struct at91_adc_state *st = iio_priv(indio_dev);
return scnprintf(buf, PAGE_SIZE, "%d\n", st->dma_st.watermark);
@@ -2078,7 +2076,7 @@ static int at91_adc_probe(struct platform_device *pdev)
return 0;
dma_disable:
- at91_adc_dma_disable(pdev);
+ at91_adc_dma_disable(st);
per_clk_disable_unprepare:
clk_disable_unprepare(st->per_clk);
vref_disable:
@@ -2095,7 +2093,7 @@ static int at91_adc_remove(struct platform_device *pdev)
iio_device_unregister(indio_dev);
- at91_adc_dma_disable(pdev);
+ at91_adc_dma_disable(st);
clk_disable_unprepare(st->per_clk);
diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c
index df99f1365c39..53bf7d4899d2 100644
--- a/drivers/iio/adc/axp20x_adc.c
+++ b/drivers/iio/adc/axp20x_adc.c
@@ -186,6 +186,8 @@ static const struct iio_chan_spec axp20x_adc_channels[] = {
AXP20X_BATT_CHRG_I_H),
AXP20X_ADC_CHANNEL(AXP20X_BATT_DISCHRG_I, "batt_dischrg_i", IIO_CURRENT,
AXP20X_BATT_DISCHRG_I_H),
+ AXP20X_ADC_CHANNEL(AXP20X_TS_IN, "ts_v", IIO_VOLTAGE,
+ AXP20X_TS_IN_H),
};
static const struct iio_chan_spec axp22x_adc_channels[] = {
@@ -203,6 +205,8 @@ static const struct iio_chan_spec axp22x_adc_channels[] = {
AXP20X_BATT_CHRG_I_H),
AXP20X_ADC_CHANNEL(AXP22X_BATT_DISCHRG_I, "batt_dischrg_i", IIO_CURRENT,
AXP20X_BATT_DISCHRG_I_H),
+ AXP20X_ADC_CHANNEL(AXP22X_TS_IN, "ts_v", IIO_VOLTAGE,
+ AXP22X_TS_ADC_H),
};
static const struct iio_chan_spec axp813_adc_channels[] = {
@@ -222,6 +226,8 @@ static const struct iio_chan_spec axp813_adc_channels[] = {
AXP20X_BATT_CHRG_I_H),
AXP20X_ADC_CHANNEL(AXP22X_BATT_DISCHRG_I, "batt_dischrg_i", IIO_CURRENT,
AXP20X_BATT_DISCHRG_I_H),
+ AXP20X_ADC_CHANNEL(AXP813_TS_IN, "ts_v", IIO_VOLTAGE,
+ AXP288_TS_ADC_H),
};
static int axp20x_adc_raw(struct iio_dev *indio_dev,
@@ -296,11 +302,36 @@ static int axp20x_adc_scale_voltage(int channel, int *val, int *val2)
*val2 = 400000;
return IIO_VAL_INT_PLUS_MICRO;
+ case AXP20X_TS_IN:
+ /* 0.8 mV per LSB */
+ *val = 0;
+ *val2 = 800000;
+ return IIO_VAL_INT_PLUS_MICRO;
+
default:
return -EINVAL;
}
}
+static int axp22x_adc_scale_voltage(int channel, int *val, int *val2)
+{
+ switch (channel) {
+ case AXP22X_BATT_V:
+ /* 1.1 mV per LSB */
+ *val = 1;
+ *val2 = 100000;
+ return IIO_VAL_INT_PLUS_MICRO;
+
+ case AXP22X_TS_IN:
+ /* 0.8 mV per LSB */
+ *val = 0;
+ *val2 = 800000;
+ return IIO_VAL_INT_PLUS_MICRO;
+
+ default:
+ return -EINVAL;
+ }
+}
static int axp813_adc_scale_voltage(int channel, int *val, int *val2)
{
switch (channel) {
@@ -314,6 +345,12 @@ static int axp813_adc_scale_voltage(int channel, int *val, int *val2)
*val2 = 100000;
return IIO_VAL_INT_PLUS_MICRO;
+ case AXP813_TS_IN:
+ /* 0.8 mV per LSB */
+ *val = 0;
+ *val2 = 800000;
+ return IIO_VAL_INT_PLUS_MICRO;
+
default:
return -EINVAL;
}
@@ -367,12 +404,7 @@ static int axp22x_adc_scale(struct iio_chan_spec const *chan, int *val,
{
switch (chan->type) {
case IIO_VOLTAGE:
- if (chan->channel != AXP22X_BATT_V)
- return -EINVAL;
-
- *val = 1;
- *val2 = 100000;
- return IIO_VAL_INT_PLUS_MICRO;
+ return axp22x_adc_scale_voltage(chan->channel, val, val2);
case IIO_CURRENT:
*val = 1;
@@ -476,6 +508,7 @@ static int axp22x_read_raw(struct iio_dev *indio_dev,
{
switch (mask) {
case IIO_CHAN_INFO_OFFSET:
+ /* For PMIC temp only */
*val = -2677;
return IIO_VAL_INT;
diff --git a/drivers/iio/adc/envelope-detector.c b/drivers/iio/adc/envelope-detector.c
index d73eac36153f..e911c25d106d 100644
--- a/drivers/iio/adc/envelope-detector.c
+++ b/drivers/iio/adc/envelope-detector.c
@@ -31,14 +31,13 @@
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/mutex.h>
#include <linux/iio/consumer.h>
#include <linux/iio/iio.h>
#include <linux/iio/sysfs.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
diff --git a/drivers/iio/adc/hi8435.c b/drivers/iio/adc/hi8435.c
index 8b353e26668e..e665e14c6e54 100644
--- a/drivers/iio/adc/hi8435.c
+++ b/drivers/iio/adc/hi8435.c
@@ -350,7 +350,7 @@ static const struct iio_enum hi8435_sensing_mode = {
static const struct iio_chan_spec_ext_info hi8435_ext_info[] = {
IIO_ENUM("sensing_mode", IIO_SEPARATE, &hi8435_sensing_mode),
- IIO_ENUM_AVAILABLE("sensing_mode", &hi8435_sensing_mode),
+ IIO_ENUM_AVAILABLE("sensing_mode", IIO_SHARED_BY_TYPE, &hi8435_sensing_mode),
{},
};
diff --git a/drivers/iio/adc/imx7d_adc.c b/drivers/iio/adc/imx7d_adc.c
index 092f8d296527..12f5b8e34c84 100644
--- a/drivers/iio/adc/imx7d_adc.c
+++ b/drivers/iio/adc/imx7d_adc.c
@@ -522,12 +522,11 @@ static int imx7d_adc_probe(struct platform_device *pdev)
imx7d_adc_feature_config(info);
- ret = imx7d_adc_enable(&indio_dev->dev);
+ ret = imx7d_adc_enable(dev);
if (ret)
return ret;
- ret = devm_add_action_or_reset(dev, __imx7d_adc_disable,
- &indio_dev->dev);
+ ret = devm_add_action_or_reset(dev, __imx7d_adc_disable, dev);
if (ret)
return ret;
diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c
index a4b2ff9e0dd5..4f9992a51e64 100644
--- a/drivers/iio/adc/ina2xx-adc.c
+++ b/drivers/iio/adc/ina2xx-adc.c
@@ -550,7 +550,7 @@ static ssize_t ina2xx_allow_async_readout_store(struct device *dev,
bool val;
int ret;
- ret = strtobool((const char *) buf, &val);
+ ret = strtobool(buf, &val);
if (ret)
return ret;
@@ -842,15 +842,13 @@ static int ina2xx_buffer_enable(struct iio_dev *indio_dev)
dev_dbg(&indio_dev->dev, "Async readout mode: %d\n",
chip->allow_async_readout);
- task = kthread_create(ina2xx_capture_thread, (void *)indio_dev,
- "%s:%d-%uus", indio_dev->name,
- iio_device_id(indio_dev),
- sampling_us);
+ task = kthread_run(ina2xx_capture_thread, (void *)indio_dev,
+ "%s:%d-%uus", indio_dev->name,
+ iio_device_id(indio_dev),
+ sampling_us);
if (IS_ERR(task))
return PTR_ERR(task);
- get_task_struct(task);
- wake_up_process(task);
chip->task = task;
return 0;
@@ -862,7 +860,6 @@ static int ina2xx_buffer_disable(struct iio_dev *indio_dev)
if (chip->task) {
kthread_stop(chip->task);
- put_task_struct(chip->task);
chip->task = NULL;
}
@@ -974,7 +971,7 @@ static int ina2xx_probe(struct i2c_client *client,
}
if (client->dev.of_node)
- type = (enum ina2xx_ids)of_device_get_match_data(&client->dev);
+ type = (uintptr_t)of_device_get_match_data(&client->dev);
else
type = id->driver_data;
chip->config = &ina2xx_config[type];
diff --git a/drivers/iio/adc/lpc18xx_adc.c b/drivers/iio/adc/lpc18xx_adc.c
index ceefa4d793cf..ae9c9384f23e 100644
--- a/drivers/iio/adc/lpc18xx_adc.c
+++ b/drivers/iio/adc/lpc18xx_adc.c
@@ -157,9 +157,6 @@ static int lpc18xx_adc_probe(struct platform_device *pdev)
return dev_err_probe(&pdev->dev, PTR_ERR(adc->clk),
"error getting clock\n");
- rate = clk_get_rate(adc->clk);
- clkdiv = DIV_ROUND_UP(rate, LPC18XX_ADC_CLK_TARGET);
-
adc->vref = devm_regulator_get(&pdev->dev, "vref");
if (IS_ERR(adc->vref))
return dev_err_probe(&pdev->dev, PTR_ERR(adc->vref),
@@ -192,6 +189,9 @@ static int lpc18xx_adc_probe(struct platform_device *pdev)
if (ret)
return ret;
+ rate = clk_get_rate(adc->clk);
+ clkdiv = DIV_ROUND_UP(rate, LPC18XX_ADC_CLK_TARGET);
+
adc->cr_reg = (clkdiv << LPC18XX_ADC_CR_CLKDIV_SHIFT) |
LPC18XX_ADC_CR_PDN;
writel(adc->cr_reg, adc->base + LPC18XX_ADC_CR);
diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c
index 052ab23f10b2..01a4275e9c46 100644
--- a/drivers/iio/adc/max9611.c
+++ b/drivers/iio/adc/max9611.c
@@ -22,7 +22,8 @@
#include <linux/iio/iio.h>
#include <linux/iio/sysfs.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
#define DRIVER_NAME "max9611"
@@ -513,11 +514,9 @@ static int max9611_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
const char * const shunt_res_prop = "shunt-resistor-micro-ohms";
- const struct device_node *of_node = client->dev.of_node;
- const struct of_device_id *of_id =
- of_match_device(max9611_of_table, &client->dev);
struct max9611_dev *max9611;
struct iio_dev *indio_dev;
+ struct device *dev = &client->dev;
unsigned int of_shunt;
int ret;
@@ -528,15 +527,14 @@ static int max9611_probe(struct i2c_client *client,
i2c_set_clientdata(client, indio_dev);
max9611 = iio_priv(indio_dev);
- max9611->dev = &client->dev;
+ max9611->dev = dev;
max9611->i2c_client = client;
mutex_init(&max9611->lock);
- ret = of_property_read_u32(of_node, shunt_res_prop, &of_shunt);
+ ret = device_property_read_u32(dev, shunt_res_prop, &of_shunt);
if (ret) {
- dev_err(&client->dev,
- "Missing %s property for %pOF node\n",
- shunt_res_prop, of_node);
+ dev_err(dev, "Missing %s property for %pfw node\n",
+ shunt_res_prop, dev_fwnode(dev));
return ret;
}
max9611->shunt_resistor_uohm = of_shunt;
@@ -545,13 +543,13 @@ static int max9611_probe(struct i2c_client *client,
if (ret)
return ret;
- indio_dev->name = of_id->data;
+ indio_dev->name = device_get_match_data(dev);
indio_dev->modes = INDIO_DIRECT_MODE;
indio_dev->info = &indio_info;
indio_dev->channels = max9611_channels;
indio_dev->num_channels = ARRAY_SIZE(max9611_channels);
- return devm_iio_device_register(&client->dev, indio_dev);
+ return devm_iio_device_register(dev, indio_dev);
}
static struct i2c_driver max9611_driver = {
diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c
index e573da5397bb..13535f148c4c 100644
--- a/drivers/iio/adc/mcp3911.c
+++ b/drivers/iio/adc/mcp3911.c
@@ -10,6 +10,8 @@
#include <linux/err.h>
#include <linux/iio/iio.h>
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
#include <linux/regulator/consumer.h>
#include <linux/spi/spi.h>
@@ -200,12 +202,13 @@ static const struct iio_info mcp3911_info = {
.write_raw = mcp3911_write_raw,
};
-static int mcp3911_config(struct mcp3911 *adc, struct device_node *of_node)
+static int mcp3911_config(struct mcp3911 *adc)
{
+ struct device *dev = &adc->spi->dev;
u32 configreg;
int ret;
- of_property_read_u32(of_node, "device-addr", &adc->dev_addr);
+ device_property_read_u32(dev, "device-addr", &adc->dev_addr);
if (adc->dev_addr > 3) {
dev_err(&adc->spi->dev,
"invalid device address (%i). Must be in range 0-3.\n",
@@ -289,7 +292,7 @@ static int mcp3911_probe(struct spi_device *spi)
}
}
- ret = mcp3911_config(adc, spi->dev.of_node);
+ ret = mcp3911_config(adc);
if (ret)
goto clk_disable;
diff --git a/drivers/iio/adc/rcar-gyroadc.c b/drivers/iio/adc/rcar-gyroadc.c
index a48895046408..727ea6c68049 100644
--- a/drivers/iio/adc/rcar-gyroadc.c
+++ b/drivers/iio/adc/rcar-gyroadc.c
@@ -511,8 +511,7 @@ static int rcar_gyroadc_probe(struct platform_device *pdev)
if (ret)
return ret;
- priv->model = (enum rcar_gyroadc_model)
- of_device_get_match_data(&pdev->dev);
+ priv->model = (uintptr_t)of_device_get_match_data(&pdev->dev);
platform_set_drvdata(pdev, indio_dev);
diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c
index 32fbf57c362f..9d5be52bd948 100644
--- a/drivers/iio/adc/rzg2l_adc.c
+++ b/drivers/iio/adc/rzg2l_adc.c
@@ -506,10 +506,8 @@ static int rzg2l_adc_probe(struct platform_device *pdev)
}
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "no irq resource\n");
+ if (irq < 0)
return irq;
- }
ret = devm_request_irq(dev, irq, rzg2l_adc_isr,
0, dev_name(dev), adc);
diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
index 8cd258cb2682..897166d9e45c 100644
--- a/drivers/iio/adc/stm32-adc.c
+++ b/drivers/iio/adc/stm32-adc.c
@@ -2025,7 +2025,8 @@ static int stm32_adc_generic_chan_init(struct iio_dev *indio_dev,
if (strlen(name) >= STM32_ADC_CH_SZ) {
dev_err(&indio_dev->dev, "Label %s exceeds %d characters\n",
name, STM32_ADC_CH_SZ);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}
strncpy(adc->chan_name[val], name, STM32_ADC_CH_SZ);
ret = stm32_adc_populate_int_ch(indio_dev, name, val);
diff --git a/drivers/iio/adc/stmpe-adc.c b/drivers/iio/adc/stmpe-adc.c
index fba659bfdb40..d2d405388499 100644
--- a/drivers/iio/adc/stmpe-adc.c
+++ b/drivers/iio/adc/stmpe-adc.c
@@ -256,6 +256,7 @@ static int stmpe_adc_probe(struct platform_device *pdev)
struct stmpe_adc *info;
struct device_node *np;
u32 norequest_mask = 0;
+ unsigned long bits;
int irq_temp, irq_adc;
int num_chan = 0;
int i = 0;
@@ -309,8 +310,8 @@ static int stmpe_adc_probe(struct platform_device *pdev)
of_property_read_u32(np, "st,norequest-mask", &norequest_mask);
- for_each_clear_bit(i, (unsigned long *) &norequest_mask,
- (STMPE_ADC_LAST_NR + 1)) {
+ bits = norequest_mask;
+ for_each_clear_bit(i, &bits, (STMPE_ADC_LAST_NR + 1)) {
stmpe_adc_voltage_chan(&info->stmpe_adc_iio_channels[num_chan], i);
num_chan++;
}
diff --git a/drivers/iio/adc/ti-adc081c.c b/drivers/iio/adc/ti-adc081c.c
index 16fc608db36a..bd48b073e720 100644
--- a/drivers/iio/adc/ti-adc081c.c
+++ b/drivers/iio/adc/ti-adc081c.c
@@ -19,6 +19,7 @@
#include <linux/i2c.h>
#include <linux/module.h>
#include <linux/mod_devicetable.h>
+#include <linux/property.h>
#include <linux/iio/iio.h>
#include <linux/iio/buffer.h>
@@ -156,13 +157,16 @@ static int adc081c_probe(struct i2c_client *client,
{
struct iio_dev *iio;
struct adc081c *adc;
- struct adcxx1c_model *model;
+ const struct adcxx1c_model *model;
int err;
if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA))
return -EOPNOTSUPP;
- model = &adcxx1c_models[id->driver_data];
+ if (dev_fwnode(&client->dev))
+ model = device_get_match_data(&client->dev);
+ else
+ model = &adcxx1c_models[id->driver_data];
iio = devm_iio_device_alloc(&client->dev, sizeof(*adc));
if (!iio)
@@ -210,10 +214,17 @@ static const struct i2c_device_id adc081c_id[] = {
};
MODULE_DEVICE_TABLE(i2c, adc081c_id);
+static const struct acpi_device_id adc081c_acpi_match[] = {
+ /* Used on some AAEON boards */
+ { "ADC081C", (kernel_ulong_t)&adcxx1c_models[ADC081C] },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, adc081c_acpi_match);
+
static const struct of_device_id adc081c_of_match[] = {
- { .compatible = "ti,adc081c" },
- { .compatible = "ti,adc101c" },
- { .compatible = "ti,adc121c" },
+ { .compatible = "ti,adc081c", .data = &adcxx1c_models[ADC081C] },
+ { .compatible = "ti,adc101c", .data = &adcxx1c_models[ADC101C] },
+ { .compatible = "ti,adc121c", .data = &adcxx1c_models[ADC121C] },
{ }
};
MODULE_DEVICE_TABLE(of, adc081c_of_match);
@@ -222,6 +233,7 @@ static struct i2c_driver adc081c_driver = {
.driver = {
.name = "adc081c",
.of_match_table = adc081c_of_match,
+ .acpi_match_table = adc081c_acpi_match,
},
.probe = adc081c_probe,
.id_table = adc081c_id,
diff --git a/drivers/iio/adc/ti-adc12138.c b/drivers/iio/adc/ti-adc12138.c
index fcd5d39dd03e..6eb62b564dae 100644
--- a/drivers/iio/adc/ti-adc12138.c
+++ b/drivers/iio/adc/ti-adc12138.c
@@ -11,6 +11,7 @@
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/clk.h>
+#include <linux/property.h>
#include <linux/spi/spi.h>
#include <linux/iio/iio.h>
#include <linux/iio/buffer.h>
@@ -239,7 +240,8 @@ static int adc12138_read_raw(struct iio_dev *iio,
if (ret)
return ret;
- *value = sign_extend32(be16_to_cpu(data) >> 3, 12);
+ *value = sign_extend32(be16_to_cpu(data) >> channel->scan_type.shift,
+ channel->scan_type.realbits - 1);
return IIO_VAL_INT;
case IIO_CHAN_INFO_SCALE:
@@ -429,8 +431,8 @@ static int adc12138_probe(struct spi_device *spi)
return -EINVAL;
}
- ret = of_property_read_u32(spi->dev.of_node, "ti,acquisition-time",
- &adc->acquisition_time);
+ ret = device_property_read_u32(&spi->dev, "ti,acquisition-time",
+ &adc->acquisition_time);
if (ret)
adc->acquisition_time = 10;
@@ -516,8 +518,6 @@ static int adc12138_remove(struct spi_device *spi)
return 0;
}
-#ifdef CONFIG_OF
-
static const struct of_device_id adc12138_dt_ids[] = {
{ .compatible = "ti,adc12130", },
{ .compatible = "ti,adc12132", },
@@ -526,8 +526,6 @@ static const struct of_device_id adc12138_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, adc12138_dt_ids);
-#endif
-
static const struct spi_device_id adc12138_id[] = {
{ "adc12130", adc12130 },
{ "adc12132", adc12132 },
@@ -539,7 +537,7 @@ MODULE_DEVICE_TABLE(spi, adc12138_id);
static struct spi_driver adc12138_driver = {
.driver = {
.name = "adc12138",
- .of_match_table = of_match_ptr(adc12138_dt_ids),
+ .of_match_table = adc12138_dt_ids,
},
.probe = adc12138_probe,
.remove = adc12138_remove,
diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c
index b0352e91ac16..068efbce1710 100644
--- a/drivers/iio/adc/ti-ads1015.c
+++ b/drivers/iio/adc/ti-ads1015.c
@@ -464,9 +464,7 @@ static int ads1015_read_raw(struct iio_dev *indio_dev,
mutex_lock(&data->lock);
switch (mask) {
- case IIO_CHAN_INFO_RAW: {
- int shift = chan->scan_type.shift;
-
+ case IIO_CHAN_INFO_RAW:
ret = iio_device_claim_direct_mode(indio_dev);
if (ret)
break;
@@ -487,7 +485,8 @@ static int ads1015_read_raw(struct iio_dev *indio_dev,
goto release_direct;
}
- *val = sign_extend32(*val >> shift, 15 - shift);
+ *val = sign_extend32(*val >> chan->scan_type.shift,
+ chan->scan_type.realbits - 1);
ret = ads1015_set_power_state(data, false);
if (ret < 0)
@@ -497,7 +496,6 @@ static int ads1015_read_raw(struct iio_dev *indio_dev,
release_direct:
iio_device_release_direct_mode(indio_dev);
break;
- }
case IIO_CHAN_INFO_SCALE:
idx = data->channel_data[chan->address].pga;
*val = ads1015_fullscale_range[idx];
@@ -952,7 +950,7 @@ static int ads1015_probe(struct i2c_client *client,
indio_dev->name = ADS1015_DRV_NAME;
indio_dev->modes = INDIO_DIRECT_MODE;
- chip = (enum chip_ids)device_get_match_data(&client->dev);
+ chip = (uintptr_t)device_get_match_data(&client->dev);
if (chip == ADSXXXX)
chip = id->driver_data;
switch (chip) {
diff --git a/drivers/iio/adc/ti-ads124s08.c b/drivers/iio/adc/ti-ads124s08.c
index 17d0da5877a9..767b3b634809 100644
--- a/drivers/iio/adc/ti-ads124s08.c
+++ b/drivers/iio/adc/ti-ads124s08.c
@@ -8,8 +8,7 @@
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
+#include <linux/mod_devicetable.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c
index 79c803537dc4..2e24717d7f55 100644
--- a/drivers/iio/adc/ti-ads8688.c
+++ b/drivers/iio/adc/ti-ads8688.c
@@ -281,12 +281,10 @@ static int ads8688_write_reg_range(struct iio_dev *indio_dev,
enum ads8688_range range)
{
unsigned int tmp;
- int ret;
tmp = ADS8688_PROG_REG_RANGE_CH(chan->channel);
- ret = ads8688_prog_write(indio_dev, tmp, range);
- return ret;
+ return ads8688_prog_write(indio_dev, tmp, range);
}
static int ads8688_write_raw(struct iio_dev *indio_dev,
diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c
new file mode 100644
index 000000000000..8343c5f74121
--- /dev/null
+++ b/drivers/iio/adc/xilinx-ams.c
@@ -0,0 +1,1451 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx AMS driver
+ *
+ * Copyright (C) 2021 Xilinx, Inc.
+ *
+ * Manish Narani <mnarani@xilinx.com>
+ * Rajnikant Bhojani <rajnikant.bhojani@xilinx.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/overflow.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+
+#include <linux/iio/events.h>
+#include <linux/iio/iio.h>
+
+/* AMS registers definitions */
+#define AMS_ISR_0 0x010
+#define AMS_ISR_1 0x014
+#define AMS_IER_0 0x020
+#define AMS_IER_1 0x024
+#define AMS_IDR_0 0x028
+#define AMS_IDR_1 0x02C
+#define AMS_PS_CSTS 0x040
+#define AMS_PL_CSTS 0x044
+
+#define AMS_VCC_PSPLL0 0x060
+#define AMS_VCC_PSPLL3 0x06C
+#define AMS_VCCINT 0x078
+#define AMS_VCCBRAM 0x07C
+#define AMS_VCCAUX 0x080
+#define AMS_PSDDRPLL 0x084
+#define AMS_PSINTFPDDR 0x09C
+
+#define AMS_VCC_PSPLL0_CH 48
+#define AMS_VCC_PSPLL3_CH 51
+#define AMS_VCCINT_CH 54
+#define AMS_VCCBRAM_CH 55
+#define AMS_VCCAUX_CH 56
+#define AMS_PSDDRPLL_CH 57
+#define AMS_PSINTFPDDR_CH 63
+
+#define AMS_REG_CONFIG0 0x100
+#define AMS_REG_CONFIG1 0x104
+#define AMS_REG_CONFIG3 0x10C
+#define AMS_REG_CONFIG4 0x110
+#define AMS_REG_SEQ_CH0 0x120
+#define AMS_REG_SEQ_CH1 0x124
+#define AMS_REG_SEQ_CH2 0x118
+
+#define AMS_VUSER0_MASK BIT(0)
+#define AMS_VUSER1_MASK BIT(1)
+#define AMS_VUSER2_MASK BIT(2)
+#define AMS_VUSER3_MASK BIT(3)
+
+#define AMS_TEMP 0x000
+#define AMS_SUPPLY1 0x004
+#define AMS_SUPPLY2 0x008
+#define AMS_VP_VN 0x00C
+#define AMS_VREFP 0x010
+#define AMS_VREFN 0x014
+#define AMS_SUPPLY3 0x018
+#define AMS_SUPPLY4 0x034
+#define AMS_SUPPLY5 0x038
+#define AMS_SUPPLY6 0x03C
+#define AMS_SUPPLY7 0x200
+#define AMS_SUPPLY8 0x204
+#define AMS_SUPPLY9 0x208
+#define AMS_SUPPLY10 0x20C
+#define AMS_VCCAMS 0x210
+#define AMS_TEMP_REMOTE 0x214
+
+#define AMS_REG_VAUX(x) (0x40 + 4 * (x))
+
+#define AMS_PS_RESET_VALUE 0xFFFF
+#define AMS_PL_RESET_VALUE 0xFFFF
+
+#define AMS_CONF0_CHANNEL_NUM_MASK GENMASK(6, 0)
+
+#define AMS_CONF1_SEQ_MASK GENMASK(15, 12)
+#define AMS_CONF1_SEQ_DEFAULT FIELD_PREP(AMS_CONF1_SEQ_MASK, 0)
+#define AMS_CONF1_SEQ_CONTINUOUS FIELD_PREP(AMS_CONF1_SEQ_MASK, 1)
+#define AMS_CONF1_SEQ_SINGLE_CHANNEL FIELD_PREP(AMS_CONF1_SEQ_MASK, 2)
+
+#define AMS_REG_SEQ0_MASK GENMASK(15, 0)
+#define AMS_REG_SEQ2_MASK GENMASK(21, 16)
+#define AMS_REG_SEQ1_MASK GENMASK_ULL(37, 22)
+
+#define AMS_PS_SEQ_MASK GENMASK(21, 0)
+#define AMS_PL_SEQ_MASK GENMASK_ULL(59, 22)
+
+#define AMS_ALARM_TEMP 0x140
+#define AMS_ALARM_SUPPLY1 0x144
+#define AMS_ALARM_SUPPLY2 0x148
+#define AMS_ALARM_SUPPLY3 0x160
+#define AMS_ALARM_SUPPLY4 0x164
+#define AMS_ALARM_SUPPLY5 0x168
+#define AMS_ALARM_SUPPLY6 0x16C
+#define AMS_ALARM_SUPPLY7 0x180
+#define AMS_ALARM_SUPPLY8 0x184
+#define AMS_ALARM_SUPPLY9 0x188
+#define AMS_ALARM_SUPPLY10 0x18C
+#define AMS_ALARM_VCCAMS 0x190
+#define AMS_ALARM_TEMP_REMOTE 0x194
+#define AMS_ALARM_THRESHOLD_OFF_10 0x10
+#define AMS_ALARM_THRESHOLD_OFF_20 0x20
+
+#define AMS_ALARM_THR_DIRECT_MASK BIT(1)
+#define AMS_ALARM_THR_MIN 0x0000
+#define AMS_ALARM_THR_MAX (BIT(16) - 1)
+
+#define AMS_ALARM_MASK GENMASK_ULL(63, 0)
+#define AMS_NO_OF_ALARMS 32
+#define AMS_PL_ALARM_START 16
+#define AMS_PL_ALARM_MASK GENMASK(31, 16)
+#define AMS_ISR0_ALARM_MASK GENMASK(31, 0)
+#define AMS_ISR1_ALARM_MASK (GENMASK(31, 29) | GENMASK(4, 0))
+#define AMS_ISR1_EOC_MASK BIT(3)
+#define AMS_ISR1_INTR_MASK GENMASK_ULL(63, 32)
+#define AMS_ISR0_ALARM_2_TO_0_MASK GENMASK(2, 0)
+#define AMS_ISR0_ALARM_6_TO_3_MASK GENMASK(6, 3)
+#define AMS_ISR0_ALARM_12_TO_7_MASK GENMASK(13, 8)
+#define AMS_CONF1_ALARM_2_TO_0_MASK GENMASK(3, 1)
+#define AMS_CONF1_ALARM_6_TO_3_MASK GENMASK(11, 8)
+#define AMS_CONF1_ALARM_12_TO_7_MASK GENMASK(5, 0)
+#define AMS_REGCFG1_ALARM_MASK \
+ (AMS_CONF1_ALARM_2_TO_0_MASK | AMS_CONF1_ALARM_6_TO_3_MASK | BIT(0))
+#define AMS_REGCFG3_ALARM_MASK AMS_CONF1_ALARM_12_TO_7_MASK
+
+#define AMS_PS_CSTS_PS_READY (BIT(27) | BIT(16))
+#define AMS_PL_CSTS_ACCESS_MASK BIT(1)
+
+#define AMS_PL_MAX_FIXED_CHANNEL 10
+#define AMS_PL_MAX_EXT_CHANNEL 20
+
+#define AMS_INIT_POLL_TIME_US 200
+#define AMS_INIT_TIMEOUT_US 10000
+#define AMS_UNMASK_TIMEOUT_MS 500
+
+/*
+ * Following scale and offset value is derived from
+ * UG580 (v1.7) December 20, 2016
+ */
+#define AMS_SUPPLY_SCALE_1VOLT_mV 1000
+#define AMS_SUPPLY_SCALE_3VOLT_mV 3000
+#define AMS_SUPPLY_SCALE_6VOLT_mV 6000
+#define AMS_SUPPLY_SCALE_DIV_BIT 16
+
+#define AMS_TEMP_SCALE 509314
+#define AMS_TEMP_SCALE_DIV_BIT 16
+#define AMS_TEMP_OFFSET -((280230LL << 16) / 509314)
+
+enum ams_alarm_bit {
+ AMS_ALARM_BIT_TEMP = 0,
+ AMS_ALARM_BIT_SUPPLY1 = 1,
+ AMS_ALARM_BIT_SUPPLY2 = 2,
+ AMS_ALARM_BIT_SUPPLY3 = 3,
+ AMS_ALARM_BIT_SUPPLY4 = 4,
+ AMS_ALARM_BIT_SUPPLY5 = 5,
+ AMS_ALARM_BIT_SUPPLY6 = 6,
+ AMS_ALARM_BIT_RESERVED = 7,
+ AMS_ALARM_BIT_SUPPLY7 = 8,
+ AMS_ALARM_BIT_SUPPLY8 = 9,
+ AMS_ALARM_BIT_SUPPLY9 = 10,
+ AMS_ALARM_BIT_SUPPLY10 = 11,
+ AMS_ALARM_BIT_VCCAMS = 12,
+ AMS_ALARM_BIT_TEMP_REMOTE = 13,
+};
+
+enum ams_seq {
+ AMS_SEQ_VCC_PSPLL = 0,
+ AMS_SEQ_VCC_PSBATT = 1,
+ AMS_SEQ_VCCINT = 2,
+ AMS_SEQ_VCCBRAM = 3,
+ AMS_SEQ_VCCAUX = 4,
+ AMS_SEQ_PSDDRPLL = 5,
+ AMS_SEQ_INTDDR = 6,
+};
+
+enum ams_ps_pl_seq {
+ AMS_SEQ_CALIB = 0,
+ AMS_SEQ_RSVD_1 = 1,
+ AMS_SEQ_RSVD_2 = 2,
+ AMS_SEQ_TEST = 3,
+ AMS_SEQ_RSVD_4 = 4,
+ AMS_SEQ_SUPPLY4 = 5,
+ AMS_SEQ_SUPPLY5 = 6,
+ AMS_SEQ_SUPPLY6 = 7,
+ AMS_SEQ_TEMP = 8,
+ AMS_SEQ_SUPPLY2 = 9,
+ AMS_SEQ_SUPPLY1 = 10,
+ AMS_SEQ_VP_VN = 11,
+ AMS_SEQ_VREFP = 12,
+ AMS_SEQ_VREFN = 13,
+ AMS_SEQ_SUPPLY3 = 14,
+ AMS_SEQ_CURRENT_MON = 15,
+ AMS_SEQ_SUPPLY7 = 16,
+ AMS_SEQ_SUPPLY8 = 17,
+ AMS_SEQ_SUPPLY9 = 18,
+ AMS_SEQ_SUPPLY10 = 19,
+ AMS_SEQ_VCCAMS = 20,
+ AMS_SEQ_TEMP_REMOTE = 21,
+ AMS_SEQ_MAX = 22
+};
+
+#define AMS_PS_SEQ_MAX AMS_SEQ_MAX
+#define AMS_SEQ(x) (AMS_SEQ_MAX + (x))
+#define PS_SEQ(x) (x)
+#define PL_SEQ(x) (AMS_PS_SEQ_MAX + (x))
+#define AMS_CTRL_SEQ_BASE (AMS_PS_SEQ_MAX * 3)
+
+#define AMS_CHAN_TEMP(_scan_index, _addr) { \
+ .type = IIO_TEMP, \
+ .indexed = 1, \
+ .address = (_addr), \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
+ BIT(IIO_CHAN_INFO_SCALE) | \
+ BIT(IIO_CHAN_INFO_OFFSET), \
+ .event_spec = ams_temp_events, \
+ .scan_index = _scan_index, \
+ .num_event_specs = ARRAY_SIZE(ams_temp_events), \
+}
+
+#define AMS_CHAN_VOLTAGE(_scan_index, _addr, _alarm) { \
+ .type = IIO_VOLTAGE, \
+ .indexed = 1, \
+ .address = (_addr), \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
+ BIT(IIO_CHAN_INFO_SCALE), \
+ .event_spec = (_alarm) ? ams_voltage_events : NULL, \
+ .scan_index = _scan_index, \
+ .num_event_specs = (_alarm) ? ARRAY_SIZE(ams_voltage_events) : 0, \
+}
+
+#define AMS_PS_CHAN_TEMP(_scan_index, _addr) \
+ AMS_CHAN_TEMP(PS_SEQ(_scan_index), _addr)
+#define AMS_PS_CHAN_VOLTAGE(_scan_index, _addr) \
+ AMS_CHAN_VOLTAGE(PS_SEQ(_scan_index), _addr, true)
+
+#define AMS_PL_CHAN_TEMP(_scan_index, _addr) \
+ AMS_CHAN_TEMP(PL_SEQ(_scan_index), _addr)
+#define AMS_PL_CHAN_VOLTAGE(_scan_index, _addr, _alarm) \
+ AMS_CHAN_VOLTAGE(PL_SEQ(_scan_index), _addr, _alarm)
+#define AMS_PL_AUX_CHAN_VOLTAGE(_auxno) \
+ AMS_CHAN_VOLTAGE(PL_SEQ(AMS_SEQ(_auxno)), AMS_REG_VAUX(_auxno), false)
+#define AMS_CTRL_CHAN_VOLTAGE(_scan_index, _addr) \
+ AMS_CHAN_VOLTAGE(PL_SEQ(AMS_SEQ(AMS_SEQ(_scan_index))), _addr, false)
+
+/**
+ * struct ams - This structure contains necessary state for xilinx-ams to operate
+ * @base: physical base address of device
+ * @ps_base: physical base address of PS device
+ * @pl_base: physical base address of PL device
+ * @clk: clocks associated with the device
+ * @dev: pointer to device struct
+ * @lock: to handle multiple user interaction
+ * @intr_lock: to protect interrupt mask values
+ * @alarm_mask: alarm configuration
+ * @current_masked_alarm: currently masked due to alarm
+ * @intr_mask: interrupt configuration
+ * @ams_unmask_work: re-enables event once the event condition disappears
+ *
+ */
+struct ams {
+ void __iomem *base;
+ void __iomem *ps_base;
+ void __iomem *pl_base;
+ struct clk *clk;
+ struct device *dev;
+ struct mutex lock;
+ spinlock_t intr_lock;
+ unsigned int alarm_mask;
+ unsigned int current_masked_alarm;
+ u64 intr_mask;
+ struct delayed_work ams_unmask_work;
+};
+
+static inline void ams_ps_update_reg(struct ams *ams, unsigned int offset,
+ u32 mask, u32 data)
+{
+ u32 val, regval;
+
+ val = readl(ams->ps_base + offset);
+ regval = (val & ~mask) | (data & mask);
+ writel(regval, ams->ps_base + offset);
+}
+
+static inline void ams_pl_update_reg(struct ams *ams, unsigned int offset,
+ u32 mask, u32 data)
+{
+ u32 val, regval;
+
+ val = readl(ams->pl_base + offset);
+ regval = (val & ~mask) | (data & mask);
+ writel(regval, ams->pl_base + offset);
+}
+
+static void ams_update_intrmask(struct ams *ams, u64 mask, u64 val)
+{
+ u32 regval;
+
+ ams->intr_mask = (ams->intr_mask & ~mask) | (val & mask);
+
+ regval = ~(ams->intr_mask | ams->current_masked_alarm);
+ writel(regval, ams->base + AMS_IER_0);
+
+ regval = ~(FIELD_GET(AMS_ISR1_INTR_MASK, ams->intr_mask));
+ writel(regval, ams->base + AMS_IER_1);
+
+ regval = ams->intr_mask | ams->current_masked_alarm;
+ writel(regval, ams->base + AMS_IDR_0);
+
+ regval = FIELD_GET(AMS_ISR1_INTR_MASK, ams->intr_mask);
+ writel(regval, ams->base + AMS_IDR_1);
+}
+
+static void ams_disable_all_alarms(struct ams *ams)
+{
+ /* disable PS module alarm */
+ if (ams->ps_base) {
+ ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_REGCFG1_ALARM_MASK,
+ AMS_REGCFG1_ALARM_MASK);
+ ams_ps_update_reg(ams, AMS_REG_CONFIG3, AMS_REGCFG3_ALARM_MASK,
+ AMS_REGCFG3_ALARM_MASK);
+ }
+
+ /* disable PL module alarm */
+ if (ams->pl_base) {
+ ams_pl_update_reg(ams, AMS_REG_CONFIG1, AMS_REGCFG1_ALARM_MASK,
+ AMS_REGCFG1_ALARM_MASK);
+ ams_pl_update_reg(ams, AMS_REG_CONFIG3, AMS_REGCFG3_ALARM_MASK,
+ AMS_REGCFG3_ALARM_MASK);
+ }
+}
+
+static void ams_update_ps_alarm(struct ams *ams, unsigned long alarm_mask)
+{
+ u32 cfg;
+ u32 val;
+
+ val = FIELD_GET(AMS_ISR0_ALARM_2_TO_0_MASK, alarm_mask);
+ cfg = ~(FIELD_PREP(AMS_CONF1_ALARM_2_TO_0_MASK, val));
+
+ val = FIELD_GET(AMS_ISR0_ALARM_6_TO_3_MASK, alarm_mask);
+ cfg &= ~(FIELD_PREP(AMS_CONF1_ALARM_6_TO_3_MASK, val));
+
+ ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_REGCFG1_ALARM_MASK, cfg);
+
+ val = FIELD_GET(AMS_ISR0_ALARM_12_TO_7_MASK, alarm_mask);
+ cfg = ~(FIELD_PREP(AMS_CONF1_ALARM_12_TO_7_MASK, val));
+ ams_ps_update_reg(ams, AMS_REG_CONFIG3, AMS_REGCFG3_ALARM_MASK, cfg);
+}
+
+static void ams_update_pl_alarm(struct ams *ams, unsigned long alarm_mask)
+{
+ unsigned long pl_alarm_mask;
+ u32 cfg;
+ u32 val;
+
+ pl_alarm_mask = FIELD_GET(AMS_PL_ALARM_MASK, alarm_mask);
+
+ val = FIELD_GET(AMS_ISR0_ALARM_2_TO_0_MASK, pl_alarm_mask);
+ cfg = ~(FIELD_PREP(AMS_CONF1_ALARM_2_TO_0_MASK, val));
+
+ val = FIELD_GET(AMS_ISR0_ALARM_6_TO_3_MASK, pl_alarm_mask);
+ cfg &= ~(FIELD_PREP(AMS_CONF1_ALARM_6_TO_3_MASK, val));
+
+ ams_pl_update_reg(ams, AMS_REG_CONFIG1, AMS_REGCFG1_ALARM_MASK, cfg);
+
+ val = FIELD_GET(AMS_ISR0_ALARM_12_TO_7_MASK, pl_alarm_mask);
+ cfg = ~(FIELD_PREP(AMS_CONF1_ALARM_12_TO_7_MASK, val));
+ ams_pl_update_reg(ams, AMS_REG_CONFIG3, AMS_REGCFG3_ALARM_MASK, cfg);
+}
+
+static void ams_update_alarm(struct ams *ams, unsigned long alarm_mask)
+{
+ unsigned long flags;
+
+ if (ams->ps_base)
+ ams_update_ps_alarm(ams, alarm_mask);
+
+ if (ams->pl_base)
+ ams_update_pl_alarm(ams, alarm_mask);
+
+ spin_lock_irqsave(&ams->intr_lock, flags);
+ ams_update_intrmask(ams, AMS_ISR0_ALARM_MASK, ~alarm_mask);
+ spin_unlock_irqrestore(&ams->intr_lock, flags);
+}
+
+static void ams_enable_channel_sequence(struct iio_dev *indio_dev)
+{
+ struct ams *ams = iio_priv(indio_dev);
+ unsigned long long scan_mask;
+ int i;
+ u32 regval;
+
+ /*
+ * Enable channel sequence. First 22 bits of scan_mask represent
+ * PS channels, and next remaining bits represent PL channels.
+ */
+
+ /* Run calibration of PS & PL as part of the sequence */
+ scan_mask = BIT(0) | BIT(AMS_PS_SEQ_MAX);
+ for (i = 0; i < indio_dev->num_channels; i++)
+ scan_mask |= BIT_ULL(indio_dev->channels[i].scan_index);
+
+ if (ams->ps_base) {
+ /* put sysmon in a soft reset to change the sequence */
+ ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK,
+ AMS_CONF1_SEQ_DEFAULT);
+
+ /* configure basic channels */
+ regval = FIELD_GET(AMS_REG_SEQ0_MASK, scan_mask);
+ writel(regval, ams->ps_base + AMS_REG_SEQ_CH0);
+
+ regval = FIELD_GET(AMS_REG_SEQ2_MASK, scan_mask);
+ writel(regval, ams->ps_base + AMS_REG_SEQ_CH2);
+
+ /* set continuous sequence mode */
+ ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK,
+ AMS_CONF1_SEQ_CONTINUOUS);
+ }
+
+ if (ams->pl_base) {
+ /* put sysmon in a soft reset to change the sequence */
+ ams_pl_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK,
+ AMS_CONF1_SEQ_DEFAULT);
+
+ /* configure basic channels */
+ scan_mask = FIELD_GET(AMS_PL_SEQ_MASK, scan_mask);
+
+ regval = FIELD_GET(AMS_REG_SEQ0_MASK, scan_mask);
+ writel(regval, ams->pl_base + AMS_REG_SEQ_CH0);
+
+ regval = FIELD_GET(AMS_REG_SEQ1_MASK, scan_mask);
+ writel(regval, ams->pl_base + AMS_REG_SEQ_CH1);
+
+ regval = FIELD_GET(AMS_REG_SEQ2_MASK, scan_mask);
+ writel(regval, ams->pl_base + AMS_REG_SEQ_CH2);
+
+ /* set continuous sequence mode */
+ ams_pl_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK,
+ AMS_CONF1_SEQ_CONTINUOUS);
+ }
+}
+
+static int ams_init_device(struct ams *ams)
+{
+ u32 expect = AMS_PS_CSTS_PS_READY;
+ u32 reg, value;
+ int ret;
+
+ /* reset AMS */
+ if (ams->ps_base) {
+ writel(AMS_PS_RESET_VALUE, ams->ps_base + AMS_VP_VN);
+
+ ret = readl_poll_timeout(ams->base + AMS_PS_CSTS, reg, (reg & expect),
+ AMS_INIT_POLL_TIME_US, AMS_INIT_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ /* put sysmon in a default state */
+ ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK,
+ AMS_CONF1_SEQ_DEFAULT);
+ }
+
+ if (ams->pl_base) {
+ value = readl(ams->base + AMS_PL_CSTS);
+ if (value == 0)
+ return 0;
+
+ writel(AMS_PL_RESET_VALUE, ams->pl_base + AMS_VP_VN);
+
+ /* put sysmon in a default state */
+ ams_pl_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK,
+ AMS_CONF1_SEQ_DEFAULT);
+ }
+
+ ams_disable_all_alarms(ams);
+
+ /* Disable interrupt */
+ ams_update_intrmask(ams, AMS_ALARM_MASK, AMS_ALARM_MASK);
+
+ /* Clear any pending interrupt */
+ writel(AMS_ISR0_ALARM_MASK, ams->base + AMS_ISR_0);
+ writel(AMS_ISR1_ALARM_MASK, ams->base + AMS_ISR_1);
+
+ return 0;
+}
+
+static int ams_enable_single_channel(struct ams *ams, unsigned int offset)
+{
+ u8 channel_num;
+
+ switch (offset) {
+ case AMS_VCC_PSPLL0:
+ channel_num = AMS_VCC_PSPLL0_CH;
+ break;
+ case AMS_VCC_PSPLL3:
+ channel_num = AMS_VCC_PSPLL3_CH;
+ break;
+ case AMS_VCCINT:
+ channel_num = AMS_VCCINT_CH;
+ break;
+ case AMS_VCCBRAM:
+ channel_num = AMS_VCCBRAM_CH;
+ break;
+ case AMS_VCCAUX:
+ channel_num = AMS_VCCAUX_CH;
+ break;
+ case AMS_PSDDRPLL:
+ channel_num = AMS_PSDDRPLL_CH;
+ break;
+ case AMS_PSINTFPDDR:
+ channel_num = AMS_PSINTFPDDR_CH;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* set single channel, sequencer off mode */
+ ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK,
+ AMS_CONF1_SEQ_SINGLE_CHANNEL);
+
+ /* write the channel number */
+ ams_ps_update_reg(ams, AMS_REG_CONFIG0, AMS_CONF0_CHANNEL_NUM_MASK,
+ channel_num);
+
+ return 0;
+}
+
+static int ams_read_vcc_reg(struct ams *ams, unsigned int offset, u32 *data)
+{
+ u32 expect = AMS_ISR1_EOC_MASK;
+ u32 reg;
+ int ret;
+
+ ret = ams_enable_single_channel(ams, offset);
+ if (ret)
+ return ret;
+
+ ret = readl_poll_timeout(ams->base + AMS_ISR_1, reg, (reg & expect),
+ AMS_INIT_POLL_TIME_US, AMS_INIT_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ *data = readl(ams->base + offset);
+
+ return 0;
+}
+
+static int ams_get_ps_scale(int address)
+{
+ int val;
+
+ switch (address) {
+ case AMS_SUPPLY1:
+ case AMS_SUPPLY2:
+ case AMS_SUPPLY3:
+ case AMS_SUPPLY4:
+ case AMS_SUPPLY9:
+ case AMS_SUPPLY10:
+ case AMS_VCCAMS:
+ val = AMS_SUPPLY_SCALE_3VOLT_mV;
+ break;
+ case AMS_SUPPLY5:
+ case AMS_SUPPLY6:
+ case AMS_SUPPLY7:
+ case AMS_SUPPLY8:
+ val = AMS_SUPPLY_SCALE_6VOLT_mV;
+ break;
+ default:
+ val = AMS_SUPPLY_SCALE_1VOLT_mV;
+ break;
+ }
+
+ return val;
+}
+
+static int ams_get_pl_scale(struct ams *ams, int address)
+{
+ int val, regval;
+
+ switch (address) {
+ case AMS_SUPPLY1:
+ case AMS_SUPPLY2:
+ case AMS_SUPPLY3:
+ case AMS_SUPPLY4:
+ case AMS_SUPPLY5:
+ case AMS_SUPPLY6:
+ case AMS_VCCAMS:
+ case AMS_VREFP:
+ case AMS_VREFN:
+ val = AMS_SUPPLY_SCALE_3VOLT_mV;
+ break;
+ case AMS_SUPPLY7:
+ regval = readl(ams->pl_base + AMS_REG_CONFIG4);
+ if (FIELD_GET(AMS_VUSER0_MASK, regval))
+ val = AMS_SUPPLY_SCALE_6VOLT_mV;
+ else
+ val = AMS_SUPPLY_SCALE_3VOLT_mV;
+ break;
+ case AMS_SUPPLY8:
+ regval = readl(ams->pl_base + AMS_REG_CONFIG4);
+ if (FIELD_GET(AMS_VUSER1_MASK, regval))
+ val = AMS_SUPPLY_SCALE_6VOLT_mV;
+ else
+ val = AMS_SUPPLY_SCALE_3VOLT_mV;
+ break;
+ case AMS_SUPPLY9:
+ regval = readl(ams->pl_base + AMS_REG_CONFIG4);
+ if (FIELD_GET(AMS_VUSER2_MASK, regval))
+ val = AMS_SUPPLY_SCALE_6VOLT_mV;
+ else
+ val = AMS_SUPPLY_SCALE_3VOLT_mV;
+ break;
+ case AMS_SUPPLY10:
+ regval = readl(ams->pl_base + AMS_REG_CONFIG4);
+ if (FIELD_GET(AMS_VUSER3_MASK, regval))
+ val = AMS_SUPPLY_SCALE_6VOLT_mV;
+ else
+ val = AMS_SUPPLY_SCALE_3VOLT_mV;
+ break;
+ case AMS_VP_VN:
+ case AMS_REG_VAUX(0) ... AMS_REG_VAUX(15):
+ val = AMS_SUPPLY_SCALE_1VOLT_mV;
+ break;
+ default:
+ val = AMS_SUPPLY_SCALE_1VOLT_mV;
+ break;
+ }
+
+ return val;
+}
+
+static int ams_get_ctrl_scale(int address)
+{
+ int val;
+
+ switch (address) {
+ case AMS_VCC_PSPLL0:
+ case AMS_VCC_PSPLL3:
+ case AMS_VCCINT:
+ case AMS_VCCBRAM:
+ case AMS_VCCAUX:
+ case AMS_PSDDRPLL:
+ case AMS_PSINTFPDDR:
+ val = AMS_SUPPLY_SCALE_3VOLT_mV;
+ break;
+ default:
+ val = AMS_SUPPLY_SCALE_1VOLT_mV;
+ break;
+ }
+
+ return val;
+}
+
+static int ams_read_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int *val, int *val2, long mask)
+{
+ struct ams *ams = iio_priv(indio_dev);
+ int ret;
+
+ switch (mask) {
+ case IIO_CHAN_INFO_RAW:
+ mutex_lock(&ams->lock);
+ if (chan->scan_index >= AMS_CTRL_SEQ_BASE) {
+ ret = ams_read_vcc_reg(ams, chan->address, val);
+ if (ret)
+ goto unlock_mutex;
+ ams_enable_channel_sequence(indio_dev);
+ } else if (chan->scan_index >= AMS_PS_SEQ_MAX)
+ *val = readl(ams->pl_base + chan->address);
+ else
+ *val = readl(ams->ps_base + chan->address);
+
+ ret = IIO_VAL_INT;
+unlock_mutex:
+ mutex_unlock(&ams->lock);
+ return ret;
+ case IIO_CHAN_INFO_SCALE:
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ if (chan->scan_index < AMS_PS_SEQ_MAX)
+ *val = ams_get_ps_scale(chan->address);
+ else if (chan->scan_index >= AMS_PS_SEQ_MAX &&
+ chan->scan_index < AMS_CTRL_SEQ_BASE)
+ *val = ams_get_pl_scale(ams, chan->address);
+ else
+ *val = ams_get_ctrl_scale(chan->address);
+
+ *val2 = AMS_SUPPLY_SCALE_DIV_BIT;
+ return IIO_VAL_FRACTIONAL_LOG2;
+ case IIO_TEMP:
+ *val = AMS_TEMP_SCALE;
+ *val2 = AMS_TEMP_SCALE_DIV_BIT;
+ return IIO_VAL_FRACTIONAL_LOG2;
+ default:
+ return -EINVAL;
+ }
+ case IIO_CHAN_INFO_OFFSET:
+ /* Only the temperature channel has an offset */
+ *val = AMS_TEMP_OFFSET;
+ return IIO_VAL_INT;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int ams_get_alarm_offset(int scan_index, enum iio_event_direction dir)
+{
+ int offset;
+
+ if (scan_index >= AMS_PS_SEQ_MAX)
+ scan_index -= AMS_PS_SEQ_MAX;
+
+ if (dir == IIO_EV_DIR_FALLING) {
+ if (scan_index < AMS_SEQ_SUPPLY7)
+ offset = AMS_ALARM_THRESHOLD_OFF_10;
+ else
+ offset = AMS_ALARM_THRESHOLD_OFF_20;
+ } else {
+ offset = 0;
+ }
+
+ switch (scan_index) {
+ case AMS_SEQ_TEMP:
+ return AMS_ALARM_TEMP + offset;
+ case AMS_SEQ_SUPPLY1:
+ return AMS_ALARM_SUPPLY1 + offset;
+ case AMS_SEQ_SUPPLY2:
+ return AMS_ALARM_SUPPLY2 + offset;
+ case AMS_SEQ_SUPPLY3:
+ return AMS_ALARM_SUPPLY3 + offset;
+ case AMS_SEQ_SUPPLY4:
+ return AMS_ALARM_SUPPLY4 + offset;
+ case AMS_SEQ_SUPPLY5:
+ return AMS_ALARM_SUPPLY5 + offset;
+ case AMS_SEQ_SUPPLY6:
+ return AMS_ALARM_SUPPLY6 + offset;
+ case AMS_SEQ_SUPPLY7:
+ return AMS_ALARM_SUPPLY7 + offset;
+ case AMS_SEQ_SUPPLY8:
+ return AMS_ALARM_SUPPLY8 + offset;
+ case AMS_SEQ_SUPPLY9:
+ return AMS_ALARM_SUPPLY9 + offset;
+ case AMS_SEQ_SUPPLY10:
+ return AMS_ALARM_SUPPLY10 + offset;
+ case AMS_SEQ_VCCAMS:
+ return AMS_ALARM_VCCAMS + offset;
+ case AMS_SEQ_TEMP_REMOTE:
+ return AMS_ALARM_TEMP_REMOTE + offset;
+ default:
+ return 0;
+ }
+}
+
+static const struct iio_chan_spec *ams_event_to_channel(struct iio_dev *dev,
+ u32 event)
+{
+ int scan_index = 0, i;
+
+ if (event >= AMS_PL_ALARM_START) {
+ event -= AMS_PL_ALARM_START;
+ scan_index = AMS_PS_SEQ_MAX;
+ }
+
+ switch (event) {
+ case AMS_ALARM_BIT_TEMP:
+ scan_index += AMS_SEQ_TEMP;
+ break;
+ case AMS_ALARM_BIT_SUPPLY1:
+ scan_index += AMS_SEQ_SUPPLY1;
+ break;
+ case AMS_ALARM_BIT_SUPPLY2:
+ scan_index += AMS_SEQ_SUPPLY2;
+ break;
+ case AMS_ALARM_BIT_SUPPLY3:
+ scan_index += AMS_SEQ_SUPPLY3;
+ break;
+ case AMS_ALARM_BIT_SUPPLY4:
+ scan_index += AMS_SEQ_SUPPLY4;
+ break;
+ case AMS_ALARM_BIT_SUPPLY5:
+ scan_index += AMS_SEQ_SUPPLY5;
+ break;
+ case AMS_ALARM_BIT_SUPPLY6:
+ scan_index += AMS_SEQ_SUPPLY6;
+ break;
+ case AMS_ALARM_BIT_SUPPLY7:
+ scan_index += AMS_SEQ_SUPPLY7;
+ break;
+ case AMS_ALARM_BIT_SUPPLY8:
+ scan_index += AMS_SEQ_SUPPLY8;
+ break;
+ case AMS_ALARM_BIT_SUPPLY9:
+ scan_index += AMS_SEQ_SUPPLY9;
+ break;
+ case AMS_ALARM_BIT_SUPPLY10:
+ scan_index += AMS_SEQ_SUPPLY10;
+ break;
+ case AMS_ALARM_BIT_VCCAMS:
+ scan_index += AMS_SEQ_VCCAMS;
+ break;
+ case AMS_ALARM_BIT_TEMP_REMOTE:
+ scan_index += AMS_SEQ_TEMP_REMOTE;
+ break;
+ default:
+ break;
+ }
+
+ for (i = 0; i < dev->num_channels; i++)
+ if (dev->channels[i].scan_index == scan_index)
+ break;
+
+ return &dev->channels[i];
+}
+
+static int ams_get_alarm_mask(int scan_index)
+{
+ int bit = 0;
+
+ if (scan_index >= AMS_PS_SEQ_MAX) {
+ bit = AMS_PL_ALARM_START;
+ scan_index -= AMS_PS_SEQ_MAX;
+ }
+
+ switch (scan_index) {
+ case AMS_SEQ_TEMP:
+ return BIT(AMS_ALARM_BIT_TEMP + bit);
+ case AMS_SEQ_SUPPLY1:
+ return BIT(AMS_ALARM_BIT_SUPPLY1 + bit);
+ case AMS_SEQ_SUPPLY2:
+ return BIT(AMS_ALARM_BIT_SUPPLY2 + bit);
+ case AMS_SEQ_SUPPLY3:
+ return BIT(AMS_ALARM_BIT_SUPPLY3 + bit);
+ case AMS_SEQ_SUPPLY4:
+ return BIT(AMS_ALARM_BIT_SUPPLY4 + bit);
+ case AMS_SEQ_SUPPLY5:
+ return BIT(AMS_ALARM_BIT_SUPPLY5 + bit);
+ case AMS_SEQ_SUPPLY6:
+ return BIT(AMS_ALARM_BIT_SUPPLY6 + bit);
+ case AMS_SEQ_SUPPLY7:
+ return BIT(AMS_ALARM_BIT_SUPPLY7 + bit);
+ case AMS_SEQ_SUPPLY8:
+ return BIT(AMS_ALARM_BIT_SUPPLY8 + bit);
+ case AMS_SEQ_SUPPLY9:
+ return BIT(AMS_ALARM_BIT_SUPPLY9 + bit);
+ case AMS_SEQ_SUPPLY10:
+ return BIT(AMS_ALARM_BIT_SUPPLY10 + bit);
+ case AMS_SEQ_VCCAMS:
+ return BIT(AMS_ALARM_BIT_VCCAMS + bit);
+ case AMS_SEQ_TEMP_REMOTE:
+ return BIT(AMS_ALARM_BIT_TEMP_REMOTE + bit);
+ default:
+ return 0;
+ }
+}
+
+static int ams_read_event_config(struct iio_dev *indio_dev,
+ const struct iio_chan_spec *chan,
+ enum iio_event_type type,
+ enum iio_event_direction dir)
+{
+ struct ams *ams = iio_priv(indio_dev);
+
+ return !!(ams->alarm_mask & ams_get_alarm_mask(chan->scan_index));
+}
+
+static int ams_write_event_config(struct iio_dev *indio_dev,
+ const struct iio_chan_spec *chan,
+ enum iio_event_type type,
+ enum iio_event_direction dir,
+ int state)
+{
+ struct ams *ams = iio_priv(indio_dev);
+ unsigned int alarm;
+
+ alarm = ams_get_alarm_mask(chan->scan_index);
+
+ mutex_lock(&ams->lock);
+
+ if (state)
+ ams->alarm_mask |= alarm;
+ else
+ ams->alarm_mask &= ~alarm;
+
+ ams_update_alarm(ams, ams->alarm_mask);
+
+ mutex_unlock(&ams->lock);
+
+ return 0;
+}
+
+static int ams_read_event_value(struct iio_dev *indio_dev,
+ const struct iio_chan_spec *chan,
+ enum iio_event_type type,
+ enum iio_event_direction dir,
+ enum iio_event_info info, int *val, int *val2)
+{
+ struct ams *ams = iio_priv(indio_dev);
+ unsigned int offset = ams_get_alarm_offset(chan->scan_index, dir);
+
+ mutex_lock(&ams->lock);
+
+ if (chan->scan_index >= AMS_PS_SEQ_MAX)
+ *val = readl(ams->pl_base + offset);
+ else
+ *val = readl(ams->ps_base + offset);
+
+ mutex_unlock(&ams->lock);
+
+ return IIO_VAL_INT;
+}
+
+static int ams_write_event_value(struct iio_dev *indio_dev,
+ const struct iio_chan_spec *chan,
+ enum iio_event_type type,
+ enum iio_event_direction dir,
+ enum iio_event_info info, int val, int val2)
+{
+ struct ams *ams = iio_priv(indio_dev);
+ unsigned int offset;
+
+ mutex_lock(&ams->lock);
+
+ /* Set temperature channel threshold to direct threshold */
+ if (chan->type == IIO_TEMP) {
+ offset = ams_get_alarm_offset(chan->scan_index, IIO_EV_DIR_FALLING);
+
+ if (chan->scan_index >= AMS_PS_SEQ_MAX)
+ ams_pl_update_reg(ams, offset,
+ AMS_ALARM_THR_DIRECT_MASK,
+ AMS_ALARM_THR_DIRECT_MASK);
+ else
+ ams_ps_update_reg(ams, offset,
+ AMS_ALARM_THR_DIRECT_MASK,
+ AMS_ALARM_THR_DIRECT_MASK);
+ }
+
+ offset = ams_get_alarm_offset(chan->scan_index, dir);
+ if (chan->scan_index >= AMS_PS_SEQ_MAX)
+ writel(val, ams->pl_base + offset);
+ else
+ writel(val, ams->ps_base + offset);
+
+ mutex_unlock(&ams->lock);
+
+ return 0;
+}
+
+static void ams_handle_event(struct iio_dev *indio_dev, u32 event)
+{
+ const struct iio_chan_spec *chan;
+
+ chan = ams_event_to_channel(indio_dev, event);
+
+ if (chan->type == IIO_TEMP) {
+ /*
+ * The temperature channel only supports over-temperature
+ * events.
+ */
+ iio_push_event(indio_dev,
+ IIO_UNMOD_EVENT_CODE(chan->type, chan->channel,
+ IIO_EV_TYPE_THRESH,
+ IIO_EV_DIR_RISING),
+ iio_get_time_ns(indio_dev));
+ } else {
+ /*
+ * For other channels we don't know whether it is a upper or
+ * lower threshold event. Userspace will have to check the
+ * channel value if it wants to know.
+ */
+ iio_push_event(indio_dev,
+ IIO_UNMOD_EVENT_CODE(chan->type, chan->channel,
+ IIO_EV_TYPE_THRESH,
+ IIO_EV_DIR_EITHER),
+ iio_get_time_ns(indio_dev));
+ }
+}
+
+static void ams_handle_events(struct iio_dev *indio_dev, unsigned long events)
+{
+ unsigned int bit;
+
+ for_each_set_bit(bit, &events, AMS_NO_OF_ALARMS)
+ ams_handle_event(indio_dev, bit);
+}
+
+/**
+ * ams_unmask_worker - ams alarm interrupt unmask worker
+ * @work: work to be done
+ *
+ * The ZynqMP threshold interrupts are level sensitive. Since we can't make the
+ * threshold condition go way from within the interrupt handler, this means as
+ * soon as a threshold condition is present we would enter the interrupt handler
+ * again and again. To work around this we mask all active threshold interrupts
+ * in the interrupt handler and start a timer. In this timer we poll the
+ * interrupt status and only if the interrupt is inactive we unmask it again.
+ */
+static void ams_unmask_worker(struct work_struct *work)
+{
+ struct ams *ams = container_of(work, struct ams, ams_unmask_work.work);
+ unsigned int status, unmask;
+
+ spin_lock_irq(&ams->intr_lock);
+
+ status = readl(ams->base + AMS_ISR_0);
+
+ /* Clear those bits which are not active anymore */
+ unmask = (ams->current_masked_alarm ^ status) & ams->current_masked_alarm;
+
+ /* Clear status of disabled alarm */
+ unmask |= ams->intr_mask;
+
+ ams->current_masked_alarm &= status;
+
+ /* Also clear those which are masked out anyway */
+ ams->current_masked_alarm &= ~ams->intr_mask;
+
+ /* Clear the interrupts before we unmask them */
+ writel(unmask, ams->base + AMS_ISR_0);
+
+ ams_update_intrmask(ams, ~AMS_ALARM_MASK, ~AMS_ALARM_MASK);
+
+ spin_unlock_irq(&ams->intr_lock);
+
+ /* If still pending some alarm re-trigger the timer */
+ if (ams->current_masked_alarm)
+ schedule_delayed_work(&ams->ams_unmask_work,
+ msecs_to_jiffies(AMS_UNMASK_TIMEOUT_MS));
+}
+
+static irqreturn_t ams_irq(int irq, void *data)
+{
+ struct iio_dev *indio_dev = data;
+ struct ams *ams = iio_priv(indio_dev);
+ u32 isr0;
+
+ spin_lock(&ams->intr_lock);
+
+ isr0 = readl(ams->base + AMS_ISR_0);
+
+ /* Only process alarms that are not masked */
+ isr0 &= ~((ams->intr_mask & AMS_ISR0_ALARM_MASK) | ams->current_masked_alarm);
+ if (!isr0) {
+ spin_unlock(&ams->intr_lock);
+ return IRQ_NONE;
+ }
+
+ /* Clear interrupt */
+ writel(isr0, ams->base + AMS_ISR_0);
+
+ /* Mask the alarm interrupts until cleared */
+ ams->current_masked_alarm |= isr0;
+ ams_update_intrmask(ams, ~AMS_ALARM_MASK, ~AMS_ALARM_MASK);
+
+ ams_handle_events(indio_dev, isr0);
+
+ schedule_delayed_work(&ams->ams_unmask_work,
+ msecs_to_jiffies(AMS_UNMASK_TIMEOUT_MS));
+
+ spin_unlock(&ams->intr_lock);
+
+ return IRQ_HANDLED;
+}
+
+static const struct iio_event_spec ams_temp_events[] = {
+ {
+ .type = IIO_EV_TYPE_THRESH,
+ .dir = IIO_EV_DIR_RISING,
+ .mask_separate = BIT(IIO_EV_INFO_ENABLE) | BIT(IIO_EV_INFO_VALUE),
+ },
+};
+
+static const struct iio_event_spec ams_voltage_events[] = {
+ {
+ .type = IIO_EV_TYPE_THRESH,
+ .dir = IIO_EV_DIR_RISING,
+ .mask_separate = BIT(IIO_EV_INFO_VALUE),
+ },
+ {
+ .type = IIO_EV_TYPE_THRESH,
+ .dir = IIO_EV_DIR_FALLING,
+ .mask_separate = BIT(IIO_EV_INFO_VALUE),
+ },
+ {
+ .type = IIO_EV_TYPE_THRESH,
+ .dir = IIO_EV_DIR_EITHER,
+ .mask_separate = BIT(IIO_EV_INFO_ENABLE),
+ },
+};
+
+static const struct iio_chan_spec ams_ps_channels[] = {
+ AMS_PS_CHAN_TEMP(AMS_SEQ_TEMP, AMS_TEMP),
+ AMS_PS_CHAN_TEMP(AMS_SEQ_TEMP_REMOTE, AMS_TEMP_REMOTE),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY1, AMS_SUPPLY1),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY2, AMS_SUPPLY2),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY3, AMS_SUPPLY3),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY4, AMS_SUPPLY4),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY5, AMS_SUPPLY5),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY6, AMS_SUPPLY6),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY7, AMS_SUPPLY7),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY8, AMS_SUPPLY8),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY9, AMS_SUPPLY9),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_SUPPLY10, AMS_SUPPLY10),
+ AMS_PS_CHAN_VOLTAGE(AMS_SEQ_VCCAMS, AMS_VCCAMS),
+};
+
+static const struct iio_chan_spec ams_pl_channels[] = {
+ AMS_PL_CHAN_TEMP(AMS_SEQ_TEMP, AMS_TEMP),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY1, AMS_SUPPLY1, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY2, AMS_SUPPLY2, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_VREFP, AMS_VREFP, false),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_VREFN, AMS_VREFN, false),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY3, AMS_SUPPLY3, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY4, AMS_SUPPLY4, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY5, AMS_SUPPLY5, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY6, AMS_SUPPLY6, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_VCCAMS, AMS_VCCAMS, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_VP_VN, AMS_VP_VN, false),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY7, AMS_SUPPLY7, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY8, AMS_SUPPLY8, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY9, AMS_SUPPLY9, true),
+ AMS_PL_CHAN_VOLTAGE(AMS_SEQ_SUPPLY10, AMS_SUPPLY10, true),
+ AMS_PL_AUX_CHAN_VOLTAGE(0),
+ AMS_PL_AUX_CHAN_VOLTAGE(1),
+ AMS_PL_AUX_CHAN_VOLTAGE(2),
+ AMS_PL_AUX_CHAN_VOLTAGE(3),
+ AMS_PL_AUX_CHAN_VOLTAGE(4),
+ AMS_PL_AUX_CHAN_VOLTAGE(5),
+ AMS_PL_AUX_CHAN_VOLTAGE(6),
+ AMS_PL_AUX_CHAN_VOLTAGE(7),
+ AMS_PL_AUX_CHAN_VOLTAGE(8),
+ AMS_PL_AUX_CHAN_VOLTAGE(9),
+ AMS_PL_AUX_CHAN_VOLTAGE(10),
+ AMS_PL_AUX_CHAN_VOLTAGE(11),
+ AMS_PL_AUX_CHAN_VOLTAGE(12),
+ AMS_PL_AUX_CHAN_VOLTAGE(13),
+ AMS_PL_AUX_CHAN_VOLTAGE(14),
+ AMS_PL_AUX_CHAN_VOLTAGE(15),
+};
+
+static const struct iio_chan_spec ams_ctrl_channels[] = {
+ AMS_CTRL_CHAN_VOLTAGE(AMS_SEQ_VCC_PSPLL, AMS_VCC_PSPLL0),
+ AMS_CTRL_CHAN_VOLTAGE(AMS_SEQ_VCC_PSBATT, AMS_VCC_PSPLL3),
+ AMS_CTRL_CHAN_VOLTAGE(AMS_SEQ_VCCINT, AMS_VCCINT),
+ AMS_CTRL_CHAN_VOLTAGE(AMS_SEQ_VCCBRAM, AMS_VCCBRAM),
+ AMS_CTRL_CHAN_VOLTAGE(AMS_SEQ_VCCAUX, AMS_VCCAUX),
+ AMS_CTRL_CHAN_VOLTAGE(AMS_SEQ_PSDDRPLL, AMS_PSDDRPLL),
+ AMS_CTRL_CHAN_VOLTAGE(AMS_SEQ_INTDDR, AMS_PSINTFPDDR),
+};
+
+static int ams_get_ext_chan(struct fwnode_handle *chan_node,
+ struct iio_chan_spec *channels, int num_channels)
+{
+ struct iio_chan_spec *chan;
+ struct fwnode_handle *child;
+ unsigned int reg, ext_chan;
+ int ret;
+
+ fwnode_for_each_child_node(chan_node, child) {
+ ret = fwnode_property_read_u32(child, "reg", &reg);
+ if (ret || reg > AMS_PL_MAX_EXT_CHANNEL + 30)
+ continue;
+
+ chan = &channels[num_channels];
+ ext_chan = reg + AMS_PL_MAX_FIXED_CHANNEL - 30;
+ memcpy(chan, &ams_pl_channels[ext_chan], sizeof(*channels));
+
+ if (fwnode_property_read_bool(child, "xlnx,bipolar"))
+ chan->scan_type.sign = 's';
+
+ num_channels++;
+ }
+
+ return num_channels;
+}
+
+static void ams_iounmap_ps(void *data)
+{
+ struct ams *ams = data;
+
+ iounmap(ams->ps_base);
+}
+
+static void ams_iounmap_pl(void *data)
+{
+ struct ams *ams = data;
+
+ iounmap(ams->pl_base);
+}
+
+static int ams_init_module(struct iio_dev *indio_dev,
+ struct fwnode_handle *fwnode,
+ struct iio_chan_spec *channels)
+{
+ struct device *dev = indio_dev->dev.parent;
+ struct ams *ams = iio_priv(indio_dev);
+ int num_channels = 0;
+ int ret;
+
+ if (fwnode_property_match_string(fwnode, "compatible",
+ "xlnx,zynqmp-ams-ps") == 0) {
+ ams->ps_base = fwnode_iomap(fwnode, 0);
+ if (!ams->ps_base)
+ return -ENXIO;
+ ret = devm_add_action_or_reset(dev, ams_iounmap_ps, ams);
+ if (ret < 0)
+ return ret;
+
+ /* add PS channels to iio device channels */
+ memcpy(channels, ams_ps_channels, sizeof(ams_ps_channels));
+ } else if (fwnode_property_match_string(fwnode, "compatible",
+ "xlnx,zynqmp-ams-pl") == 0) {
+ ams->pl_base = fwnode_iomap(fwnode, 0);
+ if (!ams->pl_base)
+ return -ENXIO;
+
+ ret = devm_add_action_or_reset(dev, ams_iounmap_pl, ams);
+ if (ret < 0)
+ return ret;
+
+ /* Copy only first 10 fix channels */
+ memcpy(channels, ams_pl_channels, AMS_PL_MAX_FIXED_CHANNEL * sizeof(*channels));
+ num_channels += AMS_PL_MAX_FIXED_CHANNEL;
+ num_channels = ams_get_ext_chan(fwnode, channels,
+ num_channels);
+ } else if (fwnode_property_match_string(fwnode, "compatible",
+ "xlnx,zynqmp-ams") == 0) {
+ /* add AMS channels to iio device channels */
+ memcpy(channels, ams_ctrl_channels, sizeof(ams_ctrl_channels));
+ num_channels += ARRAY_SIZE(ams_ctrl_channels);
+ } else {
+ return -EINVAL;
+ }
+
+ return num_channels;
+}
+
+static int ams_parse_firmware(struct iio_dev *indio_dev)
+{
+ struct ams *ams = iio_priv(indio_dev);
+ struct iio_chan_spec *ams_channels, *dev_channels;
+ struct device *dev = indio_dev->dev.parent;
+ struct fwnode_handle *child = NULL;
+ struct fwnode_handle *fwnode = dev_fwnode(dev);
+ size_t ams_size, dev_size;
+ int ret, ch_cnt = 0, i, rising_off, falling_off;
+ unsigned int num_channels = 0;
+
+ ams_size = ARRAY_SIZE(ams_ps_channels) + ARRAY_SIZE(ams_pl_channels) +
+ ARRAY_SIZE(ams_ctrl_channels);
+
+ /* Initialize buffer for channel specification */
+ ams_channels = devm_kcalloc(dev, ams_size, sizeof(*ams_channels), GFP_KERNEL);
+ if (!ams_channels)
+ return -ENOMEM;
+
+ if (fwnode_device_is_available(fwnode)) {
+ ret = ams_init_module(indio_dev, fwnode, ams_channels);
+ if (ret < 0)
+ return ret;
+
+ num_channels += ret;
+ }
+
+ fwnode_for_each_child_node(fwnode, child) {
+ if (fwnode_device_is_available(child)) {
+ ret = ams_init_module(indio_dev, child, ams_channels + num_channels);
+ if (ret < 0) {
+ fwnode_handle_put(child);
+ return ret;
+ }
+
+ num_channels += ret;
+ }
+ }
+
+ for (i = 0; i < num_channels; i++) {
+ ams_channels[i].channel = ch_cnt++;
+
+ if (ams_channels[i].scan_index < AMS_CTRL_SEQ_BASE) {
+ /* set threshold to max and min for each channel */
+ falling_off =
+ ams_get_alarm_offset(ams_channels[i].scan_index,
+ IIO_EV_DIR_FALLING);
+ rising_off =
+ ams_get_alarm_offset(ams_channels[i].scan_index,
+ IIO_EV_DIR_RISING);
+ if (ams_channels[i].scan_index >= AMS_PS_SEQ_MAX) {
+ writel(AMS_ALARM_THR_MIN,
+ ams->pl_base + falling_off);
+ writel(AMS_ALARM_THR_MAX,
+ ams->pl_base + rising_off);
+ } else {
+ writel(AMS_ALARM_THR_MIN,
+ ams->ps_base + falling_off);
+ writel(AMS_ALARM_THR_MAX,
+ ams->ps_base + rising_off);
+ }
+ }
+ }
+
+ dev_size = array_size(sizeof(*dev_channels), num_channels);
+ if (dev_size == SIZE_MAX)
+ return -ENOMEM;
+
+ dev_channels = devm_krealloc(dev, ams_channels, dev_size, GFP_KERNEL);
+ if (!dev_channels)
+ ret = -ENOMEM;
+
+ indio_dev->channels = dev_channels;
+ indio_dev->num_channels = num_channels;
+
+ return 0;
+}
+
+static const struct iio_info iio_ams_info = {
+ .read_raw = &ams_read_raw,
+ .read_event_config = &ams_read_event_config,
+ .write_event_config = &ams_write_event_config,
+ .read_event_value = &ams_read_event_value,
+ .write_event_value = &ams_write_event_value,
+};
+
+static const struct of_device_id ams_of_match_table[] = {
+ { .compatible = "xlnx,zynqmp-ams" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, ams_of_match_table);
+
+static void ams_clk_disable_unprepare(void *data)
+{
+ clk_disable_unprepare(data);
+}
+
+static void ams_cancel_delayed_work(void *data)
+{
+ cancel_delayed_work(data);
+}
+
+static int ams_probe(struct platform_device *pdev)
+{
+ struct iio_dev *indio_dev;
+ struct ams *ams;
+ int ret;
+ int irq;
+
+ indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*ams));
+ if (!indio_dev)
+ return -ENOMEM;
+
+ ams = iio_priv(indio_dev);
+ mutex_init(&ams->lock);
+ spin_lock_init(&ams->intr_lock);
+
+ indio_dev->name = "xilinx-ams";
+
+ indio_dev->info = &iio_ams_info;
+ indio_dev->modes = INDIO_DIRECT_MODE;
+
+ ams->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(ams->base))
+ return PTR_ERR(ams->base);
+
+ ams->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(ams->clk))
+ return PTR_ERR(ams->clk);
+
+ ret = clk_prepare_enable(ams->clk);
+ if (ret < 0)
+ return ret;
+
+ ret = devm_add_action_or_reset(&pdev->dev, ams_clk_disable_unprepare, ams->clk);
+ if (ret < 0)
+ return ret;
+
+ INIT_DELAYED_WORK(&ams->ams_unmask_work, ams_unmask_worker);
+ ret = devm_add_action_or_reset(&pdev->dev, ams_cancel_delayed_work,
+ &ams->ams_unmask_work);
+ if (ret < 0)
+ return ret;
+
+ ret = ams_parse_firmware(indio_dev);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "failure in parsing DT\n");
+
+ ret = ams_init_device(ams);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "failed to initialize AMS\n");
+
+ ams_enable_channel_sequence(indio_dev);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return ret;
+
+ ret = devm_request_irq(&pdev->dev, irq, &ams_irq, 0, "ams-irq",
+ indio_dev);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "failed to register interrupt\n");
+
+ platform_set_drvdata(pdev, indio_dev);
+
+ return devm_iio_device_register(&pdev->dev, indio_dev);
+}
+
+static int __maybe_unused ams_suspend(struct device *dev)
+{
+ struct ams *ams = iio_priv(dev_get_drvdata(dev));
+
+ clk_disable_unprepare(ams->clk);
+
+ return 0;
+}
+
+static int __maybe_unused ams_resume(struct device *dev)
+{
+ struct ams *ams = iio_priv(dev_get_drvdata(dev));
+
+ return clk_prepare_enable(ams->clk);
+}
+
+static SIMPLE_DEV_PM_OPS(ams_pm_ops, ams_suspend, ams_resume);
+
+static struct platform_driver ams_driver = {
+ .probe = ams_probe,
+ .driver = {
+ .name = "xilinx-ams",
+ .pm = &ams_pm_ops,
+ .of_match_table = ams_of_match_table,
+ },
+};
+module_platform_driver(ams_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Xilinx, Inc.");
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c
index 83bea5ef765d..823c8e5f9809 100644
--- a/drivers/iio/adc/xilinx-xadc-core.c
+++ b/drivers/iio/adc/xilinx-xadc-core.c
@@ -107,6 +107,7 @@ static const unsigned int XADC_ZYNQ_UNMASK_TIMEOUT = 500;
#define XADC_AXI_INT_ALARM_MASK 0x3c0f
#define XADC_FLAGS_BUFFERED BIT(0)
+#define XADC_FLAGS_IRQ_OPTIONAL BIT(1)
/*
* The XADC hardware supports a samplerate of up to 1MSPS. Unfortunately it does
@@ -562,7 +563,7 @@ static const struct xadc_ops xadc_7s_axi_ops = {
.get_dclk_rate = xadc_axi_get_dclk,
.update_alarm = xadc_axi_update_alarm,
.interrupt_handler = xadc_axi_interrupt_handler,
- .flags = XADC_FLAGS_BUFFERED,
+ .flags = XADC_FLAGS_BUFFERED | XADC_FLAGS_IRQ_OPTIONAL,
.type = XADC_TYPE_S7,
};
@@ -573,7 +574,7 @@ static const struct xadc_ops xadc_us_axi_ops = {
.get_dclk_rate = xadc_axi_get_dclk,
.update_alarm = xadc_axi_update_alarm,
.interrupt_handler = xadc_axi_interrupt_handler,
- .flags = XADC_FLAGS_BUFFERED,
+ .flags = XADC_FLAGS_BUFFERED | XADC_FLAGS_IRQ_OPTIONAL,
.type = XADC_TYPE_US,
};
@@ -943,7 +944,7 @@ static int xadc_read_raw(struct iio_dev *indio_dev,
*val = 1000;
break;
}
- *val2 = chan->scan_type.realbits;
+ *val2 = bits;
return IIO_VAL_FRACTIONAL_LOG2;
case IIO_TEMP:
/* Temp in C = (val * 503.975) / 2**bits - 273.15 */
@@ -1182,7 +1183,7 @@ static const struct of_device_id xadc_of_match_table[] = {
MODULE_DEVICE_TABLE(of, xadc_of_match_table);
static int xadc_parse_dt(struct iio_dev *indio_dev, struct device_node *np,
- unsigned int *conf)
+ unsigned int *conf, int irq)
{
struct device *dev = indio_dev->dev.parent;
struct xadc *xadc = iio_priv(indio_dev);
@@ -1195,6 +1196,7 @@ static int xadc_parse_dt(struct iio_dev *indio_dev, struct device_node *np,
u32 ext_mux_chan;
u32 reg;
int ret;
+ int i;
*conf = 0;
@@ -1273,6 +1275,14 @@ static int xadc_parse_dt(struct iio_dev *indio_dev, struct device_node *np,
}
of_node_put(chan_node);
+ /* No IRQ => no events */
+ if (irq <= 0) {
+ for (i = 0; i < num_channels; i++) {
+ channels[i].event_spec = NULL;
+ channels[i].num_event_specs = 0;
+ }
+ }
+
indio_dev->num_channels = num_channels;
indio_dev->channels = devm_krealloc(dev, channels,
sizeof(*channels) * num_channels,
@@ -1307,6 +1317,7 @@ static int xadc_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
const struct of_device_id *id;
+ const struct xadc_ops *ops;
struct iio_dev *indio_dev;
unsigned int bipolar_mask;
unsigned int conf0;
@@ -1322,9 +1333,12 @@ static int xadc_probe(struct platform_device *pdev)
if (!id)
return -EINVAL;
- irq = platform_get_irq(pdev, 0);
- if (irq <= 0)
- return -ENXIO;
+ ops = id->data;
+
+ irq = platform_get_irq_optional(pdev, 0);
+ if (irq < 0 &&
+ (irq != -ENXIO || !(ops->flags & XADC_FLAGS_IRQ_OPTIONAL)))
+ return irq;
indio_dev = devm_iio_device_alloc(dev, sizeof(*xadc));
if (!indio_dev)
@@ -1345,7 +1359,7 @@ static int xadc_probe(struct platform_device *pdev)
indio_dev->modes = INDIO_DIRECT_MODE;
indio_dev->info = &xadc_info;
- ret = xadc_parse_dt(indio_dev, dev->of_node, &conf0);
+ ret = xadc_parse_dt(indio_dev, dev->of_node, &conf0, irq);
if (ret)
return ret;
@@ -1357,14 +1371,16 @@ static int xadc_probe(struct platform_device *pdev)
if (ret)
return ret;
- xadc->convst_trigger = xadc_alloc_trigger(indio_dev, "convst");
- if (IS_ERR(xadc->convst_trigger))
- return PTR_ERR(xadc->convst_trigger);
+ if (irq > 0) {
+ xadc->convst_trigger = xadc_alloc_trigger(indio_dev, "convst");
+ if (IS_ERR(xadc->convst_trigger))
+ return PTR_ERR(xadc->convst_trigger);
- xadc->samplerate_trigger = xadc_alloc_trigger(indio_dev,
- "samplerate");
- if (IS_ERR(xadc->samplerate_trigger))
- return PTR_ERR(xadc->samplerate_trigger);
+ xadc->samplerate_trigger = xadc_alloc_trigger(indio_dev,
+ "samplerate");
+ if (IS_ERR(xadc->samplerate_trigger))
+ return PTR_ERR(xadc->samplerate_trigger);
+ }
}
xadc->clk = devm_clk_get(dev, NULL);
@@ -1396,15 +1412,17 @@ static int xadc_probe(struct platform_device *pdev)
}
}
- ret = devm_request_irq(dev, irq, xadc->ops->interrupt_handler, 0,
- dev_name(dev), indio_dev);
- if (ret)
- return ret;
+ if (irq > 0) {
+ ret = devm_request_irq(dev, irq, xadc->ops->interrupt_handler,
+ 0, dev_name(dev), indio_dev);
+ if (ret)
+ return ret;
- ret = devm_add_action_or_reset(dev, xadc_cancel_delayed_work,
- &xadc->zynq_unmask_work);
- if (ret)
- return ret;
+ ret = devm_add_action_or_reset(dev, xadc_cancel_delayed_work,
+ &xadc->zynq_unmask_work);
+ if (ret)
+ return ret;
+ }
ret = xadc->ops->setup(pdev, indio_dev, irq);
if (ret)
diff --git a/drivers/iio/addac/Kconfig b/drivers/iio/addac/Kconfig
new file mode 100644
index 000000000000..138492362f20
--- /dev/null
+++ b/drivers/iio/addac/Kconfig
@@ -0,0 +1,20 @@
+#
+# ADC DAC drivers
+#
+# When adding new entries keep the list in alphabetical order
+
+menu "Analog to digital and digital to analog converters"
+
+config AD74413R
+ tristate "Analog Devices AD74412R/AD74413R driver"
+ depends on GPIOLIB && SPI
+ select REGMAP_SPI
+ select CRC8
+ help
+ Say yes here to build support for Analog Devices AD74412R/AD74413R
+ quad-channel software configurable input/output solution.
+
+ To compile this driver as a module, choose M here: the
+ module will be called ad74413r.
+
+endmenu
diff --git a/drivers/iio/addac/Makefile b/drivers/iio/addac/Makefile
new file mode 100644
index 000000000000..cfd4bbe64ad3
--- /dev/null
+++ b/drivers/iio/addac/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for industrial I/O ADDAC drivers
+#
+
+# When adding new entries keep the list in alphabetical order
+obj-$(CONFIG_AD74413R) += ad74413r.o
diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c
new file mode 100644
index 000000000000..5271073bb74e
--- /dev/null
+++ b/drivers/iio/addac/ad74413r.c
@@ -0,0 +1,1475 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Analog Devices, Inc.
+ * Author: Cosmin Tanislav <cosmin.tanislav@analog.com>
+ */
+
+#include <asm/unaligned.h>
+#include <linux/bitfield.h>
+#include <linux/crc8.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio/driver.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/trigger.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+#include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+
+#include <dt-bindings/iio/addac/adi,ad74413r.h>
+
+#define AD74413R_CRC_POLYNOMIAL 0x7
+DECLARE_CRC8_TABLE(ad74413r_crc8_table);
+
+#define AD74413R_CHANNEL_MAX 4
+
+#define AD74413R_FRAME_SIZE 4
+
+struct ad74413r_chip_info {
+ const char *name;
+ bool hart_support;
+};
+
+struct ad74413r_channel_config {
+ u32 func;
+ bool gpo_comparator;
+ bool initialized;
+};
+
+struct ad74413r_channels {
+ struct iio_chan_spec *channels;
+ unsigned int num_channels;
+};
+
+struct ad74413r_state {
+ struct ad74413r_channel_config channel_configs[AD74413R_CHANNEL_MAX];
+ unsigned int gpo_gpio_offsets[AD74413R_CHANNEL_MAX];
+ unsigned int comp_gpio_offsets[AD74413R_CHANNEL_MAX];
+ struct gpio_chip gpo_gpiochip;
+ struct gpio_chip comp_gpiochip;
+ struct completion adc_data_completion;
+ unsigned int num_gpo_gpios;
+ unsigned int num_comparator_gpios;
+ u32 sense_resistor_ohms;
+
+ /*
+ * Synchronize consecutive operations when doing a one-shot
+ * conversion and when updating the ADC samples SPI message.
+ */
+ struct mutex lock;
+
+ const struct ad74413r_chip_info *chip_info;
+ struct spi_device *spi;
+ struct regulator *refin_reg;
+ struct regmap *regmap;
+ struct device *dev;
+ struct iio_trigger *trig;
+
+ size_t adc_active_channels;
+ struct spi_message adc_samples_msg;
+ struct spi_transfer adc_samples_xfer[AD74413R_CHANNEL_MAX + 1];
+
+ /*
+ * DMA (thus cache coherency maintenance) requires the
+ * transfer buffers to live in their own cache lines.
+ */
+ struct {
+ u8 rx_buf[AD74413R_FRAME_SIZE * AD74413R_CHANNEL_MAX];
+ s64 timestamp;
+ } adc_samples_buf ____cacheline_aligned;
+
+ u8 adc_samples_tx_buf[AD74413R_FRAME_SIZE * AD74413R_CHANNEL_MAX];
+ u8 reg_tx_buf[AD74413R_FRAME_SIZE];
+ u8 reg_rx_buf[AD74413R_FRAME_SIZE];
+};
+
+#define AD74413R_REG_NOP 0x00
+
+#define AD74413R_REG_CH_FUNC_SETUP_X(x) (0x01 + (x))
+#define AD74413R_CH_FUNC_SETUP_MASK GENMASK(3, 0)
+
+#define AD74413R_REG_ADC_CONFIG_X(x) (0x05 + (x))
+#define AD74413R_ADC_CONFIG_RANGE_MASK GENMASK(7, 5)
+#define AD74413R_ADC_CONFIG_REJECTION_MASK GENMASK(4, 3)
+#define AD74413R_ADC_RANGE_10V 0b000
+#define AD74413R_ADC_RANGE_2P5V_EXT_POW 0b001
+#define AD74413R_ADC_RANGE_2P5V_INT_POW 0b010
+#define AD74413R_ADC_RANGE_5V_BI_DIR 0b011
+#define AD74413R_ADC_REJECTION_50_60 0b00
+#define AD74413R_ADC_REJECTION_NONE 0b01
+#define AD74413R_ADC_REJECTION_50_60_HART 0b10
+#define AD74413R_ADC_REJECTION_HART 0b11
+
+#define AD74413R_REG_DIN_CONFIG_X(x) (0x09 + (x))
+#define AD74413R_DIN_DEBOUNCE_MASK GENMASK(4, 0)
+#define AD74413R_DIN_DEBOUNCE_LEN BIT(5)
+
+#define AD74413R_REG_DAC_CODE_X(x) (0x16 + (x))
+#define AD74413R_DAC_CODE_MAX GENMASK(12, 0)
+#define AD74413R_DAC_VOLTAGE_MAX 11000
+
+#define AD74413R_REG_GPO_PAR_DATA 0x0d
+#define AD74413R_REG_GPO_CONFIG_X(x) (0x0e + (x))
+#define AD74413R_GPO_CONFIG_DATA_MASK BIT(3)
+#define AD74413R_GPO_CONFIG_SELECT_MASK GENMASK(2, 0)
+#define AD74413R_GPO_CONFIG_100K_PULL_DOWN 0b000
+#define AD74413R_GPO_CONFIG_LOGIC 0b001
+#define AD74413R_GPO_CONFIG_LOGIC_PARALLEL 0b010
+#define AD74413R_GPO_CONFIG_COMPARATOR 0b011
+#define AD74413R_GPO_CONFIG_HIGH_IMPEDANCE 0b100
+
+#define AD74413R_REG_ADC_CONV_CTRL 0x23
+#define AD74413R_CONV_SEQ_MASK GENMASK(9, 8)
+#define AD74413R_CONV_SEQ_ON 0b00
+#define AD74413R_CONV_SEQ_SINGLE 0b01
+#define AD74413R_CONV_SEQ_CONTINUOUS 0b10
+#define AD74413R_CONV_SEQ_OFF 0b11
+#define AD74413R_CH_EN_MASK(x) BIT(x)
+
+#define AD74413R_REG_DIN_COMP_OUT 0x25
+#define AD74413R_DIN_COMP_OUT_SHIFT_X(x) x
+
+#define AD74413R_REG_ADC_RESULT_X(x) (0x26 + (x))
+#define AD74413R_ADC_RESULT_MAX GENMASK(15, 0)
+
+#define AD74413R_REG_READ_SELECT 0x41
+
+#define AD74413R_REG_CMD_KEY 0x44
+#define AD74413R_CMD_KEY_LDAC 0x953a
+#define AD74413R_CMD_KEY_RESET1 0x15fa
+#define AD74413R_CMD_KEY_RESET2 0xaf51
+
+static const int ad74413r_adc_sampling_rates[] = {
+ 20, 4800,
+};
+
+static const int ad74413r_adc_sampling_rates_hart[] = {
+ 10, 20, 1200, 4800,
+};
+
+static int ad74413r_crc(u8 *buf)
+{
+ return crc8(ad74413r_crc8_table, buf, 3, 0);
+}
+
+static void ad74413r_format_reg_write(u8 reg, u16 val, u8 *buf)
+{
+ buf[0] = reg;
+ put_unaligned_be16(val, &buf[1]);
+ buf[3] = ad74413r_crc(buf);
+}
+
+static int ad74413r_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+ struct ad74413r_state *st = context;
+
+ ad74413r_format_reg_write(reg, val, st->reg_tx_buf);
+
+ return spi_write(st->spi, st->reg_tx_buf, AD74413R_FRAME_SIZE);
+}
+
+static int ad74413r_crc_check(struct ad74413r_state *st, u8 *buf)
+{
+ u8 expected_crc = ad74413r_crc(buf);
+
+ if (buf[3] != expected_crc) {
+ dev_err(st->dev, "Bad CRC %02x for %02x%02x%02x\n",
+ buf[3], buf[0], buf[1], buf[2]);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ad74413r_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+ struct ad74413r_state *st = context;
+ struct spi_transfer reg_read_xfer[] = {
+ {
+ .tx_buf = st->reg_tx_buf,
+ .len = AD74413R_FRAME_SIZE,
+ .cs_change = 1,
+ },
+ {
+ .rx_buf = st->reg_rx_buf,
+ .len = AD74413R_FRAME_SIZE,
+ },
+ };
+ int ret;
+
+ ad74413r_format_reg_write(AD74413R_REG_READ_SELECT, reg,
+ st->reg_tx_buf);
+
+ ret = spi_sync_transfer(st->spi, reg_read_xfer,
+ ARRAY_SIZE(reg_read_xfer));
+ if (ret)
+ return ret;
+
+ ret = ad74413r_crc_check(st, st->reg_rx_buf);
+ if (ret)
+ return ret;
+
+ *val = get_unaligned_be16(&st->reg_rx_buf[1]);
+
+ return 0;
+}
+
+static const struct regmap_config ad74413r_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 16,
+ .reg_read = ad74413r_reg_read,
+ .reg_write = ad74413r_reg_write,
+};
+
+static int ad74413r_set_gpo_config(struct ad74413r_state *st,
+ unsigned int offset, u8 mode)
+{
+ return regmap_update_bits(st->regmap, AD74413R_REG_GPO_CONFIG_X(offset),
+ AD74413R_GPO_CONFIG_SELECT_MASK, mode);
+}
+
+static const unsigned int ad74413r_debounce_map[AD74413R_DIN_DEBOUNCE_LEN] = {
+ 0, 13, 18, 24, 32, 42, 56, 75,
+ 100, 130, 180, 240, 320, 420, 560, 750,
+ 1000, 1300, 1800, 2400, 3200, 4200, 5600, 7500,
+ 10000, 13000, 18000, 24000, 32000, 42000, 56000, 75000,
+};
+
+static int ad74413r_set_comp_debounce(struct ad74413r_state *st,
+ unsigned int offset,
+ unsigned int debounce)
+{
+ unsigned int val = AD74413R_DIN_DEBOUNCE_LEN - 1;
+ unsigned int i;
+
+ for (i = 0; i < AD74413R_DIN_DEBOUNCE_LEN; i++)
+ if (debounce <= ad74413r_debounce_map[i]) {
+ val = i;
+ break;
+ }
+
+ return regmap_update_bits(st->regmap,
+ AD74413R_REG_DIN_CONFIG_X(offset),
+ AD74413R_DIN_DEBOUNCE_MASK,
+ val);
+}
+
+static void ad74413r_gpio_set(struct gpio_chip *chip,
+ unsigned int offset, int val)
+{
+ struct ad74413r_state *st = gpiochip_get_data(chip);
+ unsigned int real_offset = st->gpo_gpio_offsets[offset];
+ int ret;
+
+ ret = ad74413r_set_gpo_config(st, real_offset,
+ AD74413R_GPO_CONFIG_LOGIC);
+ if (ret)
+ return;
+
+ regmap_update_bits(st->regmap, AD74413R_REG_GPO_CONFIG_X(real_offset),
+ AD74413R_GPO_CONFIG_DATA_MASK,
+ val ? AD74413R_GPO_CONFIG_DATA_MASK : 0);
+}
+
+static void ad74413r_gpio_set_multiple(struct gpio_chip *chip,
+ unsigned long *mask,
+ unsigned long *bits)
+{
+ struct ad74413r_state *st = gpiochip_get_data(chip);
+ unsigned long real_mask = 0;
+ unsigned long real_bits = 0;
+ unsigned int offset = 0;
+ int ret;
+
+ for_each_set_bit_from(offset, mask, AD74413R_CHANNEL_MAX) {
+ unsigned int real_offset = st->gpo_gpio_offsets[offset];
+
+ ret = ad74413r_set_gpo_config(st, real_offset,
+ AD74413R_GPO_CONFIG_LOGIC_PARALLEL);
+ if (ret)
+ return;
+
+ real_mask |= BIT(real_offset);
+ if (*bits & offset)
+ real_bits |= BIT(real_offset);
+ }
+
+ regmap_update_bits(st->regmap, AD74413R_REG_GPO_PAR_DATA,
+ real_mask, real_bits);
+}
+
+static int ad74413r_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+ struct ad74413r_state *st = gpiochip_get_data(chip);
+ unsigned int real_offset = st->comp_gpio_offsets[offset];
+ unsigned int status;
+ int ret;
+
+ ret = regmap_read(st->regmap, AD74413R_REG_DIN_COMP_OUT, &status);
+ if (ret)
+ return ret;
+
+ status &= AD74413R_DIN_COMP_OUT_SHIFT_X(real_offset);
+
+ return status ? 1 : 0;
+}
+
+static int ad74413r_gpio_get_multiple(struct gpio_chip *chip,
+ unsigned long *mask,
+ unsigned long *bits)
+{
+ struct ad74413r_state *st = gpiochip_get_data(chip);
+ unsigned int offset = 0;
+ unsigned int val;
+ int ret;
+
+ ret = regmap_read(st->regmap, AD74413R_REG_DIN_COMP_OUT, &val);
+ if (ret)
+ return ret;
+
+ for_each_set_bit_from(offset, mask, AD74413R_CHANNEL_MAX) {
+ unsigned int real_offset = st->comp_gpio_offsets[offset];
+
+ if (val & BIT(real_offset))
+ *bits |= offset;
+ }
+
+ return ret;
+}
+
+static int ad74413r_gpio_get_gpo_direction(struct gpio_chip *chip,
+ unsigned int offset)
+{
+ return GPIO_LINE_DIRECTION_OUT;
+}
+
+static int ad74413r_gpio_get_comp_direction(struct gpio_chip *chip,
+ unsigned int offset)
+{
+ return GPIO_LINE_DIRECTION_IN;
+}
+
+static int ad74413r_gpio_set_gpo_config(struct gpio_chip *chip,
+ unsigned int offset,
+ unsigned long config)
+{
+ struct ad74413r_state *st = gpiochip_get_data(chip);
+ unsigned int real_offset = st->gpo_gpio_offsets[offset];
+
+ switch (pinconf_to_config_param(config)) {
+ case PIN_CONFIG_BIAS_PULL_DOWN:
+ return ad74413r_set_gpo_config(st, real_offset,
+ AD74413R_GPO_CONFIG_100K_PULL_DOWN);
+ case PIN_CONFIG_BIAS_HIGH_IMPEDANCE:
+ return ad74413r_set_gpo_config(st, real_offset,
+ AD74413R_GPO_CONFIG_HIGH_IMPEDANCE);
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static int ad74413r_gpio_set_comp_config(struct gpio_chip *chip,
+ unsigned int offset,
+ unsigned long config)
+{
+ struct ad74413r_state *st = gpiochip_get_data(chip);
+ unsigned int real_offset = st->comp_gpio_offsets[offset];
+
+ switch (pinconf_to_config_param(config)) {
+ case PIN_CONFIG_INPUT_DEBOUNCE:
+ return ad74413r_set_comp_debounce(st, real_offset,
+ pinconf_to_config_argument(config));
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static int ad74413r_reset(struct ad74413r_state *st)
+{
+ int ret;
+
+ ret = regmap_write(st->regmap, AD74413R_REG_CMD_KEY,
+ AD74413R_CMD_KEY_RESET1);
+ if (ret)
+ return ret;
+
+ return regmap_write(st->regmap, AD74413R_REG_CMD_KEY,
+ AD74413R_CMD_KEY_RESET2);
+}
+
+static int ad74413r_set_channel_dac_code(struct ad74413r_state *st,
+ unsigned int channel, int dac_code)
+{
+ struct reg_sequence reg_seq[2] = {
+ { AD74413R_REG_DAC_CODE_X(channel), dac_code },
+ { AD74413R_REG_CMD_KEY, AD74413R_CMD_KEY_LDAC },
+ };
+
+ return regmap_multi_reg_write(st->regmap, reg_seq, 2);
+}
+
+static int ad74413r_set_channel_function(struct ad74413r_state *st,
+ unsigned int channel, u8 func)
+{
+ return regmap_update_bits(st->regmap,
+ AD74413R_REG_CH_FUNC_SETUP_X(channel),
+ AD74413R_CH_FUNC_SETUP_MASK, func);
+}
+
+static int ad74413r_set_adc_conv_seq(struct ad74413r_state *st,
+ unsigned int status)
+{
+ int ret;
+
+ /*
+ * These bits do not clear when a conversion completes.
+ * To enable a subsequent conversion, repeat the write.
+ */
+ ret = regmap_write_bits(st->regmap, AD74413R_REG_ADC_CONV_CTRL,
+ AD74413R_CONV_SEQ_MASK,
+ FIELD_PREP(AD74413R_CONV_SEQ_MASK, status));
+ if (ret)
+ return ret;
+
+ /*
+ * Wait 100us before starting conversions.
+ */
+ usleep_range(100, 120);
+
+ return 0;
+}
+
+static int ad74413r_set_adc_channel_enable(struct ad74413r_state *st,
+ unsigned int channel,
+ bool status)
+{
+ return regmap_update_bits(st->regmap, AD74413R_REG_ADC_CONV_CTRL,
+ AD74413R_CH_EN_MASK(channel),
+ status ? AD74413R_CH_EN_MASK(channel) : 0);
+}
+
+static int ad74413r_get_adc_range(struct ad74413r_state *st,
+ unsigned int channel,
+ unsigned int *val)
+{
+ int ret;
+
+ ret = regmap_read(st->regmap, AD74413R_REG_ADC_CONFIG_X(channel), val);
+ if (ret)
+ return ret;
+
+ *val = FIELD_GET(AD74413R_ADC_CONFIG_RANGE_MASK, *val);
+
+ return 0;
+}
+
+static int ad74413r_get_adc_rejection(struct ad74413r_state *st,
+ unsigned int channel,
+ unsigned int *val)
+{
+ int ret;
+
+ ret = regmap_read(st->regmap, AD74413R_REG_ADC_CONFIG_X(channel), val);
+ if (ret)
+ return ret;
+
+ *val = FIELD_GET(AD74413R_ADC_CONFIG_REJECTION_MASK, *val);
+
+ return 0;
+}
+
+static int ad74413r_set_adc_rejection(struct ad74413r_state *st,
+ unsigned int channel,
+ unsigned int val)
+{
+ return regmap_update_bits(st->regmap,
+ AD74413R_REG_ADC_CONFIG_X(channel),
+ AD74413R_ADC_CONFIG_REJECTION_MASK,
+ FIELD_PREP(AD74413R_ADC_CONFIG_REJECTION_MASK,
+ val));
+}
+
+static int ad74413r_rejection_to_rate(struct ad74413r_state *st,
+ unsigned int rej, int *val)
+{
+ switch (rej) {
+ case AD74413R_ADC_REJECTION_50_60:
+ *val = 20;
+ return 0;
+ case AD74413R_ADC_REJECTION_NONE:
+ *val = 4800;
+ return 0;
+ case AD74413R_ADC_REJECTION_50_60_HART:
+ *val = 10;
+ return 0;
+ case AD74413R_ADC_REJECTION_HART:
+ *val = 1200;
+ return 0;
+ default:
+ dev_err(st->dev, "ADC rejection invalid\n");
+ return -EINVAL;
+ }
+}
+
+static int ad74413r_rate_to_rejection(struct ad74413r_state *st,
+ int rate, unsigned int *val)
+{
+ switch (rate) {
+ case 20:
+ *val = AD74413R_ADC_REJECTION_50_60;
+ return 0;
+ case 4800:
+ *val = AD74413R_ADC_REJECTION_NONE;
+ return 0;
+ case 10:
+ *val = AD74413R_ADC_REJECTION_50_60_HART;
+ return 0;
+ case 1200:
+ *val = AD74413R_ADC_REJECTION_HART;
+ return 0;
+ default:
+ dev_err(st->dev, "ADC rate invalid\n");
+ return -EINVAL;
+ }
+}
+
+static int ad74413r_range_to_voltage_range(struct ad74413r_state *st,
+ unsigned int range, int *val)
+{
+ switch (range) {
+ case AD74413R_ADC_RANGE_10V:
+ *val = 10000;
+ return 0;
+ case AD74413R_ADC_RANGE_2P5V_EXT_POW:
+ case AD74413R_ADC_RANGE_2P5V_INT_POW:
+ *val = 2500;
+ return 0;
+ case AD74413R_ADC_RANGE_5V_BI_DIR:
+ *val = 5000;
+ return 0;
+ default:
+ dev_err(st->dev, "ADC range invalid\n");
+ return -EINVAL;
+ }
+}
+
+static int ad74413r_range_to_voltage_offset(struct ad74413r_state *st,
+ unsigned int range, int *val)
+{
+ switch (range) {
+ case AD74413R_ADC_RANGE_10V:
+ case AD74413R_ADC_RANGE_2P5V_EXT_POW:
+ *val = 0;
+ return 0;
+ case AD74413R_ADC_RANGE_2P5V_INT_POW:
+ case AD74413R_ADC_RANGE_5V_BI_DIR:
+ *val = -2500;
+ return 0;
+ default:
+ dev_err(st->dev, "ADC range invalid\n");
+ return -EINVAL;
+ }
+}
+
+static int ad74413r_range_to_voltage_offset_raw(struct ad74413r_state *st,
+ unsigned int range, int *val)
+{
+ switch (range) {
+ case AD74413R_ADC_RANGE_10V:
+ case AD74413R_ADC_RANGE_2P5V_EXT_POW:
+ *val = 0;
+ return 0;
+ case AD74413R_ADC_RANGE_2P5V_INT_POW:
+ *val = -((int)AD74413R_ADC_RESULT_MAX);
+ return 0;
+ case AD74413R_ADC_RANGE_5V_BI_DIR:
+ *val = -((int)AD74413R_ADC_RESULT_MAX / 2);
+ return 0;
+ default:
+ dev_err(st->dev, "ADC range invalid\n");
+ return -EINVAL;
+ }
+}
+
+static int ad74413r_get_output_voltage_scale(struct ad74413r_state *st,
+ int *val, int *val2)
+{
+ *val = AD74413R_DAC_VOLTAGE_MAX;
+ *val2 = AD74413R_DAC_CODE_MAX;
+
+ return IIO_VAL_FRACTIONAL;
+}
+
+static int ad74413r_get_output_current_scale(struct ad74413r_state *st,
+ int *val, int *val2)
+{
+ *val = regulator_get_voltage(st->refin_reg);
+ *val2 = st->sense_resistor_ohms * AD74413R_DAC_CODE_MAX * 1000;
+
+ return IIO_VAL_FRACTIONAL;
+}
+
+static int ad74413r_get_input_voltage_scale(struct ad74413r_state *st,
+ unsigned int channel,
+ int *val, int *val2)
+{
+ unsigned int range;
+ int ret;
+
+ ret = ad74413r_get_adc_range(st, channel, &range);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_range_to_voltage_range(st, range, val);
+ if (ret)
+ return ret;
+
+ *val2 = AD74413R_ADC_RESULT_MAX;
+
+ return IIO_VAL_FRACTIONAL;
+}
+
+static int ad74413r_get_input_voltage_offset(struct ad74413r_state *st,
+ unsigned int channel, int *val)
+{
+ unsigned int range;
+ int ret;
+
+ ret = ad74413r_get_adc_range(st, channel, &range);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_range_to_voltage_offset_raw(st, range, val);
+ if (ret)
+ return ret;
+
+ return IIO_VAL_INT;
+}
+
+static int ad74413r_get_input_current_scale(struct ad74413r_state *st,
+ unsigned int channel, int *val,
+ int *val2)
+{
+ unsigned int range;
+ int ret;
+
+ ret = ad74413r_get_adc_range(st, channel, &range);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_range_to_voltage_range(st, range, val);
+ if (ret)
+ return ret;
+
+ *val2 = AD74413R_ADC_RESULT_MAX * st->sense_resistor_ohms;
+
+ return IIO_VAL_FRACTIONAL;
+}
+
+static int ad74413_get_input_current_offset(struct ad74413r_state *st,
+ unsigned int channel, int *val)
+{
+ unsigned int range;
+ int voltage_range;
+ int voltage_offset;
+ int ret;
+
+ ret = ad74413r_get_adc_range(st, channel, &range);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_range_to_voltage_range(st, range, &voltage_range);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_range_to_voltage_offset(st, range, &voltage_offset);
+ if (ret)
+ return ret;
+
+ *val = voltage_offset * AD74413R_ADC_RESULT_MAX / voltage_range;
+
+ return IIO_VAL_INT;
+}
+
+static int ad74413r_get_adc_rate(struct ad74413r_state *st,
+ unsigned int channel, int *val)
+{
+ unsigned int rejection;
+ int ret;
+
+ ret = ad74413r_get_adc_rejection(st, channel, &rejection);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_rejection_to_rate(st, rejection, val);
+ if (ret)
+ return ret;
+
+ return IIO_VAL_INT;
+}
+
+static int ad74413r_set_adc_rate(struct ad74413r_state *st,
+ unsigned int channel, int val)
+{
+ unsigned int rejection;
+ int ret;
+
+ ret = ad74413r_rate_to_rejection(st, val, &rejection);
+ if (ret)
+ return ret;
+
+ return ad74413r_set_adc_rejection(st, channel, rejection);
+}
+
+static irqreturn_t ad74413r_trigger_handler(int irq, void *p)
+{
+ struct iio_poll_func *pf = p;
+ struct iio_dev *indio_dev = pf->indio_dev;
+ struct ad74413r_state *st = iio_priv(indio_dev);
+ u8 *rx_buf = st->adc_samples_buf.rx_buf;
+ unsigned int i;
+ int ret;
+
+ ret = spi_sync(st->spi, &st->adc_samples_msg);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < st->adc_active_channels; i++)
+ ad74413r_crc_check(st, &rx_buf[i * AD74413R_FRAME_SIZE]);
+
+ iio_push_to_buffers_with_timestamp(indio_dev, &st->adc_samples_buf,
+ iio_get_time_ns(indio_dev));
+
+out:
+ iio_trigger_notify_done(indio_dev->trig);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ad74413r_adc_data_interrupt(int irq, void *data)
+{
+ struct iio_dev *indio_dev = data;
+ struct ad74413r_state *st = iio_priv(indio_dev);
+
+ if (iio_buffer_enabled(indio_dev))
+ iio_trigger_poll(st->trig);
+ else
+ complete(&st->adc_data_completion);
+
+ return IRQ_HANDLED;
+}
+
+static int _ad74413r_get_single_adc_result(struct ad74413r_state *st,
+ unsigned int channel, int *val)
+{
+ unsigned int uval;
+ int ret;
+
+ reinit_completion(&st->adc_data_completion);
+
+ ret = ad74413r_set_adc_channel_enable(st, channel, true);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_set_adc_conv_seq(st, AD74413R_CONV_SEQ_SINGLE);
+ if (ret)
+ return ret;
+
+ ret = wait_for_completion_timeout(&st->adc_data_completion,
+ msecs_to_jiffies(1000));
+ if (!ret) {
+ ret = -ETIMEDOUT;
+ return ret;
+ }
+
+ ret = regmap_read(st->regmap, AD74413R_REG_ADC_RESULT_X(channel),
+ &uval);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_set_adc_conv_seq(st, AD74413R_CONV_SEQ_OFF);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_set_adc_channel_enable(st, channel, false);
+ if (ret)
+ return ret;
+
+ *val = uval;
+
+ return IIO_VAL_INT;
+}
+
+static int ad74413r_get_single_adc_result(struct iio_dev *indio_dev,
+ unsigned int channel, int *val)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+ int ret;
+
+ ret = iio_device_claim_direct_mode(indio_dev);
+ if (ret)
+ return ret;
+
+ mutex_lock(&st->lock);
+ ret = _ad74413r_get_single_adc_result(st, channel, val);
+ mutex_unlock(&st->lock);
+
+ iio_device_release_direct_mode(indio_dev);
+
+ return ret;
+}
+
+static void ad74413r_adc_to_resistance_result(int adc_result, int *val)
+{
+ if (adc_result == AD74413R_ADC_RESULT_MAX)
+ adc_result = AD74413R_ADC_RESULT_MAX - 1;
+
+ *val = DIV_ROUND_CLOSEST(adc_result * 2100,
+ AD74413R_ADC_RESULT_MAX - adc_result);
+}
+
+static int ad74413r_update_scan_mode(struct iio_dev *indio_dev,
+ const unsigned long *active_scan_mask)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+ struct spi_transfer *xfer = st->adc_samples_xfer;
+ u8 *rx_buf = &st->adc_samples_buf.rx_buf[-1 * AD74413R_FRAME_SIZE];
+ u8 *tx_buf = st->adc_samples_tx_buf;
+ unsigned int channel;
+ int ret = -EINVAL;
+
+ mutex_lock(&st->lock);
+
+ spi_message_init(&st->adc_samples_msg);
+ st->adc_active_channels = 0;
+
+ for_each_clear_bit(channel, active_scan_mask, AD74413R_CHANNEL_MAX) {
+ ret = ad74413r_set_adc_channel_enable(st, channel, false);
+ if (ret)
+ goto out;
+ }
+
+ if (*active_scan_mask == 0)
+ goto out;
+
+ /*
+ * The read select register is used to select which register's value
+ * will be sent by the slave on the next SPI frame.
+ *
+ * Create an SPI message that, on each step, writes to the read select
+ * register to select the ADC result of the next enabled channel, and
+ * reads the ADC result of the previous enabled channel.
+ *
+ * Example:
+ * W: [WCH1] [WCH2] [WCH2] [WCH3] [ ]
+ * R: [ ] [RCH1] [RCH2] [RCH3] [RCH4]
+ */
+
+ for_each_set_bit(channel, active_scan_mask, AD74413R_CHANNEL_MAX) {
+ ret = ad74413r_set_adc_channel_enable(st, channel, true);
+ if (ret)
+ goto out;
+
+ st->adc_active_channels++;
+
+ if (xfer == st->adc_samples_xfer)
+ xfer->rx_buf = NULL;
+ else
+ xfer->rx_buf = rx_buf;
+
+ xfer->tx_buf = tx_buf;
+ xfer->len = AD74413R_FRAME_SIZE;
+ xfer->cs_change = 1;
+
+ ad74413r_format_reg_write(AD74413R_REG_READ_SELECT,
+ AD74413R_REG_ADC_RESULT_X(channel),
+ tx_buf);
+
+ spi_message_add_tail(xfer, &st->adc_samples_msg);
+
+ xfer++;
+ tx_buf += AD74413R_FRAME_SIZE;
+ rx_buf += AD74413R_FRAME_SIZE;
+ }
+
+ xfer->rx_buf = rx_buf;
+ xfer->tx_buf = NULL;
+ xfer->len = AD74413R_FRAME_SIZE;
+ xfer->cs_change = 0;
+
+ spi_message_add_tail(xfer, &st->adc_samples_msg);
+
+out:
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int ad74413r_buffer_postenable(struct iio_dev *indio_dev)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+
+ return ad74413r_set_adc_conv_seq(st, AD74413R_CONV_SEQ_CONTINUOUS);
+}
+
+static int ad74413r_buffer_predisable(struct iio_dev *indio_dev)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+
+ return ad74413r_set_adc_conv_seq(st, AD74413R_CONV_SEQ_OFF);
+}
+
+static int ad74413r_read_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int *val, int *val2, long info)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+
+ switch (info) {
+ case IIO_CHAN_INFO_SCALE:
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ if (chan->output)
+ return ad74413r_get_output_voltage_scale(st,
+ val, val2);
+ else
+ return ad74413r_get_input_voltage_scale(st,
+ chan->channel, val, val2);
+ case IIO_CURRENT:
+ if (chan->output)
+ return ad74413r_get_output_current_scale(st,
+ val, val2);
+ else
+ return ad74413r_get_input_current_scale(st,
+ chan->channel, val, val2);
+ default:
+ return -EINVAL;
+ }
+ case IIO_CHAN_INFO_OFFSET:
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ return ad74413r_get_input_voltage_offset(st,
+ chan->channel, val);
+ case IIO_CURRENT:
+ return ad74413_get_input_current_offset(st,
+ chan->channel, val);
+ default:
+ return -EINVAL;
+ }
+ case IIO_CHAN_INFO_RAW:
+ if (chan->output)
+ return -EINVAL;
+
+ return ad74413r_get_single_adc_result(indio_dev, chan->channel,
+ val);
+ case IIO_CHAN_INFO_PROCESSED: {
+ int ret;
+
+ ret = ad74413r_get_single_adc_result(indio_dev, chan->channel,
+ val);
+ if (ret)
+ return ret;
+
+ ad74413r_adc_to_resistance_result(*val, val);
+
+ return ret;
+ }
+ case IIO_CHAN_INFO_SAMP_FREQ:
+ return ad74413r_get_adc_rate(st, chan->channel, val);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int ad74413r_write_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int val, int val2, long info)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+
+ switch (info) {
+ case IIO_CHAN_INFO_RAW:
+ if (!chan->output)
+ return -EINVAL;
+
+ if (val < 0 || val > AD74413R_DAC_CODE_MAX) {
+ dev_err(st->dev, "Invalid DAC code\n");
+ return -EINVAL;
+ }
+
+ return ad74413r_set_channel_dac_code(st, chan->channel, val);
+ case IIO_CHAN_INFO_SAMP_FREQ:
+ return ad74413r_set_adc_rate(st, chan->channel, val);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int ad74413r_read_avail(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ const int **vals, int *type, int *length,
+ long info)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+
+ switch (info) {
+ case IIO_CHAN_INFO_SAMP_FREQ:
+ if (st->chip_info->hart_support) {
+ *vals = ad74413r_adc_sampling_rates_hart;
+ *length = ARRAY_SIZE(ad74413r_adc_sampling_rates_hart);
+ } else {
+ *vals = ad74413r_adc_sampling_rates;
+ *length = ARRAY_SIZE(ad74413r_adc_sampling_rates);
+ }
+ *type = IIO_VAL_INT;
+ return IIO_AVAIL_LIST;
+ default:
+ return -EINVAL;
+ }
+}
+
+static const struct iio_buffer_setup_ops ad74413r_buffer_ops = {
+ .postenable = &ad74413r_buffer_postenable,
+ .predisable = &ad74413r_buffer_predisable,
+};
+
+static const struct iio_trigger_ops ad74413r_trigger_ops = {
+ .validate_device = iio_trigger_validate_own_device,
+};
+
+static const struct iio_info ad74413r_info = {
+ .read_raw = &ad74413r_read_raw,
+ .write_raw = &ad74413r_write_raw,
+ .read_avail = &ad74413r_read_avail,
+ .update_scan_mode = &ad74413r_update_scan_mode,
+};
+
+#define AD74413R_DAC_CHANNEL(_type, extra_mask_separate) \
+ { \
+ .type = (_type), \
+ .indexed = 1, \
+ .output = 1, \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) \
+ | (extra_mask_separate), \
+ }
+
+#define AD74413R_ADC_CHANNEL(_type, extra_mask_separate) \
+ { \
+ .type = (_type), \
+ .indexed = 1, \
+ .output = 0, \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) \
+ | BIT(IIO_CHAN_INFO_SAMP_FREQ) \
+ | (extra_mask_separate), \
+ .info_mask_separate_available = \
+ BIT(IIO_CHAN_INFO_SAMP_FREQ), \
+ .scan_type = { \
+ .sign = 'u', \
+ .realbits = 16, \
+ .storagebits = 32, \
+ .shift = 8, \
+ .endianness = IIO_BE, \
+ }, \
+ }
+
+#define AD74413R_ADC_VOLTAGE_CHANNEL \
+ AD74413R_ADC_CHANNEL(IIO_VOLTAGE, BIT(IIO_CHAN_INFO_SCALE) \
+ | BIT(IIO_CHAN_INFO_OFFSET))
+
+#define AD74413R_ADC_CURRENT_CHANNEL \
+ AD74413R_ADC_CHANNEL(IIO_CURRENT, BIT(IIO_CHAN_INFO_SCALE) \
+ | BIT(IIO_CHAN_INFO_OFFSET))
+
+static struct iio_chan_spec ad74413r_voltage_output_channels[] = {
+ AD74413R_DAC_CHANNEL(IIO_VOLTAGE, BIT(IIO_CHAN_INFO_SCALE)),
+ AD74413R_ADC_CURRENT_CHANNEL,
+};
+
+static struct iio_chan_spec ad74413r_current_output_channels[] = {
+ AD74413R_DAC_CHANNEL(IIO_CURRENT, BIT(IIO_CHAN_INFO_SCALE)),
+ AD74413R_ADC_VOLTAGE_CHANNEL,
+};
+
+static struct iio_chan_spec ad74413r_voltage_input_channels[] = {
+ AD74413R_ADC_VOLTAGE_CHANNEL,
+};
+
+static struct iio_chan_spec ad74413r_current_input_channels[] = {
+ AD74413R_ADC_CURRENT_CHANNEL,
+};
+
+static struct iio_chan_spec ad74413r_resistance_input_channels[] = {
+ AD74413R_ADC_CHANNEL(IIO_RESISTANCE, BIT(IIO_CHAN_INFO_PROCESSED)),
+};
+
+static struct iio_chan_spec ad74413r_digital_input_channels[] = {
+ AD74413R_ADC_VOLTAGE_CHANNEL,
+};
+
+#define _AD74413R_CHANNELS(_channels) \
+ { \
+ .channels = _channels, \
+ .num_channels = ARRAY_SIZE(_channels), \
+ }
+
+#define AD74413R_CHANNELS(name) \
+ _AD74413R_CHANNELS(ad74413r_ ## name ## _channels)
+
+static const struct ad74413r_channels ad74413r_channels_map[] = {
+ [CH_FUNC_HIGH_IMPEDANCE] = AD74413R_CHANNELS(voltage_input),
+ [CH_FUNC_VOLTAGE_OUTPUT] = AD74413R_CHANNELS(voltage_output),
+ [CH_FUNC_CURRENT_OUTPUT] = AD74413R_CHANNELS(current_output),
+ [CH_FUNC_VOLTAGE_INPUT] = AD74413R_CHANNELS(voltage_input),
+ [CH_FUNC_CURRENT_INPUT_EXT_POWER] = AD74413R_CHANNELS(current_input),
+ [CH_FUNC_CURRENT_INPUT_LOOP_POWER] = AD74413R_CHANNELS(current_input),
+ [CH_FUNC_RESISTANCE_INPUT] = AD74413R_CHANNELS(resistance_input),
+ [CH_FUNC_DIGITAL_INPUT_LOGIC] = AD74413R_CHANNELS(digital_input),
+ [CH_FUNC_DIGITAL_INPUT_LOOP_POWER] = AD74413R_CHANNELS(digital_input),
+ [CH_FUNC_CURRENT_INPUT_EXT_POWER_HART] = AD74413R_CHANNELS(current_input),
+ [CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART] = AD74413R_CHANNELS(current_input),
+};
+
+static int ad74413r_parse_channel_config(struct iio_dev *indio_dev,
+ struct fwnode_handle *channel_node)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+ struct ad74413r_channel_config *config;
+ u32 index;
+ int ret;
+
+ ret = fwnode_property_read_u32(channel_node, "reg", &index);
+ if (ret) {
+ dev_err(st->dev, "Failed to read channel reg: %d\n", ret);
+ return ret;
+ }
+
+ if (index >= AD74413R_CHANNEL_MAX) {
+ dev_err(st->dev, "Channel index %u is too large\n", index);
+ return -EINVAL;
+ }
+
+ config = &st->channel_configs[index];
+ if (config->initialized) {
+ dev_err(st->dev, "Channel %u already initialized\n", index);
+ return -EINVAL;
+ }
+
+ config->func = CH_FUNC_HIGH_IMPEDANCE;
+ fwnode_property_read_u32(channel_node, "adi,ch-func", &config->func);
+
+ if (config->func < CH_FUNC_MIN || config->func > CH_FUNC_MAX) {
+ dev_err(st->dev, "Invalid channel function %u\n", config->func);
+ return -EINVAL;
+ }
+
+ if (!st->chip_info->hart_support &&
+ (config->func == CH_FUNC_CURRENT_INPUT_EXT_POWER_HART ||
+ config->func == CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART)) {
+ dev_err(st->dev, "Unsupported HART function %u\n", config->func);
+ return -EINVAL;
+ }
+
+ if (config->func == CH_FUNC_DIGITAL_INPUT_LOGIC ||
+ config->func == CH_FUNC_DIGITAL_INPUT_LOOP_POWER)
+ st->num_comparator_gpios++;
+
+ config->gpo_comparator = fwnode_property_read_bool(channel_node,
+ "adi,gpo-comparator");
+
+ if (!config->gpo_comparator)
+ st->num_gpo_gpios++;
+
+ indio_dev->num_channels += ad74413r_channels_map[config->func].num_channels;
+
+ config->initialized = true;
+
+ return 0;
+}
+
+static int ad74413r_parse_channel_configs(struct iio_dev *indio_dev)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+ struct fwnode_handle *channel_node = NULL;
+ int ret;
+
+ fwnode_for_each_available_child_node(dev_fwnode(st->dev), channel_node) {
+ ret = ad74413r_parse_channel_config(indio_dev, channel_node);
+ if (ret)
+ goto put_channel_node;
+ }
+
+ return 0;
+
+put_channel_node:
+ fwnode_handle_put(channel_node);
+
+ return ret;
+}
+
+static int ad74413r_setup_channels(struct iio_dev *indio_dev)
+{
+ struct ad74413r_state *st = iio_priv(indio_dev);
+ struct ad74413r_channel_config *config;
+ struct iio_chan_spec *channels, *chans;
+ unsigned int i, num_chans, chan_i;
+ int ret;
+
+ channels = devm_kcalloc(st->dev, sizeof(*channels),
+ indio_dev->num_channels, GFP_KERNEL);
+ if (!channels)
+ return -ENOMEM;
+
+ indio_dev->channels = channels;
+
+ for (i = 0; i < AD74413R_CHANNEL_MAX; i++) {
+ config = &st->channel_configs[i];
+ chans = ad74413r_channels_map[config->func].channels;
+ num_chans = ad74413r_channels_map[config->func].num_channels;
+
+ memcpy(channels, chans, num_chans * sizeof(*chans));
+
+ for (chan_i = 0; chan_i < num_chans; chan_i++) {
+ struct iio_chan_spec *chan = &channels[chan_i];
+
+ chan->channel = i;
+ if (chan->output)
+ chan->scan_index = -1;
+ else
+ chan->scan_index = i;
+ }
+
+ ret = ad74413r_set_channel_function(st, i, config->func);
+ if (ret)
+ return ret;
+
+ channels += num_chans;
+ }
+
+ return 0;
+}
+
+static int ad74413r_setup_gpios(struct ad74413r_state *st)
+{
+ struct ad74413r_channel_config *config;
+ unsigned int comp_gpio_i = 0;
+ unsigned int gpo_gpio_i = 0;
+ unsigned int i;
+ u8 gpo_config;
+ int ret;
+
+ for (i = 0; i < AD74413R_CHANNEL_MAX; i++) {
+ config = &st->channel_configs[i];
+
+ if (config->gpo_comparator) {
+ gpo_config = AD74413R_GPO_CONFIG_COMPARATOR;
+ } else {
+ gpo_config = AD74413R_GPO_CONFIG_LOGIC;
+ st->gpo_gpio_offsets[gpo_gpio_i++] = i;
+ }
+
+ if (config->func == CH_FUNC_DIGITAL_INPUT_LOGIC ||
+ config->func == CH_FUNC_DIGITAL_INPUT_LOOP_POWER)
+ st->comp_gpio_offsets[comp_gpio_i++] = i;
+
+ ret = ad74413r_set_gpo_config(st, i, gpo_config);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void ad74413r_regulator_disable(void *regulator)
+{
+ regulator_disable(regulator);
+}
+
+static int ad74413r_probe(struct spi_device *spi)
+{
+ struct ad74413r_state *st;
+ struct iio_dev *indio_dev;
+ int ret;
+
+ indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st));
+ if (!indio_dev)
+ return -ENOMEM;
+
+ st = iio_priv(indio_dev);
+
+ st->spi = spi;
+ st->dev = &spi->dev;
+ st->chip_info = device_get_match_data(&spi->dev);
+ mutex_init(&st->lock);
+ init_completion(&st->adc_data_completion);
+
+ st->regmap = devm_regmap_init(st->dev, NULL, st,
+ &ad74413r_regmap_config);
+ if (IS_ERR(st->regmap))
+ return PTR_ERR(st->regmap);
+
+ st->refin_reg = devm_regulator_get(st->dev, "refin");
+ if (IS_ERR(st->refin_reg))
+ return dev_err_probe(st->dev, PTR_ERR(st->refin_reg),
+ "Failed to get refin regulator\n");
+
+ ret = regulator_enable(st->refin_reg);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(st->dev, ad74413r_regulator_disable,
+ st->refin_reg);
+ if (ret)
+ return ret;
+
+ st->sense_resistor_ohms = 100000000;
+ device_property_read_u32(st->dev, "shunt-resistor-micro-ohms",
+ &st->sense_resistor_ohms);
+ st->sense_resistor_ohms /= 1000000;
+
+ st->trig = devm_iio_trigger_alloc(st->dev, "%s-dev%d",
+ st->chip_info->name, iio_device_id(indio_dev));
+ if (!st->trig)
+ return -ENOMEM;
+
+ st->trig->ops = &ad74413r_trigger_ops;
+ iio_trigger_set_drvdata(st->trig, st);
+
+ ret = devm_iio_trigger_register(st->dev, st->trig);
+ if (ret)
+ return ret;
+
+ indio_dev->name = st->chip_info->name;
+ indio_dev->modes = INDIO_DIRECT_MODE;
+ indio_dev->info = &ad74413r_info;
+ indio_dev->trig = iio_trigger_get(st->trig);
+
+ ret = ad74413r_reset(st);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_parse_channel_configs(indio_dev);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_setup_channels(indio_dev);
+ if (ret)
+ return ret;
+
+ ret = ad74413r_setup_gpios(st);
+ if (ret)
+ return ret;
+
+ if (st->num_gpo_gpios) {
+ st->gpo_gpiochip.owner = THIS_MODULE;
+ st->gpo_gpiochip.label = st->chip_info->name;
+ st->gpo_gpiochip.base = -1;
+ st->gpo_gpiochip.ngpio = st->num_gpo_gpios;
+ st->gpo_gpiochip.parent = st->dev;
+ st->gpo_gpiochip.can_sleep = true;
+ st->gpo_gpiochip.set = ad74413r_gpio_set;
+ st->gpo_gpiochip.set_multiple = ad74413r_gpio_set_multiple;
+ st->gpo_gpiochip.set_config = ad74413r_gpio_set_gpo_config;
+ st->gpo_gpiochip.get_direction =
+ ad74413r_gpio_get_gpo_direction;
+
+ ret = devm_gpiochip_add_data(st->dev, &st->gpo_gpiochip, st);
+ if (ret)
+ return ret;
+ }
+
+ if (st->num_comparator_gpios) {
+ st->comp_gpiochip.owner = THIS_MODULE;
+ st->comp_gpiochip.label = st->chip_info->name;
+ st->comp_gpiochip.base = -1;
+ st->comp_gpiochip.ngpio = st->num_comparator_gpios;
+ st->comp_gpiochip.parent = st->dev;
+ st->comp_gpiochip.can_sleep = true;
+ st->comp_gpiochip.get = ad74413r_gpio_get;
+ st->comp_gpiochip.get_multiple = ad74413r_gpio_get_multiple;
+ st->comp_gpiochip.set_config = ad74413r_gpio_set_comp_config;
+ st->comp_gpiochip.get_direction =
+ ad74413r_gpio_get_comp_direction;
+
+ ret = devm_gpiochip_add_data(st->dev, &st->comp_gpiochip, st);
+ if (ret)
+ return ret;
+ }
+
+ ret = ad74413r_set_adc_conv_seq(st, AD74413R_CONV_SEQ_OFF);
+ if (ret)
+ return ret;
+
+ ret = devm_request_irq(st->dev, spi->irq, ad74413r_adc_data_interrupt,
+ 0, st->chip_info->name, indio_dev);
+ if (ret)
+ return dev_err_probe(st->dev, ret, "Failed to request irq\n");
+
+ ret = devm_iio_triggered_buffer_setup(st->dev, indio_dev,
+ &iio_pollfunc_store_time,
+ &ad74413r_trigger_handler,
+ &ad74413r_buffer_ops);
+ if (ret)
+ return ret;
+
+ return devm_iio_device_register(st->dev, indio_dev);
+}
+
+static int ad74413r_unregister_driver(struct spi_driver *spi)
+{
+ spi_unregister_driver(spi);
+
+ return 0;
+}
+
+static int __init ad74413r_register_driver(struct spi_driver *spi)
+{
+ crc8_populate_msb(ad74413r_crc8_table, AD74413R_CRC_POLYNOMIAL);
+
+ return spi_register_driver(spi);
+}
+
+static const struct ad74413r_chip_info ad74412r_chip_info_data = {
+ .hart_support = false,
+ .name = "ad74412r",
+};
+
+static const struct ad74413r_chip_info ad74413r_chip_info_data = {
+ .hart_support = true,
+ .name = "ad74413r",
+};
+
+static const struct of_device_id ad74413r_dt_id[] = {
+ {
+ .compatible = "adi,ad74412r",
+ .data = &ad74412r_chip_info_data,
+ },
+ {
+ .compatible = "adi,ad74413r",
+ .data = &ad74413r_chip_info_data,
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, ad74413r_dt_id);
+
+static struct spi_driver ad74413r_driver = {
+ .driver = {
+ .name = "ad74413r",
+ .of_match_table = ad74413r_dt_id,
+ },
+ .probe = ad74413r_probe,
+};
+
+module_driver(ad74413r_driver,
+ ad74413r_register_driver,
+ ad74413r_unregister_driver);
+
+MODULE_AUTHOR("Cosmin Tanislav <cosmin.tanislav@analog.com>");
+MODULE_DESCRIPTION("Analog Devices AD74413R ADDAC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/amplifiers/hmc425a.c b/drivers/iio/amplifiers/hmc425a.c
index 9efa692151f0..16c0a77f6a1c 100644
--- a/drivers/iio/amplifiers/hmc425a.c
+++ b/drivers/iio/amplifiers/hmc425a.c
@@ -192,7 +192,7 @@ static int hmc425a_probe(struct platform_device *pdev)
return -ENOMEM;
st = iio_priv(indio_dev);
- st->type = (enum hmc425a_type)of_device_get_match_data(&pdev->dev);
+ st->type = (uintptr_t)of_device_get_match_data(&pdev->dev);
st->chip_info = &hmc425a_chip_info_tbl[st->type];
indio_dev->num_channels = st->chip_info->num_channels;
diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
index 1ac94c4e9792..f8ce26a24c57 100644
--- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c
+++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
@@ -67,7 +67,7 @@ static int iio_dmaengine_buffer_submit_block(struct iio_dma_buffer_queue *queue,
dma_cookie_t cookie;
block->bytes_used = min(block->size, dmaengine_buffer->max_size);
- block->bytes_used = rounddown(block->bytes_used,
+ block->bytes_used = round_down(block->bytes_used,
dmaengine_buffer->align);
desc = dmaengine_prep_slave_single(dmaengine_buffer->chan,
diff --git a/drivers/iio/chemical/atlas-sensor.c b/drivers/iio/chemical/atlas-sensor.c
index 9cb99585b6ff..04b44a327614 100644
--- a/drivers/iio/chemical/atlas-sensor.c
+++ b/drivers/iio/chemical/atlas-sensor.c
@@ -434,9 +434,6 @@ static int atlas_buffer_predisable(struct iio_dev *indio_dev)
return 0;
}
-static const struct iio_trigger_ops atlas_interrupt_trigger_ops = {
-};
-
static const struct iio_buffer_setup_ops atlas_buffer_setup_ops = {
.postenable = atlas_buffer_postenable,
.predisable = atlas_buffer_predisable,
@@ -645,7 +642,6 @@ static int atlas_probe(struct i2c_client *client,
data->client = client;
data->trig = trig;
data->chip = chip;
- trig->ops = &atlas_interrupt_trigger_ops;
iio_trigger_set_drvdata(trig, indio_dev);
i2c_set_clientdata(client, indio_dev);
diff --git a/drivers/iio/chemical/sunrise_co2.c b/drivers/iio/chemical/sunrise_co2.c
index 233bd0f379c9..8440dc0c77cf 100644
--- a/drivers/iio/chemical/sunrise_co2.c
+++ b/drivers/iio/chemical/sunrise_co2.c
@@ -407,24 +407,24 @@ static int sunrise_read_raw(struct iio_dev *iio_dev,
mutex_lock(&sunrise->lock);
ret = sunrise_read_word(sunrise, SUNRISE_CO2_FILTERED_COMP_REG,
&value);
- *val = value;
mutex_unlock(&sunrise->lock);
if (ret)
return ret;
+ *val = value;
return IIO_VAL_INT;
case IIO_TEMP:
mutex_lock(&sunrise->lock);
ret = sunrise_read_word(sunrise, SUNRISE_CHIP_TEMPERATURE_REG,
&value);
- *val = value;
mutex_unlock(&sunrise->lock);
if (ret)
return ret;
+ *val = value;
return IIO_VAL_INT;
default:
diff --git a/drivers/iio/chemical/vz89x.c b/drivers/iio/chemical/vz89x.c
index 23b22a5f5c1c..e7e1c74a351e 100644
--- a/drivers/iio/chemical/vz89x.c
+++ b/drivers/iio/chemical/vz89x.c
@@ -242,7 +242,7 @@ static int vz89x_get_resistance_reading(struct vz89x_data *data,
struct iio_chan_spec const *chan,
int *val)
{
- u8 *tmp = (u8 *) &data->buffer[chan->address];
+ u8 *tmp = &data->buffer[chan->address];
switch (chan->scan_type.endianness) {
case IIO_LE:
diff --git a/drivers/iio/common/scmi_sensors/scmi_iio.c b/drivers/iio/common/scmi_sensors/scmi_iio.c
index 7cf2bf282cef..d538bf3ab1ef 100644
--- a/drivers/iio/common/scmi_sensors/scmi_iio.c
+++ b/drivers/iio/common/scmi_sensors/scmi_iio.c
@@ -279,6 +279,52 @@ static int scmi_iio_get_odr_val(struct iio_dev *iio_dev, int *val, int *val2)
return 0;
}
+static int scmi_iio_read_channel_data(struct iio_dev *iio_dev,
+ struct iio_chan_spec const *ch, int *val, int *val2)
+{
+ struct scmi_iio_priv *sensor = iio_priv(iio_dev);
+ u32 sensor_config;
+ struct scmi_sensor_reading readings[SCMI_IIO_NUM_OF_AXIS];
+ int err;
+
+ sensor_config = FIELD_PREP(SCMI_SENS_CFG_SENSOR_ENABLED_MASK,
+ SCMI_SENS_CFG_SENSOR_ENABLE);
+ err = sensor->sensor_ops->config_set(
+ sensor->ph, sensor->sensor_info->id, sensor_config);
+ if (err) {
+ dev_err(&iio_dev->dev,
+ "Error in enabling sensor %s err %d",
+ sensor->sensor_info->name, err);
+ return err;
+ }
+
+ err = sensor->sensor_ops->reading_get_timestamped(
+ sensor->ph, sensor->sensor_info->id,
+ sensor->sensor_info->num_axis, readings);
+ if (err) {
+ dev_err(&iio_dev->dev,
+ "Error in reading raw attribute for sensor %s err %d",
+ sensor->sensor_info->name, err);
+ return err;
+ }
+
+ sensor_config = FIELD_PREP(SCMI_SENS_CFG_SENSOR_ENABLED_MASK,
+ SCMI_SENS_CFG_SENSOR_DISABLE);
+ err = sensor->sensor_ops->config_set(
+ sensor->ph, sensor->sensor_info->id, sensor_config);
+ if (err) {
+ dev_err(&iio_dev->dev,
+ "Error in disabling sensor %s err %d",
+ sensor->sensor_info->name, err);
+ return err;
+ }
+
+ *val = lower_32_bits(readings[ch->scan_index].value);
+ *val2 = upper_32_bits(readings[ch->scan_index].value);
+
+ return IIO_VAL_INT_64;
+}
+
static int scmi_iio_read_raw(struct iio_dev *iio_dev,
struct iio_chan_spec const *ch, int *val,
int *val2, long mask)
@@ -300,6 +346,14 @@ static int scmi_iio_read_raw(struct iio_dev *iio_dev,
case IIO_CHAN_INFO_SAMP_FREQ:
ret = scmi_iio_get_odr_val(iio_dev, val, val2);
return ret ? ret : IIO_VAL_INT_PLUS_MICRO;
+ case IIO_CHAN_INFO_RAW:
+ ret = iio_device_claim_direct_mode(iio_dev);
+ if (ret)
+ return ret;
+
+ ret = scmi_iio_read_channel_data(iio_dev, ch, val, val2);
+ iio_device_release_direct_mode(iio_dev);
+ return ret;
default:
return -EINVAL;
}
@@ -381,7 +435,8 @@ static void scmi_iio_set_data_channel(struct iio_chan_spec *iio_chan,
iio_chan->type = type;
iio_chan->modified = 1;
iio_chan->channel2 = mod;
- iio_chan->info_mask_separate = BIT(IIO_CHAN_INFO_SCALE);
+ iio_chan->info_mask_separate =
+ BIT(IIO_CHAN_INFO_SCALE) | BIT(IIO_CHAN_INFO_RAW);
iio_chan->info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ);
iio_chan->info_mask_shared_by_type_available =
BIT(IIO_CHAN_INFO_SAMP_FREQ);
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 1de395bda03e..eb452d0c423c 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -638,7 +638,7 @@ ssize_t st_sensors_sysfs_sampling_frequency_avail(struct device *dev,
struct device_attribute *attr, char *buf)
{
int i, len = 0;
- struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct iio_dev *indio_dev = dev_to_iio_dev(dev);
struct st_sensor_data *sdata = iio_priv(indio_dev);
mutex_lock(&indio_dev->mlock);
@@ -660,7 +660,7 @@ ssize_t st_sensors_sysfs_scale_avail(struct device *dev,
struct device_attribute *attr, char *buf)
{
int i, len = 0, q, r;
- struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct iio_dev *indio_dev = dev_to_iio_dev(dev);
struct st_sensor_data *sdata = iio_priv(indio_dev);
mutex_lock(&indio_dev->mlock);
diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig
index 75e1f2b48638..bfcf7568de32 100644
--- a/drivers/iio/dac/Kconfig
+++ b/drivers/iio/dac/Kconfig
@@ -6,6 +6,16 @@
menu "Digital to analog converters"
+config AD3552R
+ tristate "Analog Devices AD3552R DAC driver"
+ depends on SPI_MASTER
+ help
+ Say yes here to build support for Analog Devices AD3552R
+ Digital to Analog Converter.
+
+ To compile this driver as a module, choose M here: the
+ module will be called ad3552r.
+
config AD5064
tristate "Analog Devices AD5064 and similar multi-channel DAC driver"
depends on (SPI_MASTER && I2C!=m) || I2C
@@ -221,6 +231,17 @@ config AD5791
To compile this driver as a module, choose M here: the
module will be called ad5791.
+config AD7293
+ tristate "Analog Devices AD7293 Power Amplifier Current Controller"
+ depends on SPI
+ help
+ Say yes here to build support for Analog Devices AD7293
+ Power Amplifier Current Controller with
+ ADC, DACs, and Temperature and Current Sensors
+
+ To compile this driver as a module, choose M here: the
+ module will be called ad7293.
+
config AD7303
tristate "Analog Devices AD7303 DAC driver"
depends on SPI
@@ -329,7 +350,6 @@ config MAX517
config MAX5821
tristate "Maxim MAX5821 DAC driver"
depends on I2C
- depends on OF
help
Say yes here to build support for Maxim MAX5821
10 bits DAC.
diff --git a/drivers/iio/dac/Makefile b/drivers/iio/dac/Makefile
index 33e16f14902a..01a50131572f 100644
--- a/drivers/iio/dac/Makefile
+++ b/drivers/iio/dac/Makefile
@@ -4,6 +4,7 @@
#
# When adding new entries keep the list in alphabetical order
+obj-$(CONFIG_AD3552R) += ad3552r.o
obj-$(CONFIG_AD5360) += ad5360.o
obj-$(CONFIG_AD5380) += ad5380.o
obj-$(CONFIG_AD5421) += ad5421.o
@@ -25,6 +26,7 @@ obj-$(CONFIG_AD5791) += ad5791.o
obj-$(CONFIG_AD5686) += ad5686.o
obj-$(CONFIG_AD5686_SPI) += ad5686-spi.o
obj-$(CONFIG_AD5696_I2C) += ad5696-i2c.o
+obj-$(CONFIG_AD7293) += ad7293.o
obj-$(CONFIG_AD7303) += ad7303.o
obj-$(CONFIG_AD8801) += ad8801.o
obj-$(CONFIG_CIO_DAC) += cio-dac.o
diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c
new file mode 100644
index 000000000000..97f13c0b9631
--- /dev/null
+++ b/drivers/iio/dac/ad3552r.c
@@ -0,0 +1,1138 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Analog Devices AD3552R
+ * Digital to Analog converter driver
+ *
+ * Copyright 2021 Analog Devices Inc.
+ */
+#include <asm/unaligned.h>
+#include <linux/device.h>
+#include <linux/iio/triggered_buffer.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+
+/* Register addresses */
+/* Primary address space */
+#define AD3552R_REG_ADDR_INTERFACE_CONFIG_A 0x00
+#define AD3552R_MASK_SOFTWARE_RESET (BIT(7) | BIT(0))
+#define AD3552R_MASK_ADDR_ASCENSION BIT(5)
+#define AD3552R_MASK_SDO_ACTIVE BIT(4)
+#define AD3552R_REG_ADDR_INTERFACE_CONFIG_B 0x01
+#define AD3552R_MASK_SINGLE_INST BIT(7)
+#define AD3552R_MASK_SHORT_INSTRUCTION BIT(3)
+#define AD3552R_REG_ADDR_DEVICE_CONFIG 0x02
+#define AD3552R_MASK_DEVICE_STATUS(n) BIT(4 + (n))
+#define AD3552R_MASK_CUSTOM_MODES GENMASK(3, 2)
+#define AD3552R_MASK_OPERATING_MODES GENMASK(1, 0)
+#define AD3552R_REG_ADDR_CHIP_TYPE 0x03
+#define AD3552R_MASK_CLASS GENMASK(7, 0)
+#define AD3552R_REG_ADDR_PRODUCT_ID_L 0x04
+#define AD3552R_REG_ADDR_PRODUCT_ID_H 0x05
+#define AD3552R_REG_ADDR_CHIP_GRADE 0x06
+#define AD3552R_MASK_GRADE GENMASK(7, 4)
+#define AD3552R_MASK_DEVICE_REVISION GENMASK(3, 0)
+#define AD3552R_REG_ADDR_SCRATCH_PAD 0x0A
+#define AD3552R_REG_ADDR_SPI_REVISION 0x0B
+#define AD3552R_REG_ADDR_VENDOR_L 0x0C
+#define AD3552R_REG_ADDR_VENDOR_H 0x0D
+#define AD3552R_REG_ADDR_STREAM_MODE 0x0E
+#define AD3552R_MASK_LENGTH GENMASK(7, 0)
+#define AD3552R_REG_ADDR_TRANSFER_REGISTER 0x0F
+#define AD3552R_MASK_MULTI_IO_MODE GENMASK(7, 6)
+#define AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE BIT(2)
+#define AD3552R_REG_ADDR_INTERFACE_CONFIG_C 0x10
+#define AD3552R_MASK_CRC_ENABLE (GENMASK(7, 6) |\
+ GENMASK(1, 0))
+#define AD3552R_MASK_STRICT_REGISTER_ACCESS BIT(5)
+#define AD3552R_REG_ADDR_INTERFACE_STATUS_A 0x11
+#define AD3552R_MASK_INTERFACE_NOT_READY BIT(7)
+#define AD3552R_MASK_CLOCK_COUNTING_ERROR BIT(5)
+#define AD3552R_MASK_INVALID_OR_NO_CRC BIT(3)
+#define AD3552R_MASK_WRITE_TO_READ_ONLY_REGISTER BIT(2)
+#define AD3552R_MASK_PARTIAL_REGISTER_ACCESS BIT(1)
+#define AD3552R_MASK_REGISTER_ADDRESS_INVALID BIT(0)
+#define AD3552R_REG_ADDR_INTERFACE_CONFIG_D 0x14
+#define AD3552R_MASK_ALERT_ENABLE_PULLUP BIT(6)
+#define AD3552R_MASK_MEM_CRC_EN BIT(4)
+#define AD3552R_MASK_SDO_DRIVE_STRENGTH GENMASK(3, 2)
+#define AD3552R_MASK_DUAL_SPI_SYNCHROUNOUS_EN BIT(1)
+#define AD3552R_MASK_SPI_CONFIG_DDR BIT(0)
+#define AD3552R_REG_ADDR_SH_REFERENCE_CONFIG 0x15
+#define AD3552R_MASK_IDUMP_FAST_MODE BIT(6)
+#define AD3552R_MASK_SAMPLE_HOLD_DIFFERENTIAL_USER_EN BIT(5)
+#define AD3552R_MASK_SAMPLE_HOLD_USER_TRIM GENMASK(4, 3)
+#define AD3552R_MASK_SAMPLE_HOLD_USER_ENABLE BIT(2)
+#define AD3552R_MASK_REFERENCE_VOLTAGE_SEL GENMASK(1, 0)
+#define AD3552R_REG_ADDR_ERR_ALARM_MASK 0x16
+#define AD3552R_MASK_REF_RANGE_ALARM BIT(6)
+#define AD3552R_MASK_CLOCK_COUNT_ERR_ALARM BIT(5)
+#define AD3552R_MASK_MEM_CRC_ERR_ALARM BIT(4)
+#define AD3552R_MASK_SPI_CRC_ERR_ALARM BIT(3)
+#define AD3552R_MASK_WRITE_TO_READ_ONLY_ALARM BIT(2)
+#define AD3552R_MASK_PARTIAL_REGISTER_ACCESS_ALARM BIT(1)
+#define AD3552R_MASK_REGISTER_ADDRESS_INVALID_ALARM BIT(0)
+#define AD3552R_REG_ADDR_ERR_STATUS 0x17
+#define AD3552R_MASK_REF_RANGE_ERR_STATUS BIT(6)
+#define AD3552R_MASK_DUAL_SPI_STREAM_EXCEEDS_DAC_ERR_STATUS BIT(5)
+#define AD3552R_MASK_MEM_CRC_ERR_STATUS BIT(4)
+#define AD3552R_MASK_RESET_STATUS BIT(0)
+#define AD3552R_REG_ADDR_POWERDOWN_CONFIG 0x18
+#define AD3552R_MASK_CH_DAC_POWERDOWN(ch) BIT(4 + (ch))
+#define AD3552R_MASK_CH_AMPLIFIER_POWERDOWN(ch) BIT(ch)
+#define AD3552R_REG_ADDR_CH0_CH1_OUTPUT_RANGE 0x19
+#define AD3552R_MASK_CH_OUTPUT_RANGE_SEL(ch) ((ch) ? GENMASK(7, 4) :\
+ GENMASK(3, 0))
+#define AD3552R_REG_ADDR_CH_OFFSET(ch) (0x1B + (ch) * 2)
+#define AD3552R_MASK_CH_OFFSET_BITS_0_7 GENMASK(7, 0)
+#define AD3552R_REG_ADDR_CH_GAIN(ch) (0x1C + (ch) * 2)
+#define AD3552R_MASK_CH_RANGE_OVERRIDE BIT(7)
+#define AD3552R_MASK_CH_GAIN_SCALING_N GENMASK(6, 5)
+#define AD3552R_MASK_CH_GAIN_SCALING_P GENMASK(4, 3)
+#define AD3552R_MASK_CH_OFFSET_POLARITY BIT(2)
+#define AD3552R_MASK_CH_OFFSET_BIT_8 BIT(0)
+/*
+ * Secondary region
+ * For multibyte registers specify the highest address because the access is
+ * done in descending order
+ */
+#define AD3552R_SECONDARY_REGION_START 0x28
+#define AD3552R_REG_ADDR_HW_LDAC_16B 0x28
+#define AD3552R_REG_ADDR_CH_DAC_16B(ch) (0x2C - (1 - ch) * 2)
+#define AD3552R_REG_ADDR_DAC_PAGE_MASK_16B 0x2E
+#define AD3552R_REG_ADDR_CH_SELECT_16B 0x2F
+#define AD3552R_REG_ADDR_INPUT_PAGE_MASK_16B 0x31
+#define AD3552R_REG_ADDR_SW_LDAC_16B 0x32
+#define AD3552R_REG_ADDR_CH_INPUT_16B(ch) (0x36 - (1 - ch) * 2)
+/* 3 bytes registers */
+#define AD3552R_REG_START_24B 0x37
+#define AD3552R_REG_ADDR_HW_LDAC_24B 0x37
+#define AD3552R_REG_ADDR_CH_DAC_24B(ch) (0x3D - (1 - ch) * 3)
+#define AD3552R_REG_ADDR_DAC_PAGE_MASK_24B 0x40
+#define AD3552R_REG_ADDR_CH_SELECT_24B 0x41
+#define AD3552R_REG_ADDR_INPUT_PAGE_MASK_24B 0x44
+#define AD3552R_REG_ADDR_SW_LDAC_24B 0x45
+#define AD3552R_REG_ADDR_CH_INPUT_24B(ch) (0x4B - (1 - ch) * 3)
+
+/* Useful defines */
+#define AD3552R_NUM_CH 2
+#define AD3552R_MASK_CH(ch) BIT(ch)
+#define AD3552R_MASK_ALL_CH GENMASK(1, 0)
+#define AD3552R_MAX_REG_SIZE 3
+#define AD3552R_READ_BIT BIT(7)
+#define AD3552R_ADDR_MASK GENMASK(6, 0)
+#define AD3552R_MASK_DAC_12B 0xFFF0
+#define AD3552R_DEFAULT_CONFIG_B_VALUE 0x8
+#define AD3552R_SCRATCH_PAD_TEST_VAL1 0x34
+#define AD3552R_SCRATCH_PAD_TEST_VAL2 0xB2
+#define AD3552R_GAIN_SCALE 1000
+#define AD3552R_LDAC_PULSE_US 100
+
+enum ad3552r_ch_vref_select {
+ /* Internal source with Vref I/O floating */
+ AD3552R_INTERNAL_VREF_PIN_FLOATING,
+ /* Internal source with Vref I/O at 2.5V */
+ AD3552R_INTERNAL_VREF_PIN_2P5V,
+ /* External source with Vref I/O as input */
+ AD3552R_EXTERNAL_VREF_PIN_INPUT
+};
+
+enum ad3542r_id {
+ AD3542R_ID = 0x4008,
+ AD3552R_ID = 0x4009,
+};
+
+enum ad3552r_ch_output_range {
+ /* Range from 0 V to 2.5 V. Requires Rfb1x connection */
+ AD3552R_CH_OUTPUT_RANGE_0__2P5V,
+ /* Range from 0 V to 5 V. Requires Rfb1x connection */
+ AD3552R_CH_OUTPUT_RANGE_0__5V,
+ /* Range from 0 V to 10 V. Requires Rfb2x connection */
+ AD3552R_CH_OUTPUT_RANGE_0__10V,
+ /* Range from -5 V to 5 V. Requires Rfb2x connection */
+ AD3552R_CH_OUTPUT_RANGE_NEG_5__5V,
+ /* Range from -10 V to 10 V. Requires Rfb4x connection */
+ AD3552R_CH_OUTPUT_RANGE_NEG_10__10V,
+};
+
+static const s32 ad3552r_ch_ranges[][2] = {
+ [AD3552R_CH_OUTPUT_RANGE_0__2P5V] = {0, 2500},
+ [AD3552R_CH_OUTPUT_RANGE_0__5V] = {0, 5000},
+ [AD3552R_CH_OUTPUT_RANGE_0__10V] = {0, 10000},
+ [AD3552R_CH_OUTPUT_RANGE_NEG_5__5V] = {-5000, 5000},
+ [AD3552R_CH_OUTPUT_RANGE_NEG_10__10V] = {-10000, 10000}
+};
+
+enum ad3542r_ch_output_range {
+ /* Range from 0 V to 2.5 V. Requires Rfb1x connection */
+ AD3542R_CH_OUTPUT_RANGE_0__2P5V,
+ /* Range from 0 V to 3 V. Requires Rfb1x connection */
+ AD3542R_CH_OUTPUT_RANGE_0__3V,
+ /* Range from 0 V to 5 V. Requires Rfb1x connection */
+ AD3542R_CH_OUTPUT_RANGE_0__5V,
+ /* Range from 0 V to 10 V. Requires Rfb2x connection */
+ AD3542R_CH_OUTPUT_RANGE_0__10V,
+ /* Range from -2.5 V to 7.5 V. Requires Rfb2x connection */
+ AD3542R_CH_OUTPUT_RANGE_NEG_2P5__7P5V,
+ /* Range from -5 V to 5 V. Requires Rfb2x connection */
+ AD3542R_CH_OUTPUT_RANGE_NEG_5__5V,
+};
+
+static const s32 ad3542r_ch_ranges[][2] = {
+ [AD3542R_CH_OUTPUT_RANGE_0__2P5V] = {0, 2500},
+ [AD3542R_CH_OUTPUT_RANGE_0__3V] = {0, 3000},
+ [AD3542R_CH_OUTPUT_RANGE_0__5V] = {0, 5000},
+ [AD3542R_CH_OUTPUT_RANGE_0__10V] = {0, 10000},
+ [AD3542R_CH_OUTPUT_RANGE_NEG_2P5__7P5V] = {-2500, 7500},
+ [AD3542R_CH_OUTPUT_RANGE_NEG_5__5V] = {-5000, 5000}
+};
+
+enum ad3552r_ch_gain_scaling {
+ /* Gain scaling of 1 */
+ AD3552R_CH_GAIN_SCALING_1,
+ /* Gain scaling of 0.5 */
+ AD3552R_CH_GAIN_SCALING_0_5,
+ /* Gain scaling of 0.25 */
+ AD3552R_CH_GAIN_SCALING_0_25,
+ /* Gain scaling of 0.125 */
+ AD3552R_CH_GAIN_SCALING_0_125,
+};
+
+/* Gain * AD3552R_GAIN_SCALE */
+static const s32 gains_scaling_table[] = {
+ [AD3552R_CH_GAIN_SCALING_1] = 1000,
+ [AD3552R_CH_GAIN_SCALING_0_5] = 500,
+ [AD3552R_CH_GAIN_SCALING_0_25] = 250,
+ [AD3552R_CH_GAIN_SCALING_0_125] = 125
+};
+
+enum ad3552r_dev_attributes {
+ /* - Direct register values */
+ /* From 0-3 */
+ AD3552R_SDO_DRIVE_STRENGTH,
+ /*
+ * 0 -> Internal Vref, vref_io pin floating (default)
+ * 1 -> Internal Vref, vref_io driven by internal vref
+ * 2 or 3 -> External Vref
+ */
+ AD3552R_VREF_SELECT,
+ /* Read registers in ascending order if set. Else descending */
+ AD3552R_ADDR_ASCENSION,
+};
+
+enum ad3552r_ch_attributes {
+ /* DAC powerdown */
+ AD3552R_CH_DAC_POWERDOWN,
+ /* DAC amplifier powerdown */
+ AD3552R_CH_AMPLIFIER_POWERDOWN,
+ /* Select the output range. Select from enum ad3552r_ch_output_range */
+ AD3552R_CH_OUTPUT_RANGE_SEL,
+ /*
+ * Over-rider the range selector in order to manually set the output
+ * voltage range
+ */
+ AD3552R_CH_RANGE_OVERRIDE,
+ /* Manually set the offset voltage */
+ AD3552R_CH_GAIN_OFFSET,
+ /* Sets the polarity of the offset. */
+ AD3552R_CH_GAIN_OFFSET_POLARITY,
+ /* PDAC gain scaling */
+ AD3552R_CH_GAIN_SCALING_P,
+ /* NDAC gain scaling */
+ AD3552R_CH_GAIN_SCALING_N,
+ /* Rfb value */
+ AD3552R_CH_RFB,
+ /* Channel select. When set allow Input -> DAC and Mask -> DAC */
+ AD3552R_CH_SELECT,
+};
+
+struct ad3552r_ch_data {
+ s32 scale_int;
+ s32 scale_dec;
+ s32 offset_int;
+ s32 offset_dec;
+ s16 gain_offset;
+ u16 rfb;
+ u8 n;
+ u8 p;
+ u8 range;
+ bool range_override;
+};
+
+struct ad3552r_desc {
+ /* Used to look the spi bus for atomic operations where needed */
+ struct mutex lock;
+ struct gpio_desc *gpio_reset;
+ struct gpio_desc *gpio_ldac;
+ struct spi_device *spi;
+ struct ad3552r_ch_data ch_data[AD3552R_NUM_CH];
+ struct iio_chan_spec channels[AD3552R_NUM_CH + 1];
+ unsigned long enabled_ch;
+ unsigned int num_ch;
+ enum ad3542r_id chip_id;
+};
+
+static const u16 addr_mask_map[][2] = {
+ [AD3552R_ADDR_ASCENSION] = {
+ AD3552R_REG_ADDR_INTERFACE_CONFIG_A,
+ AD3552R_MASK_ADDR_ASCENSION
+ },
+ [AD3552R_SDO_DRIVE_STRENGTH] = {
+ AD3552R_REG_ADDR_INTERFACE_CONFIG_D,
+ AD3552R_MASK_SDO_DRIVE_STRENGTH
+ },
+ [AD3552R_VREF_SELECT] = {
+ AD3552R_REG_ADDR_SH_REFERENCE_CONFIG,
+ AD3552R_MASK_REFERENCE_VOLTAGE_SEL
+ },
+};
+
+/* 0 -> reg addr, 1->ch0 mask, 2->ch1 mask */
+static const u16 addr_mask_map_ch[][3] = {
+ [AD3552R_CH_DAC_POWERDOWN] = {
+ AD3552R_REG_ADDR_POWERDOWN_CONFIG,
+ AD3552R_MASK_CH_DAC_POWERDOWN(0),
+ AD3552R_MASK_CH_DAC_POWERDOWN(1)
+ },
+ [AD3552R_CH_AMPLIFIER_POWERDOWN] = {
+ AD3552R_REG_ADDR_POWERDOWN_CONFIG,
+ AD3552R_MASK_CH_AMPLIFIER_POWERDOWN(0),
+ AD3552R_MASK_CH_AMPLIFIER_POWERDOWN(1)
+ },
+ [AD3552R_CH_OUTPUT_RANGE_SEL] = {
+ AD3552R_REG_ADDR_CH0_CH1_OUTPUT_RANGE,
+ AD3552R_MASK_CH_OUTPUT_RANGE_SEL(0),
+ AD3552R_MASK_CH_OUTPUT_RANGE_SEL(1)
+ },
+ [AD3552R_CH_SELECT] = {
+ AD3552R_REG_ADDR_CH_SELECT_16B,
+ AD3552R_MASK_CH(0),
+ AD3552R_MASK_CH(1)
+ }
+};
+
+static u8 _ad3552r_reg_len(u8 addr)
+{
+ switch (addr) {
+ case AD3552R_REG_ADDR_HW_LDAC_16B:
+ case AD3552R_REG_ADDR_CH_SELECT_16B:
+ case AD3552R_REG_ADDR_SW_LDAC_16B:
+ case AD3552R_REG_ADDR_HW_LDAC_24B:
+ case AD3552R_REG_ADDR_CH_SELECT_24B:
+ case AD3552R_REG_ADDR_SW_LDAC_24B:
+ return 1;
+ default:
+ break;
+ }
+
+ if (addr > AD3552R_REG_ADDR_HW_LDAC_24B)
+ return 3;
+ if (addr > AD3552R_REG_ADDR_HW_LDAC_16B)
+ return 2;
+
+ return 1;
+}
+
+/* SPI transfer to device */
+static int ad3552r_transfer(struct ad3552r_desc *dac, u8 addr, u32 len,
+ u8 *data, bool is_read)
+{
+ /* Maximum transfer: Addr (1B) + 2 * (Data Reg (3B)) + SW LDAC(1B) */
+ u8 buf[8];
+
+ buf[0] = addr & AD3552R_ADDR_MASK;
+ buf[0] |= is_read ? AD3552R_READ_BIT : 0;
+ if (is_read)
+ return spi_write_then_read(dac->spi, buf, 1, data, len);
+
+ memcpy(buf + 1, data, len);
+ return spi_write_then_read(dac->spi, buf, len + 1, NULL, 0);
+}
+
+static int ad3552r_write_reg(struct ad3552r_desc *dac, u8 addr, u16 val)
+{
+ u8 reg_len;
+ u8 buf[AD3552R_MAX_REG_SIZE] = { 0 };
+
+ reg_len = _ad3552r_reg_len(addr);
+ if (reg_len == 2)
+ /* Only DAC register are 2 bytes wide */
+ val &= AD3552R_MASK_DAC_12B;
+ if (reg_len == 1)
+ buf[0] = val & 0xFF;
+ else
+ /* reg_len can be 2 or 3, but 3rd bytes needs to be set to 0 */
+ put_unaligned_be16(val, buf);
+
+ return ad3552r_transfer(dac, addr, reg_len, buf, false);
+}
+
+static int ad3552r_read_reg(struct ad3552r_desc *dac, u8 addr, u16 *val)
+{
+ int err;
+ u8 reg_len, buf[AD3552R_MAX_REG_SIZE] = { 0 };
+
+ reg_len = _ad3552r_reg_len(addr);
+ err = ad3552r_transfer(dac, addr, reg_len, buf, true);
+ if (err)
+ return err;
+
+ if (reg_len == 1)
+ *val = buf[0];
+ else
+ /* reg_len can be 2 or 3, but only first 2 bytes are relevant */
+ *val = get_unaligned_be16(buf);
+
+ return 0;
+}
+
+static u16 ad3552r_field_prep(u16 val, u16 mask)
+{
+ return (val << __ffs(mask)) & mask;
+}
+
+/* Update field of a register, shift val if needed */
+static int ad3552r_update_reg_field(struct ad3552r_desc *dac, u8 addr, u16 mask,
+ u16 val)
+{
+ int ret;
+ u16 reg;
+
+ ret = ad3552r_read_reg(dac, addr, &reg);
+ if (ret < 0)
+ return ret;
+
+ reg &= ~mask;
+ reg |= ad3552r_field_prep(val, mask);
+
+ return ad3552r_write_reg(dac, addr, reg);
+}
+
+static int ad3552r_set_ch_value(struct ad3552r_desc *dac,
+ enum ad3552r_ch_attributes attr,
+ u8 ch,
+ u16 val)
+{
+ /* Update register related to attributes in chip */
+ return ad3552r_update_reg_field(dac, addr_mask_map_ch[attr][0],
+ addr_mask_map_ch[attr][ch + 1], val);
+}
+
+#define AD3552R_CH_DAC(_idx) ((struct iio_chan_spec) { \
+ .type = IIO_VOLTAGE, \
+ .output = true, \
+ .indexed = true, \
+ .channel = _idx, \
+ .scan_index = _idx, \
+ .scan_type = { \
+ .sign = 'u', \
+ .realbits = 16, \
+ .storagebits = 16, \
+ .endianness = IIO_BE, \
+ }, \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
+ BIT(IIO_CHAN_INFO_SCALE) | \
+ BIT(IIO_CHAN_INFO_ENABLE) | \
+ BIT(IIO_CHAN_INFO_OFFSET), \
+})
+
+static int ad3552r_read_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int *val,
+ int *val2,
+ long mask)
+{
+ struct ad3552r_desc *dac = iio_priv(indio_dev);
+ u16 tmp_val;
+ int err;
+ u8 ch = chan->channel;
+
+ switch (mask) {
+ case IIO_CHAN_INFO_RAW:
+ mutex_lock(&dac->lock);
+ err = ad3552r_read_reg(dac, AD3552R_REG_ADDR_CH_DAC_24B(ch),
+ &tmp_val);
+ mutex_unlock(&dac->lock);
+ if (err < 0)
+ return err;
+ *val = tmp_val;
+ return IIO_VAL_INT;
+ case IIO_CHAN_INFO_ENABLE:
+ mutex_lock(&dac->lock);
+ err = ad3552r_read_reg(dac, AD3552R_REG_ADDR_POWERDOWN_CONFIG,
+ &tmp_val);
+ mutex_unlock(&dac->lock);
+ if (err < 0)
+ return err;
+ *val = !((tmp_val & AD3552R_MASK_CH_DAC_POWERDOWN(ch)) >>
+ __ffs(AD3552R_MASK_CH_DAC_POWERDOWN(ch)));
+ return IIO_VAL_INT;
+ case IIO_CHAN_INFO_SCALE:
+ *val = dac->ch_data[ch].scale_int;
+ *val2 = dac->ch_data[ch].scale_dec;
+ return IIO_VAL_INT_PLUS_MICRO;
+ case IIO_CHAN_INFO_OFFSET:
+ *val = dac->ch_data[ch].offset_int;
+ *val2 = dac->ch_data[ch].offset_dec;
+ return IIO_VAL_INT_PLUS_MICRO;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int ad3552r_write_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int val,
+ int val2,
+ long mask)
+{
+ struct ad3552r_desc *dac = iio_priv(indio_dev);
+ int err;
+
+ mutex_lock(&dac->lock);
+ switch (mask) {
+ case IIO_CHAN_INFO_RAW:
+ err = ad3552r_write_reg(dac,
+ AD3552R_REG_ADDR_CH_DAC_24B(chan->channel),
+ val);
+ break;
+ case IIO_CHAN_INFO_ENABLE:
+ err = ad3552r_set_ch_value(dac, AD3552R_CH_DAC_POWERDOWN,
+ chan->channel, !val);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ mutex_unlock(&dac->lock);
+
+ return err;
+}
+
+static const struct iio_info ad3552r_iio_info = {
+ .read_raw = ad3552r_read_raw,
+ .write_raw = ad3552r_write_raw
+};
+
+static int32_t ad3552r_trigger_hw_ldac(struct gpio_desc *ldac)
+{
+ gpiod_set_value_cansleep(ldac, 0);
+ usleep_range(AD3552R_LDAC_PULSE_US, AD3552R_LDAC_PULSE_US + 10);
+ gpiod_set_value_cansleep(ldac, 1);
+
+ return 0;
+}
+
+static int ad3552r_write_all_channels(struct ad3552r_desc *dac, u8 *data)
+{
+ int err, len;
+ u8 addr, buff[AD3552R_NUM_CH * AD3552R_MAX_REG_SIZE + 1];
+
+ addr = AD3552R_REG_ADDR_CH_INPUT_24B(1);
+ /* CH1 */
+ memcpy(buff, data + 2, 2);
+ buff[2] = 0;
+ /* CH0 */
+ memcpy(buff + 3, data, 2);
+ buff[5] = 0;
+ len = 6;
+ if (!dac->gpio_ldac) {
+ /* Software LDAC */
+ buff[6] = AD3552R_MASK_ALL_CH;
+ ++len;
+ }
+ err = ad3552r_transfer(dac, addr, len, buff, false);
+ if (err)
+ return err;
+
+ if (dac->gpio_ldac)
+ return ad3552r_trigger_hw_ldac(dac->gpio_ldac);
+
+ return 0;
+}
+
+static int ad3552r_write_codes(struct ad3552r_desc *dac, u32 mask, u8 *data)
+{
+ int err;
+ u8 addr, buff[AD3552R_MAX_REG_SIZE];
+
+ if (mask == AD3552R_MASK_ALL_CH) {
+ if (memcmp(data, data + 2, 2) != 0)
+ return ad3552r_write_all_channels(dac, data);
+
+ addr = AD3552R_REG_ADDR_INPUT_PAGE_MASK_24B;
+ } else {
+ addr = AD3552R_REG_ADDR_CH_INPUT_24B(__ffs(mask));
+ }
+
+ memcpy(buff, data, 2);
+ buff[2] = 0;
+ err = ad3552r_transfer(dac, addr, 3, data, false);
+ if (err)
+ return err;
+
+ if (dac->gpio_ldac)
+ return ad3552r_trigger_hw_ldac(dac->gpio_ldac);
+
+ return ad3552r_write_reg(dac, AD3552R_REG_ADDR_SW_LDAC_24B, mask);
+}
+
+static irqreturn_t ad3552r_trigger_handler(int irq, void *p)
+{
+ struct iio_poll_func *pf = p;
+ struct iio_dev *indio_dev = pf->indio_dev;
+ struct iio_buffer *buf = indio_dev->buffer;
+ struct ad3552r_desc *dac = iio_priv(indio_dev);
+ /* Maximum size of a scan */
+ u8 buff[AD3552R_NUM_CH * AD3552R_MAX_REG_SIZE];
+ int err;
+
+ memset(buff, 0, sizeof(buff));
+ err = iio_pop_from_buffer(buf, buff);
+ if (err)
+ goto end;
+
+ mutex_lock(&dac->lock);
+ ad3552r_write_codes(dac, *indio_dev->active_scan_mask, buff);
+ mutex_unlock(&dac->lock);
+end:
+ iio_trigger_notify_done(indio_dev->trig);
+
+ return IRQ_HANDLED;
+}
+
+static int ad3552r_check_scratch_pad(struct ad3552r_desc *dac)
+{
+ const u16 val1 = AD3552R_SCRATCH_PAD_TEST_VAL1;
+ const u16 val2 = AD3552R_SCRATCH_PAD_TEST_VAL2;
+ u16 val;
+ int err;
+
+ err = ad3552r_write_reg(dac, AD3552R_REG_ADDR_SCRATCH_PAD, val1);
+ if (err < 0)
+ return err;
+
+ err = ad3552r_read_reg(dac, AD3552R_REG_ADDR_SCRATCH_PAD, &val);
+ if (err < 0)
+ return err;
+
+ if (val1 != val)
+ return -ENODEV;
+
+ err = ad3552r_write_reg(dac, AD3552R_REG_ADDR_SCRATCH_PAD, val2);
+ if (err < 0)
+ return err;
+
+ err = ad3552r_read_reg(dac, AD3552R_REG_ADDR_SCRATCH_PAD, &val);
+ if (err < 0)
+ return err;
+
+ if (val2 != val)
+ return -ENODEV;
+
+ return 0;
+}
+
+struct reg_addr_pool {
+ struct ad3552r_desc *dac;
+ u8 addr;
+};
+
+static int ad3552r_read_reg_wrapper(struct reg_addr_pool *addr)
+{
+ int err;
+ u16 val;
+
+ err = ad3552r_read_reg(addr->dac, addr->addr, &val);
+ if (err)
+ return err;
+
+ return val;
+}
+
+static int ad3552r_reset(struct ad3552r_desc *dac)
+{
+ struct reg_addr_pool addr;
+ int ret;
+ u16 val;
+
+ dac->gpio_reset = devm_gpiod_get_optional(&dac->spi->dev, "reset",
+ GPIOD_OUT_LOW);
+ if (IS_ERR(dac->gpio_reset))
+ return dev_err_probe(&dac->spi->dev, PTR_ERR(dac->gpio_reset),
+ "Error while getting gpio reset");
+
+ if (dac->gpio_reset) {
+ /* Perform hardware reset */
+ usleep_range(10, 20);
+ gpiod_set_value_cansleep(dac->gpio_reset, 1);
+ } else {
+ /* Perform software reset if no GPIO provided */
+ ret = ad3552r_update_reg_field(dac,
+ AD3552R_REG_ADDR_INTERFACE_CONFIG_A,
+ AD3552R_MASK_SOFTWARE_RESET,
+ AD3552R_MASK_SOFTWARE_RESET);
+ if (ret < 0)
+ return ret;
+
+ }
+
+ addr.dac = dac;
+ addr.addr = AD3552R_REG_ADDR_INTERFACE_CONFIG_B;
+ ret = readx_poll_timeout(ad3552r_read_reg_wrapper, &addr, val,
+ val == AD3552R_DEFAULT_CONFIG_B_VALUE ||
+ val < 0,
+ 5000, 50000);
+ if (val < 0)
+ ret = val;
+ if (ret) {
+ dev_err(&dac->spi->dev, "Error while resetting");
+ return ret;
+ }
+
+ ret = readx_poll_timeout(ad3552r_read_reg_wrapper, &addr, val,
+ !(val & AD3552R_MASK_INTERFACE_NOT_READY) ||
+ val < 0,
+ 5000, 50000);
+ if (val < 0)
+ ret = val;
+ if (ret) {
+ dev_err(&dac->spi->dev, "Error while resetting");
+ return ret;
+ }
+
+ return ad3552r_update_reg_field(dac,
+ addr_mask_map[AD3552R_ADDR_ASCENSION][0],
+ addr_mask_map[AD3552R_ADDR_ASCENSION][1],
+ val);
+}
+
+static void ad3552r_get_custom_range(struct ad3552r_desc *dac, s32 i, s32 *v_min,
+ s32 *v_max)
+{
+ s64 vref, tmp, common, offset, gn, gp;
+ /*
+ * From datasheet formula (In Volts):
+ * Vmin = 2.5 + [(GainN + Offset / 1024) * 2.5 * Rfb * 1.03]
+ * Vmax = 2.5 - [(GainP + Offset / 1024) * 2.5 * Rfb * 1.03]
+ * Calculus are converted to milivolts
+ */
+ vref = 2500;
+ /* 2.5 * 1.03 * 1000 (To mV) */
+ common = 2575 * dac->ch_data[i].rfb;
+ offset = dac->ch_data[i].gain_offset;
+
+ gn = gains_scaling_table[dac->ch_data[i].n];
+ tmp = (1024 * gn + AD3552R_GAIN_SCALE * offset) * common;
+ tmp = div_s64(tmp, 1024 * AD3552R_GAIN_SCALE);
+ *v_max = vref + tmp;
+
+ gp = gains_scaling_table[dac->ch_data[i].p];
+ tmp = (1024 * gp - AD3552R_GAIN_SCALE * offset) * common;
+ tmp = div_s64(tmp, 1024 * AD3552R_GAIN_SCALE);
+ *v_min = vref - tmp;
+}
+
+static void ad3552r_calc_gain_and_offset(struct ad3552r_desc *dac, s32 ch)
+{
+ s32 idx, v_max, v_min, span, rem;
+ s64 tmp;
+
+ if (dac->ch_data[ch].range_override) {
+ ad3552r_get_custom_range(dac, ch, &v_min, &v_max);
+ } else {
+ /* Normal range */
+ idx = dac->ch_data[ch].range;
+ if (dac->chip_id == AD3542R_ID) {
+ v_min = ad3542r_ch_ranges[idx][0];
+ v_max = ad3542r_ch_ranges[idx][1];
+ } else {
+ v_min = ad3552r_ch_ranges[idx][0];
+ v_max = ad3552r_ch_ranges[idx][1];
+ }
+ }
+
+ /*
+ * From datasheet formula:
+ * Vout = Span * (D / 65536) + Vmin
+ * Converted to scale and offset:
+ * Scale = Span / 65536
+ * Offset = 65536 * Vmin / Span
+ *
+ * Reminders are in micros in order to be printed as
+ * IIO_VAL_INT_PLUS_MICRO
+ */
+ span = v_max - v_min;
+ dac->ch_data[ch].scale_int = div_s64_rem(span, 65536, &rem);
+ /* Do operations in microvolts */
+ dac->ch_data[ch].scale_dec = DIV_ROUND_CLOSEST((s64)rem * 1000000,
+ 65536);
+
+ dac->ch_data[ch].offset_int = div_s64_rem(v_min * 65536, span, &rem);
+ tmp = (s64)rem * 1000000;
+ dac->ch_data[ch].offset_dec = div_s64(tmp, span);
+}
+
+static int ad3552r_find_range(u16 id, s32 *vals)
+{
+ int i, len;
+ const s32 (*ranges)[2];
+
+ if (id == AD3542R_ID) {
+ len = ARRAY_SIZE(ad3542r_ch_ranges);
+ ranges = ad3542r_ch_ranges;
+ } else {
+ len = ARRAY_SIZE(ad3552r_ch_ranges);
+ ranges = ad3552r_ch_ranges;
+ }
+
+ for (i = 0; i < len; i++)
+ if (vals[0] == ranges[i][0] * 1000 &&
+ vals[1] == ranges[i][1] * 1000)
+ return i;
+
+ return -EINVAL;
+}
+
+static int ad3552r_configure_custom_gain(struct ad3552r_desc *dac,
+ struct fwnode_handle *child,
+ u32 ch)
+{
+ struct device *dev = &dac->spi->dev;
+ struct fwnode_handle *gain_child;
+ u32 val;
+ int err;
+ u8 addr;
+ u16 reg = 0, offset;
+
+ gain_child = fwnode_get_named_child_node(child,
+ "custom-output-range-config");
+ if (IS_ERR(gain_child)) {
+ dev_err(dev,
+ "mandatory custom-output-range-config property missing\n");
+ return PTR_ERR(gain_child);
+ }
+
+ dac->ch_data[ch].range_override = 1;
+ reg |= ad3552r_field_prep(1, AD3552R_MASK_CH_RANGE_OVERRIDE);
+
+ err = fwnode_property_read_u32(gain_child, "adi,gain-scaling-p", &val);
+ if (err) {
+ dev_err(dev, "mandatory adi,gain-scaling-p property missing\n");
+ goto put_child;
+ }
+ reg |= ad3552r_field_prep(val, AD3552R_MASK_CH_GAIN_SCALING_P);
+ dac->ch_data[ch].p = val;
+
+ err = fwnode_property_read_u32(gain_child, "adi,gain-scaling-n", &val);
+ if (err) {
+ dev_err(dev, "mandatory adi,gain-scaling-n property missing\n");
+ goto put_child;
+ }
+ reg |= ad3552r_field_prep(val, AD3552R_MASK_CH_GAIN_SCALING_N);
+ dac->ch_data[ch].n = val;
+
+ err = fwnode_property_read_u32(gain_child, "adi,rfb-ohms", &val);
+ if (err) {
+ dev_err(dev, "mandatory adi,rfb-ohms property missing\n");
+ goto put_child;
+ }
+ dac->ch_data[ch].rfb = val;
+
+ err = fwnode_property_read_u32(gain_child, "adi,gain-offset", &val);
+ if (err) {
+ dev_err(dev, "mandatory adi,gain-offset property missing\n");
+ goto put_child;
+ }
+ dac->ch_data[ch].gain_offset = val;
+
+ offset = abs((s32)val);
+ reg |= ad3552r_field_prep((offset >> 8), AD3552R_MASK_CH_OFFSET_BIT_8);
+
+ reg |= ad3552r_field_prep((s32)val < 0, AD3552R_MASK_CH_OFFSET_POLARITY);
+ addr = AD3552R_REG_ADDR_CH_GAIN(ch);
+ err = ad3552r_write_reg(dac, addr,
+ offset & AD3552R_MASK_CH_OFFSET_BITS_0_7);
+ if (err) {
+ dev_err(dev, "Error writing register\n");
+ goto put_child;
+ }
+
+ err = ad3552r_write_reg(dac, addr, reg);
+ if (err) {
+ dev_err(dev, "Error writing register\n");
+ goto put_child;
+ }
+
+put_child:
+ fwnode_handle_put(gain_child);
+
+ return err;
+}
+
+static void ad3552r_reg_disable(void *reg)
+{
+ regulator_disable(reg);
+}
+
+static int ad3552r_configure_device(struct ad3552r_desc *dac)
+{
+ struct device *dev = &dac->spi->dev;
+ struct fwnode_handle *child;
+ struct regulator *vref;
+ int err, cnt = 0, voltage, delta = 100000;
+ u32 vals[2], val, ch;
+
+ dac->gpio_ldac = devm_gpiod_get_optional(dev, "ldac", GPIOD_OUT_HIGH);
+ if (IS_ERR(dac->gpio_ldac))
+ return dev_err_probe(dev, PTR_ERR(dac->gpio_ldac),
+ "Error getting gpio ldac");
+
+ vref = devm_regulator_get_optional(dev, "vref");
+ if (IS_ERR(vref)) {
+ if (PTR_ERR(vref) != -ENODEV)
+ return dev_err_probe(dev, PTR_ERR(vref),
+ "Error getting vref");
+
+ if (device_property_read_bool(dev, "adi,vref-out-en"))
+ val = AD3552R_INTERNAL_VREF_PIN_2P5V;
+ else
+ val = AD3552R_INTERNAL_VREF_PIN_FLOATING;
+ } else {
+ err = regulator_enable(vref);
+ if (err) {
+ dev_err(dev, "Failed to enable external vref supply\n");
+ return err;
+ }
+
+ err = devm_add_action_or_reset(dev, ad3552r_reg_disable, vref);
+ if (err) {
+ regulator_disable(vref);
+ return err;
+ }
+
+ voltage = regulator_get_voltage(vref);
+ if (voltage > 2500000 + delta || voltage < 2500000 - delta) {
+ dev_warn(dev, "vref-supply must be 2.5V");
+ return -EINVAL;
+ }
+ val = AD3552R_EXTERNAL_VREF_PIN_INPUT;
+ }
+
+ err = ad3552r_update_reg_field(dac,
+ addr_mask_map[AD3552R_VREF_SELECT][0],
+ addr_mask_map[AD3552R_VREF_SELECT][1],
+ val);
+ if (err)
+ return err;
+
+ err = device_property_read_u32(dev, "adi,sdo-drive-strength", &val);
+ if (!err) {
+ if (val > 3) {
+ dev_err(dev, "adi,sdo-drive-strength must be less than 4\n");
+ return -EINVAL;
+ }
+
+ err = ad3552r_update_reg_field(dac,
+ addr_mask_map[AD3552R_SDO_DRIVE_STRENGTH][0],
+ addr_mask_map[AD3552R_SDO_DRIVE_STRENGTH][1],
+ val);
+ if (err)
+ return err;
+ }
+
+ dac->num_ch = device_get_child_node_count(dev);
+ if (!dac->num_ch) {
+ dev_err(dev, "No channels defined\n");
+ return -ENODEV;
+ }
+
+ device_for_each_child_node(dev, child) {
+ err = fwnode_property_read_u32(child, "reg", &ch);
+ if (err) {
+ dev_err(dev, "mandatory reg property missing\n");
+ goto put_child;
+ }
+ if (ch >= AD3552R_NUM_CH) {
+ dev_err(dev, "reg must be less than %d\n",
+ AD3552R_NUM_CH);
+ err = -EINVAL;
+ goto put_child;
+ }
+
+ if (fwnode_property_present(child, "adi,output-range-microvolt")) {
+ err = fwnode_property_read_u32_array(child,
+ "adi,output-range-microvolt",
+ vals,
+ 2);
+ if (err) {
+ dev_err(dev,
+ "adi,output-range-microvolt property could not be parsed\n");
+ goto put_child;
+ }
+
+ err = ad3552r_find_range(dac->chip_id, vals);
+ if (err < 0) {
+ dev_err(dev,
+ "Invalid adi,output-range-microvolt value\n");
+ goto put_child;
+ }
+ val = err;
+ err = ad3552r_set_ch_value(dac,
+ AD3552R_CH_OUTPUT_RANGE_SEL,
+ ch, val);
+ if (err)
+ goto put_child;
+
+ dac->ch_data[ch].range = val;
+ } else if (dac->chip_id == AD3542R_ID) {
+ dev_err(dev,
+ "adi,output-range-microvolt is required for ad3542r\n");
+ err = -EINVAL;
+ goto put_child;
+ } else {
+ err = ad3552r_configure_custom_gain(dac, child, ch);
+ if (err)
+ goto put_child;
+ }
+
+ ad3552r_calc_gain_and_offset(dac, ch);
+ dac->enabled_ch |= BIT(ch);
+
+ err = ad3552r_set_ch_value(dac, AD3552R_CH_SELECT, ch, 1);
+ if (err < 0)
+ goto put_child;
+
+ dac->channels[cnt] = AD3552R_CH_DAC(ch);
+ ++cnt;
+
+ }
+
+ /* Disable unused channels */
+ for_each_clear_bit(ch, &dac->enabled_ch, AD3552R_NUM_CH) {
+ err = ad3552r_set_ch_value(dac, AD3552R_CH_AMPLIFIER_POWERDOWN,
+ ch, 1);
+ if (err)
+ return err;
+ }
+
+ dac->num_ch = cnt;
+
+ return 0;
+put_child:
+ fwnode_handle_put(child);
+
+ return err;
+}
+
+static int ad3552r_init(struct ad3552r_desc *dac)
+{
+ int err;
+ u16 val, id;
+
+ err = ad3552r_reset(dac);
+ if (err) {
+ dev_err(&dac->spi->dev, "Reset failed\n");
+ return err;
+ }
+
+ err = ad3552r_check_scratch_pad(dac);
+ if (err) {
+ dev_err(&dac->spi->dev, "Scratch pad test failed\n");
+ return err;
+ }
+
+ err = ad3552r_read_reg(dac, AD3552R_REG_ADDR_PRODUCT_ID_L, &val);
+ if (err) {
+ dev_err(&dac->spi->dev, "Fail read PRODUCT_ID_L\n");
+ return err;
+ }
+
+ id = val;
+ err = ad3552r_read_reg(dac, AD3552R_REG_ADDR_PRODUCT_ID_H, &val);
+ if (err) {
+ dev_err(&dac->spi->dev, "Fail read PRODUCT_ID_H\n");
+ return err;
+ }
+
+ id |= val << 8;
+ if (id != dac->chip_id) {
+ dev_err(&dac->spi->dev, "Product id not matching\n");
+ return -ENODEV;
+ }
+
+ return ad3552r_configure_device(dac);
+}
+
+static int ad3552r_probe(struct spi_device *spi)
+{
+ const struct spi_device_id *id = spi_get_device_id(spi);
+ struct ad3552r_desc *dac;
+ struct iio_dev *indio_dev;
+ int err;
+
+ indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*dac));
+ if (!indio_dev)
+ return -ENOMEM;
+
+ dac = iio_priv(indio_dev);
+ dac->spi = spi;
+ dac->chip_id = id->driver_data;
+
+ mutex_init(&dac->lock);
+
+ err = ad3552r_init(dac);
+ if (err)
+ return err;
+
+ /* Config triggered buffer device */
+ if (dac->chip_id == AD3552R_ID)
+ indio_dev->name = "ad3552r";
+ else
+ indio_dev->name = "ad3542r";
+ indio_dev->dev.parent = &spi->dev;
+ indio_dev->info = &ad3552r_iio_info;
+ indio_dev->num_channels = dac->num_ch;
+ indio_dev->channels = dac->channels;
+ indio_dev->modes = INDIO_DIRECT_MODE;
+
+ err = devm_iio_triggered_buffer_setup_ext(&indio_dev->dev, indio_dev, NULL,
+ &ad3552r_trigger_handler,
+ IIO_BUFFER_DIRECTION_OUT,
+ NULL,
+ NULL);
+ if (err)
+ return err;
+
+ return devm_iio_device_register(&spi->dev, indio_dev);
+}
+
+static const struct spi_device_id ad3552r_id[] = {
+ { "ad3542r", AD3542R_ID },
+ { "ad3552r", AD3552R_ID },
+ { }
+};
+MODULE_DEVICE_TABLE(spi, ad3552r_id);
+
+static const struct of_device_id ad3552r_of_match[] = {
+ { .compatible = "adi,ad3542r"},
+ { .compatible = "adi,ad3552r"},
+ { }
+};
+MODULE_DEVICE_TABLE(of, ad3552r_of_match);
+
+static struct spi_driver ad3552r_driver = {
+ .driver = {
+ .name = "ad3552r",
+ .of_match_table = ad3552r_of_match,
+ },
+ .probe = ad3552r_probe,
+ .id_table = ad3552r_id
+};
+module_spi_driver(ad3552r_driver);
+
+MODULE_AUTHOR("Mihail Chindris <mihail.chindris@analog.com>");
+MODULE_DESCRIPTION("Analog Device AD3552R DAC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c
index fd9cac4f6321..27ee2c63c5d4 100644
--- a/drivers/iio/dac/ad5064.c
+++ b/drivers/iio/dac/ad5064.c
@@ -377,7 +377,7 @@ static const struct iio_chan_spec_ext_info ad5064_ext_info[] = {
.shared = IIO_SEPARATE,
},
IIO_ENUM("powerdown_mode", IIO_SEPARATE, &ad5064_powerdown_mode_enum),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ad5064_powerdown_mode_enum),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ad5064_powerdown_mode_enum),
{ },
};
@@ -389,7 +389,7 @@ static const struct iio_chan_spec_ext_info ltc2617_ext_info[] = {
.shared = IIO_SEPARATE,
},
IIO_ENUM("powerdown_mode", IIO_SEPARATE, &ltc2617_powerdown_mode_enum),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ltc2617_powerdown_mode_enum),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ltc2617_powerdown_mode_enum),
{ },
};
diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c
index 8ca26bb4b62f..e38860a6a9f3 100644
--- a/drivers/iio/dac/ad5380.c
+++ b/drivers/iio/dac/ad5380.c
@@ -249,7 +249,7 @@ static const struct iio_chan_spec_ext_info ad5380_ext_info[] = {
},
IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE,
&ad5380_powerdown_mode_enum),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ad5380_powerdown_mode_enum),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ad5380_powerdown_mode_enum),
{ },
};
diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c
index 3cc5513a6cbf..1c9b54c012a7 100644
--- a/drivers/iio/dac/ad5446.c
+++ b/drivers/iio/dac/ad5446.c
@@ -142,7 +142,7 @@ static const struct iio_chan_spec_ext_info ad5446_ext_info_powerdown[] = {
.shared = IIO_SEPARATE,
},
IIO_ENUM("powerdown_mode", IIO_SEPARATE, &ad5446_powerdown_mode_enum),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ad5446_powerdown_mode_enum),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ad5446_powerdown_mode_enum),
{ },
};
diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c
index 19cdf9890d02..b631261efa97 100644
--- a/drivers/iio/dac/ad5504.c
+++ b/drivers/iio/dac/ad5504.c
@@ -241,7 +241,7 @@ static const struct iio_chan_spec_ext_info ad5504_ext_info[] = {
},
IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE,
&ad5504_powerdown_mode_enum),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ad5504_powerdown_mode_enum),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ad5504_powerdown_mode_enum),
{ },
};
diff --git a/drivers/iio/dac/ad5624r_spi.c b/drivers/iio/dac/ad5624r_spi.c
index 530529feebb5..3c98941b9f99 100644
--- a/drivers/iio/dac/ad5624r_spi.c
+++ b/drivers/iio/dac/ad5624r_spi.c
@@ -159,7 +159,7 @@ static const struct iio_chan_spec_ext_info ad5624r_ext_info[] = {
},
IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE,
&ad5624r_powerdown_mode_enum),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ad5624r_powerdown_mode_enum),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ad5624r_powerdown_mode_enum),
{ },
};
diff --git a/drivers/iio/dac/ad5686.c b/drivers/iio/dac/ad5686.c
index 8f001db775f4..e592a995f404 100644
--- a/drivers/iio/dac/ad5686.c
+++ b/drivers/iio/dac/ad5686.c
@@ -184,7 +184,7 @@ static const struct iio_chan_spec_ext_info ad5686_ext_info[] = {
.shared = IIO_SEPARATE,
},
IIO_ENUM("powerdown_mode", IIO_SEPARATE, &ad5686_powerdown_mode_enum),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ad5686_powerdown_mode_enum),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ad5686_powerdown_mode_enum),
{ },
};
diff --git a/drivers/iio/dac/ad5755.c b/drivers/iio/dac/ad5755.c
index cabc38d54085..7a62e6e1d5f1 100644
--- a/drivers/iio/dac/ad5755.c
+++ b/drivers/iio/dac/ad5755.c
@@ -13,10 +13,10 @@
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/delay.h>
-#include <linux/of.h>
+#include <linux/property.h>
+
#include <linux/iio/iio.h>
#include <linux/iio/sysfs.h>
-#include <linux/platform_data/ad5755.h>
#define AD5755_NUM_CHANNELS 4
@@ -63,6 +63,101 @@
#define AD5755_SLEW_RATE_SHIFT 3
#define AD5755_SLEW_ENABLE BIT(12)
+enum ad5755_mode {
+ AD5755_MODE_VOLTAGE_0V_5V = 0,
+ AD5755_MODE_VOLTAGE_0V_10V = 1,
+ AD5755_MODE_VOLTAGE_PLUSMINUS_5V = 2,
+ AD5755_MODE_VOLTAGE_PLUSMINUS_10V = 3,
+ AD5755_MODE_CURRENT_4mA_20mA = 4,
+ AD5755_MODE_CURRENT_0mA_20mA = 5,
+ AD5755_MODE_CURRENT_0mA_24mA = 6,
+};
+
+enum ad5755_dc_dc_phase {
+ AD5755_DC_DC_PHASE_ALL_SAME_EDGE = 0,
+ AD5755_DC_DC_PHASE_A_B_SAME_EDGE_C_D_OPP_EDGE = 1,
+ AD5755_DC_DC_PHASE_A_C_SAME_EDGE_B_D_OPP_EDGE = 2,
+ AD5755_DC_DC_PHASE_90_DEGREE = 3,
+};
+
+enum ad5755_dc_dc_freq {
+ AD5755_DC_DC_FREQ_250kHZ = 0,
+ AD5755_DC_DC_FREQ_410kHZ = 1,
+ AD5755_DC_DC_FREQ_650kHZ = 2,
+};
+
+enum ad5755_dc_dc_maxv {
+ AD5755_DC_DC_MAXV_23V = 0,
+ AD5755_DC_DC_MAXV_24V5 = 1,
+ AD5755_DC_DC_MAXV_27V = 2,
+ AD5755_DC_DC_MAXV_29V5 = 3,
+};
+
+enum ad5755_slew_rate {
+ AD5755_SLEW_RATE_64k = 0,
+ AD5755_SLEW_RATE_32k = 1,
+ AD5755_SLEW_RATE_16k = 2,
+ AD5755_SLEW_RATE_8k = 3,
+ AD5755_SLEW_RATE_4k = 4,
+ AD5755_SLEW_RATE_2k = 5,
+ AD5755_SLEW_RATE_1k = 6,
+ AD5755_SLEW_RATE_500 = 7,
+ AD5755_SLEW_RATE_250 = 8,
+ AD5755_SLEW_RATE_125 = 9,
+ AD5755_SLEW_RATE_64 = 10,
+ AD5755_SLEW_RATE_32 = 11,
+ AD5755_SLEW_RATE_16 = 12,
+ AD5755_SLEW_RATE_8 = 13,
+ AD5755_SLEW_RATE_4 = 14,
+ AD5755_SLEW_RATE_0_5 = 15,
+};
+
+enum ad5755_slew_step_size {
+ AD5755_SLEW_STEP_SIZE_1 = 0,
+ AD5755_SLEW_STEP_SIZE_2 = 1,
+ AD5755_SLEW_STEP_SIZE_4 = 2,
+ AD5755_SLEW_STEP_SIZE_8 = 3,
+ AD5755_SLEW_STEP_SIZE_16 = 4,
+ AD5755_SLEW_STEP_SIZE_32 = 5,
+ AD5755_SLEW_STEP_SIZE_64 = 6,
+ AD5755_SLEW_STEP_SIZE_128 = 7,
+ AD5755_SLEW_STEP_SIZE_256 = 8,
+};
+
+/**
+ * struct ad5755_platform_data - AD5755 DAC driver platform data
+ * @ext_dc_dc_compenstation_resistor: Whether an external DC-DC converter
+ * compensation register is used.
+ * @dc_dc_phase: DC-DC converter phase.
+ * @dc_dc_freq: DC-DC converter frequency.
+ * @dc_dc_maxv: DC-DC maximum allowed boost voltage.
+ * @dac: Per DAC instance parameters.
+ * @dac.mode: The mode to be used for the DAC output.
+ * @dac.ext_current_sense_resistor: Whether an external current sense resistor
+ * is used.
+ * @dac.enable_voltage_overrange: Whether to enable 20% voltage output overrange.
+ * @dac.slew.enable: Whether to enable digital slew.
+ * @dac.slew.rate: Slew rate of the digital slew.
+ * @dac.slew.step_size: Slew step size of the digital slew.
+ **/
+struct ad5755_platform_data {
+ bool ext_dc_dc_compenstation_resistor;
+ enum ad5755_dc_dc_phase dc_dc_phase;
+ enum ad5755_dc_dc_freq dc_dc_freq;
+ enum ad5755_dc_dc_maxv dc_dc_maxv;
+
+ struct {
+ enum ad5755_mode mode;
+ bool ext_current_sense_resistor;
+ bool enable_voltage_overrange;
+ struct {
+ bool enable;
+ enum ad5755_slew_rate rate;
+ enum ad5755_slew_step_size step_size;
+ } slew;
+ } dac[4];
+};
+
/**
* struct ad5755_chip_info - chip specific information
* @channel_template: channel specification
@@ -111,7 +206,6 @@ enum ad5755_type {
ID_AD5737,
};
-#ifdef CONFIG_OF
static const int ad5755_dcdc_freq_table[][2] = {
{ 250000, AD5755_DC_DC_FREQ_250kHZ },
{ 410000, AD5755_DC_DC_FREQ_410kHZ },
@@ -154,7 +248,6 @@ static const int ad5755_slew_step_table[][2] = {
{ 2, AD5755_SLEW_STEP_SIZE_2 },
{ 1, AD5755_SLEW_STEP_SIZE_1 },
};
-#endif
static int ad5755_write_unlocked(struct iio_dev *indio_dev,
unsigned int reg, unsigned int val)
@@ -604,30 +697,29 @@ static const struct ad5755_platform_data ad5755_default_pdata = {
},
};
-#ifdef CONFIG_OF
-static struct ad5755_platform_data *ad5755_parse_dt(struct device *dev)
+static struct ad5755_platform_data *ad5755_parse_fw(struct device *dev)
{
- struct device_node *np = dev->of_node;
- struct device_node *pp;
+ struct fwnode_handle *pp;
struct ad5755_platform_data *pdata;
unsigned int tmp;
unsigned int tmparray[3];
int devnr, i;
+ if (!dev_fwnode(dev))
+ return NULL;
+
pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
if (!pdata)
return NULL;
pdata->ext_dc_dc_compenstation_resistor =
- of_property_read_bool(np, "adi,ext-dc-dc-compenstation-resistor");
+ device_property_read_bool(dev, "adi,ext-dc-dc-compenstation-resistor");
- if (!of_property_read_u32(np, "adi,dc-dc-phase", &tmp))
- pdata->dc_dc_phase = tmp;
- else
- pdata->dc_dc_phase = AD5755_DC_DC_PHASE_ALL_SAME_EDGE;
+ pdata->dc_dc_phase = AD5755_DC_DC_PHASE_ALL_SAME_EDGE;
+ device_property_read_u32(dev, "adi,dc-dc-phase", &pdata->dc_dc_phase);
pdata->dc_dc_freq = AD5755_DC_DC_FREQ_410kHZ;
- if (!of_property_read_u32(np, "adi,dc-dc-freq-hz", &tmp)) {
+ if (!device_property_read_u32(dev, "adi,dc-dc-freq-hz", &tmp)) {
for (i = 0; i < ARRAY_SIZE(ad5755_dcdc_freq_table); i++) {
if (tmp == ad5755_dcdc_freq_table[i][0]) {
pdata->dc_dc_freq = ad5755_dcdc_freq_table[i][1];
@@ -641,7 +733,7 @@ static struct ad5755_platform_data *ad5755_parse_dt(struct device *dev)
}
pdata->dc_dc_maxv = AD5755_DC_DC_MAXV_23V;
- if (!of_property_read_u32(np, "adi,dc-dc-max-microvolt", &tmp)) {
+ if (!device_property_read_u32(dev, "adi,dc-dc-max-microvolt", &tmp)) {
for (i = 0; i < ARRAY_SIZE(ad5755_dcdc_maxv_table); i++) {
if (tmp == ad5755_dcdc_maxv_table[i][0]) {
pdata->dc_dc_maxv = ad5755_dcdc_maxv_table[i][1];
@@ -654,25 +746,23 @@ static struct ad5755_platform_data *ad5755_parse_dt(struct device *dev)
}
devnr = 0;
- for_each_child_of_node(np, pp) {
+ device_for_each_child_node(dev, pp) {
if (devnr >= AD5755_NUM_CHANNELS) {
dev_err(dev,
"There are too many channels defined in DT\n");
goto error_out;
}
- if (!of_property_read_u32(pp, "adi,mode", &tmp))
- pdata->dac[devnr].mode = tmp;
- else
- pdata->dac[devnr].mode = AD5755_MODE_CURRENT_4mA_20mA;
+ pdata->dac[devnr].mode = AD5755_MODE_CURRENT_4mA_20mA;
+ fwnode_property_read_u32(pp, "adi,mode", &pdata->dac[devnr].mode);
pdata->dac[devnr].ext_current_sense_resistor =
- of_property_read_bool(pp, "adi,ext-current-sense-resistor");
+ fwnode_property_read_bool(pp, "adi,ext-current-sense-resistor");
pdata->dac[devnr].enable_voltage_overrange =
- of_property_read_bool(pp, "adi,enable-voltage-overrange");
+ fwnode_property_read_bool(pp, "adi,enable-voltage-overrange");
- if (!of_property_read_u32_array(pp, "adi,slew", tmparray, 3)) {
+ if (!fwnode_property_read_u32_array(pp, "adi,slew", tmparray, 3)) {
pdata->dac[devnr].slew.enable = tmparray[0];
pdata->dac[devnr].slew.rate = AD5755_SLEW_RATE_64k;
@@ -715,18 +805,11 @@ static struct ad5755_platform_data *ad5755_parse_dt(struct device *dev)
devm_kfree(dev, pdata);
return NULL;
}
-#else
-static
-struct ad5755_platform_data *ad5755_parse_dt(struct device *dev)
-{
- return NULL;
-}
-#endif
static int ad5755_probe(struct spi_device *spi)
{
enum ad5755_type type = spi_get_device_id(spi)->driver_data;
- const struct ad5755_platform_data *pdata = dev_get_platdata(&spi->dev);
+ const struct ad5755_platform_data *pdata;
struct iio_dev *indio_dev;
struct ad5755_state *st;
int ret;
@@ -751,13 +834,10 @@ static int ad5755_probe(struct spi_device *spi)
mutex_init(&st->lock);
- if (spi->dev.of_node)
- pdata = ad5755_parse_dt(&spi->dev);
- else
- pdata = spi->dev.platform_data;
+ pdata = ad5755_parse_fw(&spi->dev);
if (!pdata) {
- dev_warn(&spi->dev, "no platform data? using default\n");
+ dev_warn(&spi->dev, "no firmware provided parameters? using default\n");
pdata = &ad5755_default_pdata;
}
diff --git a/drivers/iio/dac/ad5758.c b/drivers/iio/dac/ad5758.c
index 0572ef518101..98771e37a7b5 100644
--- a/drivers/iio/dac/ad5758.c
+++ b/drivers/iio/dac/ad5758.c
@@ -10,9 +10,8 @@
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/property.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/spi/spi.h>
#include <linux/gpio/consumer.h>
diff --git a/drivers/iio/dac/ad5766.c b/drivers/iio/dac/ad5766.c
index b0d220c3a126..43189af2fb1f 100644
--- a/drivers/iio/dac/ad5766.c
+++ b/drivers/iio/dac/ad5766.c
@@ -426,14 +426,6 @@ static ssize_t ad5766_write_ext(struct iio_dev *indio_dev,
.shared = _shared, \
}
-#define IIO_ENUM_AVAILABLE_SHARED(_name, _shared, _e) \
-{ \
- .name = (_name "_available"), \
- .shared = _shared, \
- .read = iio_enum_available_read, \
- .private = (uintptr_t)(_e), \
-}
-
static const struct iio_chan_spec_ext_info ad5766_ext_info[] = {
_AD5766_CHAN_EXT_INFO("dither_enable", AD5766_DITHER_ENABLE,
@@ -443,9 +435,8 @@ static const struct iio_chan_spec_ext_info ad5766_ext_info[] = {
_AD5766_CHAN_EXT_INFO("dither_source", AD5766_DITHER_SOURCE,
IIO_SEPARATE),
IIO_ENUM("dither_scale", IIO_SEPARATE, &ad5766_dither_scale_enum),
- IIO_ENUM_AVAILABLE_SHARED("dither_scale",
- IIO_SEPARATE,
- &ad5766_dither_scale_enum),
+ IIO_ENUM_AVAILABLE("dither_scale", IIO_SEPARATE,
+ &ad5766_dither_scale_enum),
{}
};
diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c
index a0923b76e8b6..7b4579d73d18 100644
--- a/drivers/iio/dac/ad5791.c
+++ b/drivers/iio/dac/ad5791.c
@@ -285,7 +285,7 @@ static const struct iio_chan_spec_ext_info ad5791_ext_info[] = {
},
IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE,
&ad5791_powerdown_mode_enum),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ad5791_powerdown_mode_enum),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ad5791_powerdown_mode_enum),
{ },
};
diff --git a/drivers/iio/dac/ad7293.c b/drivers/iio/dac/ad7293.c
new file mode 100644
index 000000000000..59a38ca4c3c7
--- /dev/null
+++ b/drivers/iio/dac/ad7293.c
@@ -0,0 +1,934 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AD7293 driver
+ *
+ * Copyright 2021 Analog Devices Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/iio/iio.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+
+#include <asm/unaligned.h>
+
+#define AD7293_R1B BIT(16)
+#define AD7293_R2B BIT(17)
+#define AD7293_PAGE_ADDR_MSK GENMASK(15, 8)
+#define AD7293_PAGE(x) FIELD_PREP(AD7293_PAGE_ADDR_MSK, x)
+
+/* AD7293 Register Map Common */
+#define AD7293_REG_NO_OP (AD7293_R1B | AD7293_PAGE(0x0) | 0x0)
+#define AD7293_REG_PAGE_SELECT (AD7293_R1B | AD7293_PAGE(0x0) | 0x1)
+#define AD7293_REG_CONV_CMD (AD7293_R2B | AD7293_PAGE(0x0) | 0x2)
+#define AD7293_REG_RESULT (AD7293_R1B | AD7293_PAGE(0x0) | 0x3)
+#define AD7293_REG_DAC_EN (AD7293_R1B | AD7293_PAGE(0x0) | 0x4)
+#define AD7293_REG_DEVICE_ID (AD7293_R2B | AD7293_PAGE(0x0) | 0xC)
+#define AD7293_REG_SOFT_RESET (AD7293_R2B | AD7293_PAGE(0x0) | 0xF)
+
+/* AD7293 Register Map Page 0x0 */
+#define AD7293_REG_VIN0 (AD7293_R2B | AD7293_PAGE(0x0) | 0x10)
+#define AD7293_REG_VIN1 (AD7293_R2B | AD7293_PAGE(0x0) | 0x11)
+#define AD7293_REG_VIN2 (AD7293_R2B | AD7293_PAGE(0x0) | 0x12)
+#define AD7293_REG_VIN3 (AD7293_R2B | AD7293_PAGE(0x0) | 0x13)
+#define AD7293_REG_TSENSE_INT (AD7293_R2B | AD7293_PAGE(0x0) | 0x20)
+#define AD7293_REG_TSENSE_D0 (AD7293_R2B | AD7293_PAGE(0x0) | 0x21)
+#define AD7293_REG_TSENSE_D1 (AD7293_R2B | AD7293_PAGE(0x0) | 0x22)
+#define AD7293_REG_ISENSE_0 (AD7293_R2B | AD7293_PAGE(0x0) | 0x28)
+#define AD7293_REG_ISENSE_1 (AD7293_R2B | AD7293_PAGE(0x0) | 0x29)
+#define AD7293_REG_ISENSE_2 (AD7293_R2B | AD7293_PAGE(0x0) | 0x2A)
+#define AD7293_REG_ISENSE_3 (AD7293_R2B | AD7293_PAGE(0x0) | 0x2B)
+#define AD7293_REG_UNI_VOUT0 (AD7293_R2B | AD7293_PAGE(0x0) | 0x30)
+#define AD7293_REG_UNI_VOUT1 (AD7293_R2B | AD7293_PAGE(0x0) | 0x31)
+#define AD7293_REG_UNI_VOUT2 (AD7293_R2B | AD7293_PAGE(0x0) | 0x32)
+#define AD7293_REG_UNI_VOUT3 (AD7293_R2B | AD7293_PAGE(0x0) | 0x33)
+#define AD7293_REG_BI_VOUT0 (AD7293_R2B | AD7293_PAGE(0x0) | 0x34)
+#define AD7293_REG_BI_VOUT1 (AD7293_R2B | AD7293_PAGE(0x0) | 0x35)
+#define AD7293_REG_BI_VOUT2 (AD7293_R2B | AD7293_PAGE(0x0) | 0x36)
+#define AD7293_REG_BI_VOUT3 (AD7293_R2B | AD7293_PAGE(0x0) | 0x37)
+
+/* AD7293 Register Map Page 0x2 */
+#define AD7293_REG_DIGITAL_OUT_EN (AD7293_R2B | AD7293_PAGE(0x2) | 0x11)
+#define AD7293_REG_DIGITAL_INOUT_FUNC (AD7293_R2B | AD7293_PAGE(0x2) | 0x12)
+#define AD7293_REG_DIGITAL_FUNC_POL (AD7293_R2B | AD7293_PAGE(0x2) | 0x13)
+#define AD7293_REG_GENERAL (AD7293_R2B | AD7293_PAGE(0x2) | 0x14)
+#define AD7293_REG_VINX_RANGE0 (AD7293_R2B | AD7293_PAGE(0x2) | 0x15)
+#define AD7293_REG_VINX_RANGE1 (AD7293_R2B | AD7293_PAGE(0x2) | 0x16)
+#define AD7293_REG_VINX_DIFF_SE (AD7293_R2B | AD7293_PAGE(0x2) | 0x17)
+#define AD7293_REG_VINX_FILTER (AD7293_R2B | AD7293_PAGE(0x2) | 0x18)
+#define AD7293_REG_BG_EN (AD7293_R2B | AD7293_PAGE(0x2) | 0x19)
+#define AD7293_REG_CONV_DELAY (AD7293_R2B | AD7293_PAGE(0x2) | 0x1A)
+#define AD7293_REG_TSENSE_BG_EN (AD7293_R2B | AD7293_PAGE(0x2) | 0x1B)
+#define AD7293_REG_ISENSE_BG_EN (AD7293_R2B | AD7293_PAGE(0x2) | 0x1C)
+#define AD7293_REG_ISENSE_GAIN (AD7293_R2B | AD7293_PAGE(0x2) | 0x1D)
+#define AD7293_REG_DAC_SNOOZE_O (AD7293_R2B | AD7293_PAGE(0x2) | 0x1F)
+#define AD7293_REG_DAC_SNOOZE_1 (AD7293_R2B | AD7293_PAGE(0x2) | 0x20)
+#define AD7293_REG_RSX_MON_BG_EN (AD7293_R2B | AD7293_PAGE(0x2) | 0x23)
+#define AD7293_REG_INTEGR_CL (AD7293_R2B | AD7293_PAGE(0x2) | 0x28)
+#define AD7293_REG_PA_ON_CTRL (AD7293_R2B | AD7293_PAGE(0x2) | 0x29)
+#define AD7293_REG_RAMP_TIME_0 (AD7293_R2B | AD7293_PAGE(0x2) | 0x2A)
+#define AD7293_REG_RAMP_TIME_1 (AD7293_R2B | AD7293_PAGE(0x2) | 0x2B)
+#define AD7293_REG_RAMP_TIME_2 (AD7293_R2B | AD7293_PAGE(0x2) | 0x2C)
+#define AD7293_REG_RAMP_TIME_3 (AD7293_R2B | AD7293_PAGE(0x2) | 0x2D)
+#define AD7293_REG_CL_FR_IT (AD7293_R2B | AD7293_PAGE(0x2) | 0x2E)
+#define AD7293_REG_INTX_AVSS_AVDD (AD7293_R2B | AD7293_PAGE(0x2) | 0x2F)
+
+/* AD7293 Register Map Page 0x3 */
+#define AD7293_REG_VINX_SEQ (AD7293_R2B | AD7293_PAGE(0x3) | 0x10)
+#define AD7293_REG_ISENSEX_TSENSEX_SEQ (AD7293_R2B | AD7293_PAGE(0x3) | 0x11)
+#define AD7293_REG_RSX_MON_BI_VOUTX_SEQ (AD7293_R2B | AD7293_PAGE(0x3) | 0x12)
+
+/* AD7293 Register Map Page 0xE */
+#define AD7293_REG_VIN0_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x10)
+#define AD7293_REG_VIN1_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x11)
+#define AD7293_REG_VIN2_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x12)
+#define AD7293_REG_VIN3_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x13)
+#define AD7293_REG_TSENSE_INT_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x20)
+#define AD7293_REG_TSENSE_D0_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x21)
+#define AD7293_REG_TSENSE_D1_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x22)
+#define AD7293_REG_ISENSE0_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x28)
+#define AD7293_REG_ISENSE1_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x29)
+#define AD7293_REG_ISENSE2_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x2A)
+#define AD7293_REG_ISENSE3_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x2B)
+#define AD7293_REG_UNI_VOUT0_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x30)
+#define AD7293_REG_UNI_VOUT1_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x31)
+#define AD7293_REG_UNI_VOUT2_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x32)
+#define AD7293_REG_UNI_VOUT3_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x33)
+#define AD7293_REG_BI_VOUT0_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x34)
+#define AD7293_REG_BI_VOUT1_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x35)
+#define AD7293_REG_BI_VOUT2_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x36)
+#define AD7293_REG_BI_VOUT3_OFFSET (AD7293_R1B | AD7293_PAGE(0xE) | 0x37)
+
+/* AD7293 Miscellaneous Definitions */
+#define AD7293_READ BIT(7)
+#define AD7293_TRANSF_LEN_MSK GENMASK(17, 16)
+
+#define AD7293_REG_ADDR_MSK GENMASK(7, 0)
+#define AD7293_REG_VOUT_OFFSET_MSK GENMASK(5, 4)
+#define AD7293_REG_DATA_RAW_MSK GENMASK(15, 4)
+#define AD7293_REG_VINX_RANGE_GET_CH_MSK(x, ch) (((x) >> (ch)) & 0x1)
+#define AD7293_REG_VINX_RANGE_SET_CH_MSK(x, ch) (((x) & 0x1) << (ch))
+#define AD7293_CHIP_ID 0x18
+
+enum ad7293_ch_type {
+ AD7293_ADC_VINX,
+ AD7293_ADC_TSENSE,
+ AD7293_ADC_ISENSE,
+ AD7293_DAC,
+};
+
+enum ad7293_max_offset {
+ AD7293_TSENSE_MIN_OFFSET_CH = 4,
+ AD7293_ISENSE_MIN_OFFSET_CH = 7,
+ AD7293_VOUT_MIN_OFFSET_CH = 11,
+ AD7293_VOUT_MAX_OFFSET_CH = 18,
+};
+
+static const int dac_offset_table[] = {0, 1, 2};
+
+static const int isense_gain_table[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+static const int adc_range_table[] = {0, 1, 2, 3};
+
+struct ad7293_state {
+ struct spi_device *spi;
+ /* Protect against concurrent accesses to the device, page selection and data content */
+ struct mutex lock;
+ struct gpio_desc *gpio_reset;
+ struct regulator *reg_avdd;
+ struct regulator *reg_vdrive;
+ u8 page_select;
+ u8 data[3] ____cacheline_aligned;
+};
+
+static int ad7293_page_select(struct ad7293_state *st, unsigned int reg)
+{
+ int ret;
+
+ if (st->page_select != FIELD_GET(AD7293_PAGE_ADDR_MSK, reg)) {
+ st->data[0] = FIELD_GET(AD7293_REG_ADDR_MSK, AD7293_REG_PAGE_SELECT);
+ st->data[1] = FIELD_GET(AD7293_PAGE_ADDR_MSK, reg);
+
+ ret = spi_write(st->spi, &st->data[0], 2);
+ if (ret)
+ return ret;
+
+ st->page_select = FIELD_GET(AD7293_PAGE_ADDR_MSK, reg);
+ }
+
+ return 0;
+}
+
+static int __ad7293_spi_read(struct ad7293_state *st, unsigned int reg,
+ u16 *val)
+{
+ int ret;
+ unsigned int length;
+ struct spi_transfer t = {0};
+
+ length = FIELD_GET(AD7293_TRANSF_LEN_MSK, reg);
+
+ ret = ad7293_page_select(st, reg);
+ if (ret)
+ return ret;
+
+ st->data[0] = AD7293_READ | FIELD_GET(AD7293_REG_ADDR_MSK, reg);
+ st->data[1] = 0x0;
+ st->data[2] = 0x0;
+
+ t.tx_buf = &st->data[0];
+ t.rx_buf = &st->data[0];
+ t.len = length + 1;
+
+ ret = spi_sync_transfer(st->spi, &t, 1);
+ if (ret)
+ return ret;
+
+ if (length == 1)
+ *val = st->data[1];
+ else
+ *val = get_unaligned_be16(&st->data[1]);
+
+ return 0;
+}
+
+static int ad7293_spi_read(struct ad7293_state *st, unsigned int reg,
+ u16 *val)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __ad7293_spi_read(st, reg, val);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int __ad7293_spi_write(struct ad7293_state *st, unsigned int reg,
+ u16 val)
+{
+ int ret;
+ unsigned int length;
+
+ length = FIELD_GET(AD7293_TRANSF_LEN_MSK, reg);
+
+ ret = ad7293_page_select(st, reg);
+ if (ret)
+ return ret;
+
+ st->data[0] = FIELD_GET(AD7293_REG_ADDR_MSK, reg);
+
+ if (length == 1)
+ st->data[1] = val;
+ else
+ put_unaligned_be16(val, &st->data[1]);
+
+ return spi_write(st->spi, &st->data[0], length + 1);
+}
+
+static int ad7293_spi_write(struct ad7293_state *st, unsigned int reg,
+ u16 val)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __ad7293_spi_write(st, reg, val);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int __ad7293_spi_update_bits(struct ad7293_state *st, unsigned int reg,
+ u16 mask, u16 val)
+{
+ int ret;
+ u16 data, temp;
+
+ ret = __ad7293_spi_read(st, reg, &data);
+ if (ret)
+ return ret;
+
+ temp = (data & ~mask) | (val & mask);
+
+ return __ad7293_spi_write(st, reg, temp);
+}
+
+static int ad7293_spi_update_bits(struct ad7293_state *st, unsigned int reg,
+ u16 mask, u16 val)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __ad7293_spi_update_bits(st, reg, mask, val);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int ad7293_adc_get_scale(struct ad7293_state *st, unsigned int ch,
+ u16 *range)
+{
+ int ret;
+ u16 data;
+
+ mutex_lock(&st->lock);
+
+ ret = __ad7293_spi_read(st, AD7293_REG_VINX_RANGE1, &data);
+ if (ret)
+ goto exit;
+
+ *range = AD7293_REG_VINX_RANGE_GET_CH_MSK(data, ch);
+
+ ret = __ad7293_spi_read(st, AD7293_REG_VINX_RANGE0, &data);
+ if (ret)
+ goto exit;
+
+ *range |= AD7293_REG_VINX_RANGE_GET_CH_MSK(data, ch) << 1;
+
+exit:
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int ad7293_adc_set_scale(struct ad7293_state *st, unsigned int ch,
+ u16 range)
+{
+ int ret;
+ unsigned int ch_msk = BIT(ch);
+
+ mutex_lock(&st->lock);
+ ret = __ad7293_spi_update_bits(st, AD7293_REG_VINX_RANGE1, ch_msk,
+ AD7293_REG_VINX_RANGE_SET_CH_MSK(range, ch));
+ if (ret)
+ goto exit;
+
+ ret = __ad7293_spi_update_bits(st, AD7293_REG_VINX_RANGE0, ch_msk,
+ AD7293_REG_VINX_RANGE_SET_CH_MSK((range >> 1), ch));
+
+exit:
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int ad7293_get_offset(struct ad7293_state *st, unsigned int ch,
+ u16 *offset)
+{
+ if (ch < AD7293_TSENSE_MIN_OFFSET_CH)
+ return ad7293_spi_read(st, AD7293_REG_VIN0_OFFSET + ch, offset);
+ else if (ch < AD7293_ISENSE_MIN_OFFSET_CH)
+ return ad7293_spi_read(st, AD7293_REG_TSENSE_INT_OFFSET + (ch - 4), offset);
+ else if (ch < AD7293_VOUT_MIN_OFFSET_CH)
+ return ad7293_spi_read(st, AD7293_REG_ISENSE0_OFFSET + (ch - 7), offset);
+ else if (ch <= AD7293_VOUT_MAX_OFFSET_CH)
+ return ad7293_spi_read(st, AD7293_REG_UNI_VOUT0_OFFSET + (ch - 11), offset);
+
+ return -EINVAL;
+}
+
+static int ad7293_set_offset(struct ad7293_state *st, unsigned int ch,
+ u16 offset)
+{
+ if (ch < AD7293_TSENSE_MIN_OFFSET_CH)
+ return ad7293_spi_write(st, AD7293_REG_VIN0_OFFSET + ch,
+ offset);
+ else if (ch < AD7293_ISENSE_MIN_OFFSET_CH)
+ return ad7293_spi_write(st,
+ AD7293_REG_TSENSE_INT_OFFSET +
+ (ch - AD7293_TSENSE_MIN_OFFSET_CH),
+ offset);
+ else if (ch < AD7293_VOUT_MIN_OFFSET_CH)
+ return ad7293_spi_write(st,
+ AD7293_REG_ISENSE0_OFFSET +
+ (ch - AD7293_ISENSE_MIN_OFFSET_CH),
+ offset);
+ else if (ch <= AD7293_VOUT_MAX_OFFSET_CH)
+ return ad7293_spi_update_bits(st,
+ AD7293_REG_UNI_VOUT0_OFFSET +
+ (ch - AD7293_VOUT_MIN_OFFSET_CH),
+ AD7293_REG_VOUT_OFFSET_MSK,
+ FIELD_PREP(AD7293_REG_VOUT_OFFSET_MSK, offset));
+
+ return -EINVAL;
+}
+
+static int ad7293_isense_set_scale(struct ad7293_state *st, unsigned int ch,
+ u16 gain)
+{
+ unsigned int ch_msk = (0xf << (4 * ch));
+
+ return ad7293_spi_update_bits(st, AD7293_REG_ISENSE_GAIN, ch_msk,
+ gain << (4 * ch));
+}
+
+static int ad7293_isense_get_scale(struct ad7293_state *st, unsigned int ch,
+ u16 *gain)
+{
+ int ret;
+
+ ret = ad7293_spi_read(st, AD7293_REG_ISENSE_GAIN, gain);
+ if (ret)
+ return ret;
+
+ *gain = (*gain >> (4 * ch)) & 0xf;
+
+ return ret;
+}
+
+static int ad7293_dac_write_raw(struct ad7293_state *st, unsigned int ch,
+ u16 raw)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+
+ ret = __ad7293_spi_update_bits(st, AD7293_REG_DAC_EN, BIT(ch), BIT(ch));
+ if (ret)
+ goto exit;
+
+ ret = __ad7293_spi_write(st, AD7293_REG_UNI_VOUT0 + ch,
+ FIELD_PREP(AD7293_REG_DATA_RAW_MSK, raw));
+
+exit:
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int ad7293_ch_read_raw(struct ad7293_state *st, enum ad7293_ch_type type,
+ unsigned int ch, u16 *raw)
+{
+ int ret;
+ unsigned int reg_wr, reg_rd, data_wr;
+
+ switch (type) {
+ case AD7293_ADC_VINX:
+ reg_wr = AD7293_REG_VINX_SEQ;
+ reg_rd = AD7293_REG_VIN0 + ch;
+ data_wr = BIT(ch);
+
+ break;
+ case AD7293_ADC_TSENSE:
+ reg_wr = AD7293_REG_ISENSEX_TSENSEX_SEQ;
+ reg_rd = AD7293_REG_TSENSE_INT + ch;
+ data_wr = BIT(ch);
+
+ break;
+ case AD7293_ADC_ISENSE:
+ reg_wr = AD7293_REG_ISENSEX_TSENSEX_SEQ;
+ reg_rd = AD7293_REG_ISENSE_0 + ch;
+ data_wr = BIT(ch) << 8;
+
+ break;
+ case AD7293_DAC:
+ reg_rd = AD7293_REG_UNI_VOUT0 + ch;
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ mutex_lock(&st->lock);
+
+ if (type != AD7293_DAC) {
+ if (type == AD7293_ADC_TSENSE) {
+ ret = __ad7293_spi_write(st, AD7293_REG_TSENSE_BG_EN,
+ BIT(ch));
+ if (ret)
+ goto exit;
+
+ usleep_range(9000, 9900);
+ } else if (type == AD7293_ADC_ISENSE) {
+ ret = __ad7293_spi_write(st, AD7293_REG_ISENSE_BG_EN,
+ BIT(ch));
+ if (ret)
+ goto exit;
+
+ usleep_range(2000, 7000);
+ }
+
+ ret = __ad7293_spi_write(st, reg_wr, data_wr);
+ if (ret)
+ goto exit;
+
+ ret = __ad7293_spi_write(st, AD7293_REG_CONV_CMD, 0x82);
+ if (ret)
+ goto exit;
+ }
+
+ ret = __ad7293_spi_read(st, reg_rd, raw);
+
+ *raw = FIELD_GET(AD7293_REG_DATA_RAW_MSK, *raw);
+
+exit:
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int ad7293_read_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int *val, int *val2, long info)
+{
+ struct ad7293_state *st = iio_priv(indio_dev);
+ int ret;
+ u16 data;
+
+ switch (info) {
+ case IIO_CHAN_INFO_RAW:
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ if (chan->output)
+ ret = ad7293_ch_read_raw(st, AD7293_DAC,
+ chan->channel, &data);
+ else
+ ret = ad7293_ch_read_raw(st, AD7293_ADC_VINX,
+ chan->channel, &data);
+
+ break;
+ case IIO_CURRENT:
+ ret = ad7293_ch_read_raw(st, AD7293_ADC_ISENSE,
+ chan->channel, &data);
+
+ break;
+ case IIO_TEMP:
+ ret = ad7293_ch_read_raw(st, AD7293_ADC_TSENSE,
+ chan->channel, &data);
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+
+ *val = data;
+
+ return IIO_VAL_INT;
+ case IIO_CHAN_INFO_OFFSET:
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ if (chan->output) {
+ ret = ad7293_get_offset(st,
+ chan->channel + AD7293_VOUT_MIN_OFFSET_CH,
+ &data);
+
+ data = FIELD_GET(AD7293_REG_VOUT_OFFSET_MSK, data);
+ } else {
+ ret = ad7293_get_offset(st, chan->channel, &data);
+ }
+
+ break;
+ case IIO_CURRENT:
+ ret = ad7293_get_offset(st,
+ chan->channel + AD7293_ISENSE_MIN_OFFSET_CH,
+ &data);
+
+ break;
+ case IIO_TEMP:
+ ret = ad7293_get_offset(st,
+ chan->channel + AD7293_TSENSE_MIN_OFFSET_CH,
+ &data);
+
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (ret)
+ return ret;
+
+ *val = data;
+
+ return IIO_VAL_INT;
+ case IIO_CHAN_INFO_SCALE:
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ ret = ad7293_adc_get_scale(st, chan->channel, &data);
+ if (ret)
+ return ret;
+
+ *val = data;
+
+ return IIO_VAL_INT;
+ case IIO_CURRENT:
+ ret = ad7293_isense_get_scale(st, chan->channel, &data);
+ if (ret)
+ return ret;
+
+ *val = data;
+
+ return IIO_VAL_INT;
+ case IIO_TEMP:
+ *val = 1;
+ *val2 = 8;
+
+ return IIO_VAL_FRACTIONAL;
+ default:
+ return -EINVAL;
+ }
+ default:
+ return -EINVAL;
+ }
+}
+
+static int ad7293_write_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int val, int val2, long info)
+{
+ struct ad7293_state *st = iio_priv(indio_dev);
+
+ switch (info) {
+ case IIO_CHAN_INFO_RAW:
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ if (!chan->output)
+ return -EINVAL;
+
+ return ad7293_dac_write_raw(st, chan->channel, val);
+ default:
+ return -EINVAL;
+ }
+ case IIO_CHAN_INFO_OFFSET:
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ if (chan->output)
+ return ad7293_set_offset(st,
+ chan->channel +
+ AD7293_VOUT_MIN_OFFSET_CH,
+ val);
+ else
+ return ad7293_set_offset(st, chan->channel, val);
+ case IIO_CURRENT:
+ return ad7293_set_offset(st,
+ chan->channel +
+ AD7293_ISENSE_MIN_OFFSET_CH,
+ val);
+ case IIO_TEMP:
+ return ad7293_set_offset(st,
+ chan->channel +
+ AD7293_TSENSE_MIN_OFFSET_CH,
+ val);
+ default:
+ return -EINVAL;
+ }
+ case IIO_CHAN_INFO_SCALE:
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ return ad7293_adc_set_scale(st, chan->channel, val);
+ case IIO_CURRENT:
+ return ad7293_isense_set_scale(st, chan->channel, val);
+ default:
+ return -EINVAL;
+ }
+ default:
+ return -EINVAL;
+ }
+}
+
+static int ad7293_reg_access(struct iio_dev *indio_dev,
+ unsigned int reg,
+ unsigned int write_val,
+ unsigned int *read_val)
+{
+ struct ad7293_state *st = iio_priv(indio_dev);
+ int ret;
+
+ if (read_val) {
+ u16 temp;
+ ret = ad7293_spi_read(st, reg, &temp);
+ *read_val = temp;
+ } else {
+ ret = ad7293_spi_write(st, reg, (u16)write_val);
+ }
+
+ return ret;
+}
+
+static int ad7293_read_avail(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ const int **vals, int *type, int *length,
+ long info)
+{
+ switch (info) {
+ case IIO_CHAN_INFO_OFFSET:
+ *vals = dac_offset_table;
+ *type = IIO_VAL_INT;
+ *length = ARRAY_SIZE(dac_offset_table);
+
+ return IIO_AVAIL_LIST;
+ case IIO_CHAN_INFO_SCALE:
+ *type = IIO_VAL_INT;
+
+ switch (chan->type) {
+ case IIO_VOLTAGE:
+ *vals = adc_range_table;
+ *length = ARRAY_SIZE(adc_range_table);
+ return IIO_AVAIL_LIST;
+ case IIO_CURRENT:
+ *vals = isense_gain_table;
+ *length = ARRAY_SIZE(isense_gain_table);
+ return IIO_AVAIL_LIST;
+ default:
+ return -EINVAL;
+ }
+ default:
+ return -EINVAL;
+ }
+}
+
+#define AD7293_CHAN_ADC(_channel) { \
+ .type = IIO_VOLTAGE, \
+ .output = 0, \
+ .indexed = 1, \
+ .channel = _channel, \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
+ BIT(IIO_CHAN_INFO_SCALE) | \
+ BIT(IIO_CHAN_INFO_OFFSET), \
+ .info_mask_shared_by_type_available = BIT(IIO_CHAN_INFO_SCALE) \
+}
+
+#define AD7293_CHAN_DAC(_channel) { \
+ .type = IIO_VOLTAGE, \
+ .output = 1, \
+ .indexed = 1, \
+ .channel = _channel, \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
+ BIT(IIO_CHAN_INFO_OFFSET), \
+ .info_mask_shared_by_type_available = BIT(IIO_CHAN_INFO_OFFSET) \
+}
+
+#define AD7293_CHAN_ISENSE(_channel) { \
+ .type = IIO_CURRENT, \
+ .output = 0, \
+ .indexed = 1, \
+ .channel = _channel, \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
+ BIT(IIO_CHAN_INFO_OFFSET) | \
+ BIT(IIO_CHAN_INFO_SCALE), \
+ .info_mask_shared_by_type_available = BIT(IIO_CHAN_INFO_SCALE) \
+}
+
+#define AD7293_CHAN_TEMP(_channel) { \
+ .type = IIO_TEMP, \
+ .output = 0, \
+ .indexed = 1, \
+ .channel = _channel, \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
+ BIT(IIO_CHAN_INFO_OFFSET), \
+ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) \
+}
+
+static const struct iio_chan_spec ad7293_channels[] = {
+ AD7293_CHAN_ADC(0),
+ AD7293_CHAN_ADC(1),
+ AD7293_CHAN_ADC(2),
+ AD7293_CHAN_ADC(3),
+ AD7293_CHAN_ISENSE(0),
+ AD7293_CHAN_ISENSE(1),
+ AD7293_CHAN_ISENSE(2),
+ AD7293_CHAN_ISENSE(3),
+ AD7293_CHAN_TEMP(0),
+ AD7293_CHAN_TEMP(1),
+ AD7293_CHAN_TEMP(2),
+ AD7293_CHAN_DAC(0),
+ AD7293_CHAN_DAC(1),
+ AD7293_CHAN_DAC(2),
+ AD7293_CHAN_DAC(3),
+ AD7293_CHAN_DAC(4),
+ AD7293_CHAN_DAC(5),
+ AD7293_CHAN_DAC(6),
+ AD7293_CHAN_DAC(7)
+};
+
+static int ad7293_soft_reset(struct ad7293_state *st)
+{
+ int ret;
+
+ ret = __ad7293_spi_write(st, AD7293_REG_SOFT_RESET, 0x7293);
+ if (ret)
+ return ret;
+
+ return __ad7293_spi_write(st, AD7293_REG_SOFT_RESET, 0x0000);
+}
+
+static int ad7293_reset(struct ad7293_state *st)
+{
+ if (st->gpio_reset) {
+ gpiod_set_value(st->gpio_reset, 0);
+ usleep_range(100, 1000);
+ gpiod_set_value(st->gpio_reset, 1);
+ usleep_range(100, 1000);
+
+ return 0;
+ }
+
+ /* Perform a software reset */
+ return ad7293_soft_reset(st);
+}
+
+static int ad7293_properties_parse(struct ad7293_state *st)
+{
+ struct spi_device *spi = st->spi;
+
+ st->gpio_reset = devm_gpiod_get_optional(&st->spi->dev, "reset",
+ GPIOD_OUT_HIGH);
+ if (IS_ERR(st->gpio_reset))
+ return dev_err_probe(&spi->dev, PTR_ERR(st->gpio_reset),
+ "failed to get the reset GPIO\n");
+
+ st->reg_avdd = devm_regulator_get(&spi->dev, "avdd");
+ if (IS_ERR(st->reg_avdd))
+ return dev_err_probe(&spi->dev, PTR_ERR(st->reg_avdd),
+ "failed to get the AVDD voltage\n");
+
+ st->reg_vdrive = devm_regulator_get(&spi->dev, "vdrive");
+ if (IS_ERR(st->reg_vdrive))
+ return dev_err_probe(&spi->dev, PTR_ERR(st->reg_vdrive),
+ "failed to get the VDRIVE voltage\n");
+
+ return 0;
+}
+
+static void ad7293_reg_disable(void *data)
+{
+ regulator_disable(data);
+}
+
+static int ad7293_init(struct ad7293_state *st)
+{
+ int ret;
+ u16 chip_id;
+ struct spi_device *spi = st->spi;
+
+ ret = ad7293_properties_parse(st);
+ if (ret)
+ return ret;
+
+ ret = ad7293_reset(st);
+ if (ret)
+ return ret;
+
+ ret = regulator_enable(st->reg_avdd);
+ if (ret) {
+ dev_err(&spi->dev,
+ "Failed to enable specified AVDD Voltage!\n");
+ return ret;
+ }
+
+ ret = devm_add_action_or_reset(&spi->dev, ad7293_reg_disable,
+ st->reg_avdd);
+ if (ret)
+ return ret;
+
+ ret = regulator_enable(st->reg_vdrive);
+ if (ret) {
+ dev_err(&spi->dev,
+ "Failed to enable specified VDRIVE Voltage!\n");
+ return ret;
+ }
+
+ ret = devm_add_action_or_reset(&spi->dev, ad7293_reg_disable,
+ st->reg_vdrive);
+ if (ret)
+ return ret;
+
+ ret = regulator_get_voltage(st->reg_avdd);
+ if (ret < 0) {
+ dev_err(&spi->dev, "Failed to read avdd regulator: %d\n", ret);
+ return ret;
+ }
+
+ if (ret > 5500000 || ret < 4500000)
+ return -EINVAL;
+
+ ret = regulator_get_voltage(st->reg_vdrive);
+ if (ret < 0) {
+ dev_err(&spi->dev,
+ "Failed to read vdrive regulator: %d\n", ret);
+ return ret;
+ }
+ if (ret > 5500000 || ret < 1700000)
+ return -EINVAL;
+
+ /* Check Chip ID */
+ ret = __ad7293_spi_read(st, AD7293_REG_DEVICE_ID, &chip_id);
+ if (ret)
+ return ret;
+
+ if (chip_id != AD7293_CHIP_ID) {
+ dev_err(&spi->dev, "Invalid Chip ID.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct iio_info ad7293_info = {
+ .read_raw = ad7293_read_raw,
+ .write_raw = ad7293_write_raw,
+ .read_avail = &ad7293_read_avail,
+ .debugfs_reg_access = &ad7293_reg_access,
+};
+
+static int ad7293_probe(struct spi_device *spi)
+{
+ struct iio_dev *indio_dev;
+ struct ad7293_state *st;
+ int ret;
+
+ indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st));
+ if (!indio_dev)
+ return -ENOMEM;
+
+ st = iio_priv(indio_dev);
+
+ indio_dev->info = &ad7293_info;
+ indio_dev->name = "ad7293";
+ indio_dev->channels = ad7293_channels;
+ indio_dev->num_channels = ARRAY_SIZE(ad7293_channels);
+
+ st->spi = spi;
+ st->page_select = 0;
+
+ mutex_init(&st->lock);
+
+ ret = ad7293_init(st);
+ if (ret)
+ return ret;
+
+ return devm_iio_device_register(&spi->dev, indio_dev);
+}
+
+static const struct spi_device_id ad7293_id[] = {
+ { "ad7293", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(spi, ad7293_id);
+
+static const struct of_device_id ad7293_of_match[] = {
+ { .compatible = "adi,ad7293" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, ad7293_of_match);
+
+static struct spi_driver ad7293_driver = {
+ .driver = {
+ .name = "ad7293",
+ .of_match_table = ad7293_of_match,
+ },
+ .probe = ad7293_probe,
+ .id_table = ad7293_id,
+};
+module_spi_driver(ad7293_driver);
+
+MODULE_AUTHOR("Antoniu Miclaus <antoniu.miclaus@analog.com");
+MODULE_DESCRIPTION("Analog Devices AD7293");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/dac/dpot-dac.c b/drivers/iio/dac/dpot-dac.c
index 5d1819448102..83ce9489259c 100644
--- a/drivers/iio/dac/dpot-dac.c
+++ b/drivers/iio/dac/dpot-dac.c
@@ -30,7 +30,7 @@
#include <linux/iio/consumer.h>
#include <linux/iio/iio.h>
#include <linux/module.h>
-#include <linux/of.h>
+#include <linux/mod_devicetable.h>
#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>
diff --git a/drivers/iio/dac/lpc18xx_dac.c b/drivers/iio/dac/lpc18xx_dac.c
index 5502e4f62f0d..60467c6f2c6e 100644
--- a/drivers/iio/dac/lpc18xx_dac.c
+++ b/drivers/iio/dac/lpc18xx_dac.c
@@ -16,9 +16,8 @@
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/mutex.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>
diff --git a/drivers/iio/dac/max5821.c b/drivers/iio/dac/max5821.c
index 7da4710a6408..fce640b7f1c8 100644
--- a/drivers/iio/dac/max5821.c
+++ b/drivers/iio/dac/max5821.c
@@ -137,7 +137,7 @@ static const struct iio_chan_spec_ext_info max5821_ext_info[] = {
.shared = IIO_SEPARATE,
},
IIO_ENUM("powerdown_mode", IIO_SEPARATE, &max5821_powerdown_mode_enum),
- IIO_ENUM_AVAILABLE("powerdown_mode", &max5821_powerdown_mode_enum),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &max5821_powerdown_mode_enum),
{ },
};
diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c
index 34b14aafb630..842bad57cb88 100644
--- a/drivers/iio/dac/mcp4725.c
+++ b/drivers/iio/dac/mcp4725.c
@@ -221,8 +221,8 @@ static const struct iio_chan_spec_ext_info mcp4725_ext_info[] = {
},
IIO_ENUM("powerdown_mode", IIO_SEPARATE,
&mcp472x_powerdown_mode_enum[MCP4725]),
- IIO_ENUM_AVAILABLE("powerdown_mode",
- &mcp472x_powerdown_mode_enum[MCP4725]),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE,
+ &mcp472x_powerdown_mode_enum[MCP4725]),
{ },
};
@@ -235,8 +235,8 @@ static const struct iio_chan_spec_ext_info mcp4726_ext_info[] = {
},
IIO_ENUM("powerdown_mode", IIO_SEPARATE,
&mcp472x_powerdown_mode_enum[MCP4726]),
- IIO_ENUM_AVAILABLE("powerdown_mode",
- &mcp472x_powerdown_mode_enum[MCP4726]),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE,
+ &mcp472x_powerdown_mode_enum[MCP4726]),
{ },
};
@@ -386,7 +386,7 @@ static int mcp4725_probe(struct i2c_client *client,
i2c_set_clientdata(client, indio_dev);
data->client = client;
if (dev_fwnode(&client->dev))
- data->id = (enum chip_id)device_get_match_data(&client->dev);
+ data->id = (uintptr_t)device_get_match_data(&client->dev);
else
data->id = id->driver_data;
pdata = dev_get_platdata(&client->dev);
diff --git a/drivers/iio/dac/stm32-dac.c b/drivers/iio/dac/stm32-dac.c
index dd2e306824e7..cd71cc4553a7 100644
--- a/drivers/iio/dac/stm32-dac.c
+++ b/drivers/iio/dac/stm32-dac.c
@@ -246,7 +246,7 @@ static const struct iio_chan_spec_ext_info stm32_dac_ext_info[] = {
.shared = IIO_SEPARATE,
},
IIO_ENUM("powerdown_mode", IIO_SEPARATE, &stm32_dac_powerdown_mode_en),
- IIO_ENUM_AVAILABLE("powerdown_mode", &stm32_dac_powerdown_mode_en),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &stm32_dac_powerdown_mode_en),
{},
};
diff --git a/drivers/iio/dac/ti-dac082s085.c b/drivers/iio/dac/ti-dac082s085.c
index 5c14bfb16521..6beda2193683 100644
--- a/drivers/iio/dac/ti-dac082s085.c
+++ b/drivers/iio/dac/ti-dac082s085.c
@@ -160,7 +160,7 @@ static const struct iio_chan_spec_ext_info ti_dac_ext_info[] = {
.shared = IIO_SHARED_BY_TYPE,
},
IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE, &ti_dac_powerdown_mode),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ti_dac_powerdown_mode),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ti_dac_powerdown_mode),
{ },
};
diff --git a/drivers/iio/dac/ti-dac5571.c b/drivers/iio/dac/ti-dac5571.c
index 546a4cf6c5ef..4a3b8d875518 100644
--- a/drivers/iio/dac/ti-dac5571.c
+++ b/drivers/iio/dac/ti-dac5571.c
@@ -212,7 +212,7 @@ static const struct iio_chan_spec_ext_info dac5571_ext_info[] = {
.shared = IIO_SEPARATE,
},
IIO_ENUM("powerdown_mode", IIO_SEPARATE, &dac5571_powerdown_mode),
- IIO_ENUM_AVAILABLE("powerdown_mode", &dac5571_powerdown_mode),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &dac5571_powerdown_mode),
{},
};
diff --git a/drivers/iio/dac/ti-dac7311.c b/drivers/iio/dac/ti-dac7311.c
index 09218c3029f0..99f275829ec2 100644
--- a/drivers/iio/dac/ti-dac7311.c
+++ b/drivers/iio/dac/ti-dac7311.c
@@ -146,7 +146,7 @@ static const struct iio_chan_spec_ext_info ti_dac_ext_info[] = {
.shared = IIO_SHARED_BY_TYPE,
},
IIO_ENUM("powerdown_mode", IIO_SHARED_BY_TYPE, &ti_dac_powerdown_mode),
- IIO_ENUM_AVAILABLE("powerdown_mode", &ti_dac_powerdown_mode),
+ IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, &ti_dac_powerdown_mode),
{ },
};
diff --git a/drivers/iio/dummy/iio_simple_dummy_buffer.c b/drivers/iio/dummy/iio_simple_dummy_buffer.c
index 59aa60d4ca37..d81c2b2dad82 100644
--- a/drivers/iio/dummy/iio_simple_dummy_buffer.c
+++ b/drivers/iio/dummy/iio_simple_dummy_buffer.c
@@ -45,7 +45,6 @@ static irqreturn_t iio_simple_dummy_trigger_h(int irq, void *p)
{
struct iio_poll_func *pf = p;
struct iio_dev *indio_dev = pf->indio_dev;
- int len = 0;
u16 *data;
data = kmalloc(indio_dev->scan_bytes, GFP_KERNEL);
@@ -79,7 +78,6 @@ static irqreturn_t iio_simple_dummy_trigger_h(int irq, void *p)
indio_dev->masklength, j);
/* random access read from the 'device' */
data[i] = fakedata[j];
- len += 2;
}
}
diff --git a/drivers/iio/filter/Kconfig b/drivers/iio/filter/Kconfig
new file mode 100644
index 000000000000..3ae35817ad82
--- /dev/null
+++ b/drivers/iio/filter/Kconfig
@@ -0,0 +1,18 @@
+#
+# Filter drivers
+#
+# When adding new entries keep the list in alphabetical order
+
+menu "Filters"
+
+config ADMV8818
+ tristate "Analog Devices ADMV8818 High-Pass and Low-Pass Filter"
+ depends on SPI && COMMON_CLK && 64BIT
+ help
+ Say yes here to build support for Analog Devices ADMV8818
+ 2 GHz to 18 GHz, Digitally Tunable, High-Pass and Low-Pass Filter.
+
+ To compile this driver as a module, choose M here: the
+ modiule will be called admv8818.
+
+endmenu
diff --git a/drivers/iio/filter/Makefile b/drivers/iio/filter/Makefile
new file mode 100644
index 000000000000..55e228c0dd20
--- /dev/null
+++ b/drivers/iio/filter/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for industrial I/O Filter drivers
+#
+
+# When adding new entries keep the list in alphabetical order
+obj-$(CONFIG_ADMV8818) += admv8818.o
diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c
new file mode 100644
index 000000000000..68de45fe21b4
--- /dev/null
+++ b/drivers/iio/filter/admv8818.c
@@ -0,0 +1,665 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ADMV8818 driver
+ *
+ * Copyright 2021 Analog Devices Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/iio/iio.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/regmap.h>
+#include <linux/spi/spi.h>
+#include <linux/units.h>
+
+/* ADMV8818 Register Map */
+#define ADMV8818_REG_SPI_CONFIG_A 0x0
+#define ADMV8818_REG_SPI_CONFIG_B 0x1
+#define ADMV8818_REG_CHIPTYPE 0x3
+#define ADMV8818_REG_PRODUCT_ID_L 0x4
+#define ADMV8818_REG_PRODUCT_ID_H 0x5
+#define ADMV8818_REG_FAST_LATCH_POINTER 0x10
+#define ADMV8818_REG_FAST_LATCH_STOP 0x11
+#define ADMV8818_REG_FAST_LATCH_START 0x12
+#define ADMV8818_REG_FAST_LATCH_DIRECTION 0x13
+#define ADMV8818_REG_FAST_LATCH_STATE 0x14
+#define ADMV8818_REG_WR0_SW 0x20
+#define ADMV8818_REG_WR0_FILTER 0x21
+#define ADMV8818_REG_WR1_SW 0x22
+#define ADMV8818_REG_WR1_FILTER 0x23
+#define ADMV8818_REG_WR2_SW 0x24
+#define ADMV8818_REG_WR2_FILTER 0x25
+#define ADMV8818_REG_WR3_SW 0x26
+#define ADMV8818_REG_WR3_FILTER 0x27
+#define ADMV8818_REG_WR4_SW 0x28
+#define ADMV8818_REG_WR4_FILTER 0x29
+#define ADMV8818_REG_LUT0_SW 0x100
+#define ADMV8818_REG_LUT0_FILTER 0x101
+#define ADMV8818_REG_LUT127_SW 0x1FE
+#define ADMV8818_REG_LUT127_FILTER 0x1FF
+
+/* ADMV8818_REG_SPI_CONFIG_A Map */
+#define ADMV8818_SOFTRESET_N_MSK BIT(7)
+#define ADMV8818_LSB_FIRST_N_MSK BIT(6)
+#define ADMV8818_ENDIAN_N_MSK BIT(5)
+#define ADMV8818_SDOACTIVE_N_MSK BIT(4)
+#define ADMV8818_SDOACTIVE_MSK BIT(3)
+#define ADMV8818_ENDIAN_MSK BIT(2)
+#define ADMV8818_LSBFIRST_MSK BIT(1)
+#define ADMV8818_SOFTRESET_MSK BIT(0)
+
+/* ADMV8818_REG_SPI_CONFIG_B Map */
+#define ADMV8818_SINGLE_INSTRUCTION_MSK BIT(7)
+#define ADMV8818_CSB_STALL_MSK BIT(6)
+#define ADMV8818_MASTER_SLAVE_RB_MSK BIT(5)
+#define ADMV8818_MASTER_SLAVE_TRANSFER_MSK BIT(0)
+
+/* ADMV8818_REG_WR0_SW Map */
+#define ADMV8818_SW_IN_SET_WR0_MSK BIT(7)
+#define ADMV8818_SW_OUT_SET_WR0_MSK BIT(6)
+#define ADMV8818_SW_IN_WR0_MSK GENMASK(5, 3)
+#define ADMV8818_SW_OUT_WR0_MSK GENMASK(2, 0)
+
+/* ADMV8818_REG_WR0_FILTER Map */
+#define ADMV8818_HPF_WR0_MSK GENMASK(7, 4)
+#define ADMV8818_LPF_WR0_MSK GENMASK(3, 0)
+
+enum {
+ ADMV8818_BW_FREQ,
+ ADMV8818_CENTER_FREQ
+};
+
+enum {
+ ADMV8818_AUTO_MODE,
+ ADMV8818_MANUAL_MODE,
+};
+
+struct admv8818_state {
+ struct spi_device *spi;
+ struct regmap *regmap;
+ struct clk *clkin;
+ struct notifier_block nb;
+ /* Protect against concurrent accesses to the device and data content*/
+ struct mutex lock;
+ unsigned int filter_mode;
+ u64 cf_hz;
+};
+
+static const unsigned long long freq_range_hpf[4][2] = {
+ {1750000000ULL, 3550000000ULL},
+ {3400000000ULL, 7250000000ULL},
+ {6600000000, 12000000000},
+ {12500000000, 19900000000}
+};
+
+static const unsigned long long freq_range_lpf[4][2] = {
+ {2050000000ULL, 3850000000ULL},
+ {3350000000ULL, 7250000000ULL},
+ {7000000000, 13000000000},
+ {12550000000, 18500000000}
+};
+
+static const struct regmap_config admv8818_regmap_config = {
+ .reg_bits = 16,
+ .val_bits = 8,
+ .read_flag_mask = 0x80,
+ .max_register = 0x1FF,
+};
+
+static const char * const admv8818_modes[] = {
+ [0] = "auto",
+ [1] = "manual"
+};
+
+static int __admv8818_hpf_select(struct admv8818_state *st, u64 freq)
+{
+ unsigned int hpf_step = 0, hpf_band = 0, i, j;
+ u64 freq_step;
+ int ret;
+
+ if (freq < freq_range_hpf[0][0])
+ goto hpf_write;
+
+ if (freq > freq_range_hpf[3][1]) {
+ hpf_step = 15;
+ hpf_band = 4;
+
+ goto hpf_write;
+ }
+
+ for (i = 0; i < 4; i++) {
+ freq_step = div_u64((freq_range_hpf[i][1] -
+ freq_range_hpf[i][0]), 15);
+
+ if (freq > freq_range_hpf[i][0] &&
+ (freq < freq_range_hpf[i][1] + freq_step)) {
+ hpf_band = i + 1;
+
+ for (j = 1; j <= 16; j++) {
+ if (freq < (freq_range_hpf[i][0] + (freq_step * j))) {
+ hpf_step = j - 1;
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ /* Close HPF frequency gap between 12 and 12.5 GHz */
+ if (freq >= 12000 * HZ_PER_MHZ && freq <= 12500 * HZ_PER_MHZ) {
+ hpf_band = 3;
+ hpf_step = 15;
+ }
+
+hpf_write:
+ ret = regmap_update_bits(st->regmap, ADMV8818_REG_WR0_SW,
+ ADMV8818_SW_IN_SET_WR0_MSK |
+ ADMV8818_SW_IN_WR0_MSK,
+ FIELD_PREP(ADMV8818_SW_IN_SET_WR0_MSK, 1) |
+ FIELD_PREP(ADMV8818_SW_IN_WR0_MSK, hpf_band));
+ if (ret)
+ return ret;
+
+ return regmap_update_bits(st->regmap, ADMV8818_REG_WR0_FILTER,
+ ADMV8818_HPF_WR0_MSK,
+ FIELD_PREP(ADMV8818_HPF_WR0_MSK, hpf_step));
+}
+
+static int admv8818_hpf_select(struct admv8818_state *st, u64 freq)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __admv8818_hpf_select(st, freq);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int __admv8818_lpf_select(struct admv8818_state *st, u64 freq)
+{
+ unsigned int lpf_step = 0, lpf_band = 0, i, j;
+ u64 freq_step;
+ int ret;
+
+ if (freq > freq_range_lpf[3][1])
+ goto lpf_write;
+
+ if (freq < freq_range_lpf[0][0]) {
+ lpf_band = 1;
+
+ goto lpf_write;
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (freq > freq_range_lpf[i][0] && freq < freq_range_lpf[i][1]) {
+ lpf_band = i + 1;
+ freq_step = div_u64((freq_range_lpf[i][1] - freq_range_lpf[i][0]), 15);
+
+ for (j = 0; j <= 15; j++) {
+ if (freq < (freq_range_lpf[i][0] + (freq_step * j))) {
+ lpf_step = j;
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+lpf_write:
+ ret = regmap_update_bits(st->regmap, ADMV8818_REG_WR0_SW,
+ ADMV8818_SW_OUT_SET_WR0_MSK |
+ ADMV8818_SW_OUT_WR0_MSK,
+ FIELD_PREP(ADMV8818_SW_OUT_SET_WR0_MSK, 1) |
+ FIELD_PREP(ADMV8818_SW_OUT_WR0_MSK, lpf_band));
+ if (ret)
+ return ret;
+
+ return regmap_update_bits(st->regmap, ADMV8818_REG_WR0_FILTER,
+ ADMV8818_LPF_WR0_MSK,
+ FIELD_PREP(ADMV8818_LPF_WR0_MSK, lpf_step));
+}
+
+static int admv8818_lpf_select(struct admv8818_state *st, u64 freq)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __admv8818_lpf_select(st, freq);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int admv8818_rfin_band_select(struct admv8818_state *st)
+{
+ int ret;
+
+ st->cf_hz = clk_get_rate(st->clkin);
+
+ mutex_lock(&st->lock);
+
+ ret = __admv8818_hpf_select(st, st->cf_hz);
+ if (ret)
+ goto exit;
+
+ ret = __admv8818_lpf_select(st, st->cf_hz);
+exit:
+ mutex_unlock(&st->lock);
+ return ret;
+}
+
+static int __admv8818_read_hpf_freq(struct admv8818_state *st, u64 *hpf_freq)
+{
+ unsigned int data, hpf_band, hpf_state;
+ int ret;
+
+ ret = regmap_read(st->regmap, ADMV8818_REG_WR0_SW, &data);
+ if (ret)
+ return ret;
+
+ hpf_band = FIELD_GET(ADMV8818_SW_IN_WR0_MSK, data);
+ if (!hpf_band) {
+ *hpf_freq = 0;
+ return ret;
+ }
+
+ ret = regmap_read(st->regmap, ADMV8818_REG_WR0_FILTER, &data);
+ if (ret)
+ return ret;
+
+ hpf_state = FIELD_GET(ADMV8818_HPF_WR0_MSK, data);
+
+ *hpf_freq = div_u64(freq_range_hpf[hpf_band - 1][1] - freq_range_hpf[hpf_band - 1][0], 15);
+ *hpf_freq = freq_range_hpf[hpf_band - 1][0] + (*hpf_freq * hpf_state);
+
+ return ret;
+}
+
+static int admv8818_read_hpf_freq(struct admv8818_state *st, u64 *hpf_freq)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __admv8818_read_hpf_freq(st, hpf_freq);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int __admv8818_read_lpf_freq(struct admv8818_state *st, u64 *lpf_freq)
+{
+ unsigned int data, lpf_band, lpf_state;
+ int ret;
+
+ ret = regmap_read(st->regmap, ADMV8818_REG_WR0_SW, &data);
+ if (ret)
+ return ret;
+
+ lpf_band = FIELD_GET(ADMV8818_SW_OUT_WR0_MSK, data);
+ if (!lpf_band) {
+ *lpf_freq = 0;
+ return ret;
+ }
+
+ ret = regmap_read(st->regmap, ADMV8818_REG_WR0_FILTER, &data);
+ if (ret)
+ return ret;
+
+ lpf_state = FIELD_GET(ADMV8818_LPF_WR0_MSK, data);
+
+ *lpf_freq = div_u64(freq_range_lpf[lpf_band - 1][1] - freq_range_lpf[lpf_band - 1][0], 15);
+ *lpf_freq = freq_range_lpf[lpf_band - 1][0] + (*lpf_freq * lpf_state);
+
+ return ret;
+}
+
+static int admv8818_read_lpf_freq(struct admv8818_state *st, u64 *lpf_freq)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __admv8818_read_lpf_freq(st, lpf_freq);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int admv8818_write_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int val, int val2, long info)
+{
+ struct admv8818_state *st = iio_priv(indio_dev);
+
+ u64 freq = ((u64)val2 << 32 | (u32)val);
+
+ switch (info) {
+ case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY:
+ return admv8818_lpf_select(st, freq);
+ case IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY:
+ return admv8818_hpf_select(st, freq);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int admv8818_read_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int *val, int *val2, long info)
+{
+ struct admv8818_state *st = iio_priv(indio_dev);
+ int ret;
+ u64 freq;
+
+ switch (info) {
+ case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY:
+ ret = admv8818_read_lpf_freq(st, &freq);
+ if (ret)
+ return ret;
+
+ *val = (u32)freq;
+ *val2 = (u32)(freq >> 32);
+
+ return IIO_VAL_INT_64;
+ case IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY:
+ ret = admv8818_read_hpf_freq(st, &freq);
+ if (ret)
+ return ret;
+
+ *val = (u32)freq;
+ *val2 = (u32)(freq >> 32);
+
+ return IIO_VAL_INT_64;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int admv8818_reg_access(struct iio_dev *indio_dev,
+ unsigned int reg,
+ unsigned int write_val,
+ unsigned int *read_val)
+{
+ struct admv8818_state *st = iio_priv(indio_dev);
+
+ if (read_val)
+ return regmap_read(st->regmap, reg, read_val);
+ else
+ return regmap_write(st->regmap, reg, write_val);
+}
+
+static int admv8818_get_mode(struct iio_dev *indio_dev,
+ const struct iio_chan_spec *chan)
+{
+ struct admv8818_state *st = iio_priv(indio_dev);
+
+ return st->filter_mode;
+}
+
+static int admv8818_set_mode(struct iio_dev *indio_dev,
+ const struct iio_chan_spec *chan,
+ unsigned int mode)
+{
+ struct admv8818_state *st = iio_priv(indio_dev);
+ int ret = 0;
+
+ if (!st->clkin) {
+ if (mode == ADMV8818_MANUAL_MODE)
+ return 0;
+
+ return -EINVAL;
+ }
+
+ switch (mode) {
+ case ADMV8818_AUTO_MODE:
+ if (!st->filter_mode)
+ return 0;
+
+ ret = clk_prepare_enable(st->clkin);
+ if (ret)
+ return ret;
+
+ ret = clk_notifier_register(st->clkin, &st->nb);
+ if (ret) {
+ clk_disable_unprepare(st->clkin);
+
+ return ret;
+ }
+
+ break;
+ case ADMV8818_MANUAL_MODE:
+ if (st->filter_mode)
+ return 0;
+
+ clk_disable_unprepare(st->clkin);
+
+ ret = clk_notifier_unregister(st->clkin, &st->nb);
+ if (ret)
+ return ret;
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ st->filter_mode = mode;
+
+ return ret;
+}
+
+static const struct iio_info admv8818_info = {
+ .write_raw = admv8818_write_raw,
+ .read_raw = admv8818_read_raw,
+ .debugfs_reg_access = &admv8818_reg_access,
+};
+
+static const struct iio_enum admv8818_mode_enum = {
+ .items = admv8818_modes,
+ .num_items = ARRAY_SIZE(admv8818_modes),
+ .get = admv8818_get_mode,
+ .set = admv8818_set_mode,
+};
+
+static const struct iio_chan_spec_ext_info admv8818_ext_info[] = {
+ IIO_ENUM("filter_mode", IIO_SHARED_BY_ALL, &admv8818_mode_enum),
+ IIO_ENUM_AVAILABLE("filter_mode", IIO_SHARED_BY_ALL, &admv8818_mode_enum),
+ { },
+};
+
+#define ADMV8818_CHAN(_channel) { \
+ .type = IIO_ALTVOLTAGE, \
+ .output = 1, \
+ .indexed = 1, \
+ .channel = _channel, \
+ .info_mask_separate = \
+ BIT(IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY) | \
+ BIT(IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY) \
+}
+
+#define ADMV8818_CHAN_BW_CF(_channel, _admv8818_ext_info) { \
+ .type = IIO_ALTVOLTAGE, \
+ .output = 1, \
+ .indexed = 1, \
+ .channel = _channel, \
+ .ext_info = _admv8818_ext_info, \
+}
+
+static const struct iio_chan_spec admv8818_channels[] = {
+ ADMV8818_CHAN(0),
+ ADMV8818_CHAN_BW_CF(0, admv8818_ext_info),
+};
+
+static int admv8818_freq_change(struct notifier_block *nb, unsigned long action, void *data)
+{
+ struct admv8818_state *st = container_of(nb, struct admv8818_state, nb);
+
+ if (action == POST_RATE_CHANGE)
+ return notifier_from_errno(admv8818_rfin_band_select(st));
+
+ return NOTIFY_OK;
+}
+
+static void admv8818_clk_notifier_unreg(void *data)
+{
+ struct admv8818_state *st = data;
+
+ if (st->filter_mode == 0)
+ clk_notifier_unregister(st->clkin, &st->nb);
+}
+
+static void admv8818_clk_disable(void *data)
+{
+ struct admv8818_state *st = data;
+
+ if (st->filter_mode == 0)
+ clk_disable_unprepare(st->clkin);
+}
+
+static int admv8818_init(struct admv8818_state *st)
+{
+ int ret;
+ struct spi_device *spi = st->spi;
+ unsigned int chip_id;
+
+ ret = regmap_update_bits(st->regmap, ADMV8818_REG_SPI_CONFIG_A,
+ ADMV8818_SOFTRESET_N_MSK |
+ ADMV8818_SOFTRESET_MSK,
+ FIELD_PREP(ADMV8818_SOFTRESET_N_MSK, 1) |
+ FIELD_PREP(ADMV8818_SOFTRESET_MSK, 1));
+ if (ret) {
+ dev_err(&spi->dev, "ADMV8818 Soft Reset failed.\n");
+ return ret;
+ }
+
+ ret = regmap_update_bits(st->regmap, ADMV8818_REG_SPI_CONFIG_A,
+ ADMV8818_SDOACTIVE_N_MSK |
+ ADMV8818_SDOACTIVE_MSK,
+ FIELD_PREP(ADMV8818_SDOACTIVE_N_MSK, 1) |
+ FIELD_PREP(ADMV8818_SDOACTIVE_MSK, 1));
+ if (ret) {
+ dev_err(&spi->dev, "ADMV8818 SDO Enable failed.\n");
+ return ret;
+ }
+
+ ret = regmap_read(st->regmap, ADMV8818_REG_CHIPTYPE, &chip_id);
+ if (ret) {
+ dev_err(&spi->dev, "ADMV8818 Chip ID read failed.\n");
+ return ret;
+ }
+
+ if (chip_id != 0x1) {
+ dev_err(&spi->dev, "ADMV8818 Invalid Chip ID.\n");
+ return -EINVAL;
+ }
+
+ ret = regmap_update_bits(st->regmap, ADMV8818_REG_SPI_CONFIG_B,
+ ADMV8818_SINGLE_INSTRUCTION_MSK,
+ FIELD_PREP(ADMV8818_SINGLE_INSTRUCTION_MSK, 1));
+ if (ret) {
+ dev_err(&spi->dev, "ADMV8818 Single Instruction failed.\n");
+ return ret;
+ }
+
+ if (st->clkin)
+ return admv8818_rfin_band_select(st);
+ else
+ return 0;
+}
+
+static int admv8818_clk_setup(struct admv8818_state *st)
+{
+ struct spi_device *spi = st->spi;
+ int ret;
+
+ st->clkin = devm_clk_get_optional(&spi->dev, "rf_in");
+ if (IS_ERR(st->clkin))
+ return dev_err_probe(&spi->dev, PTR_ERR(st->clkin),
+ "failed to get the input clock\n");
+ else if (!st->clkin)
+ return 0;
+
+ ret = clk_prepare_enable(st->clkin);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(&spi->dev, admv8818_clk_disable, st);
+ if (ret)
+ return ret;
+
+ st->nb.notifier_call = admv8818_freq_change;
+ ret = clk_notifier_register(st->clkin, &st->nb);
+ if (ret < 0)
+ return ret;
+
+ return devm_add_action_or_reset(&spi->dev, admv8818_clk_notifier_unreg, st);
+}
+
+static int admv8818_probe(struct spi_device *spi)
+{
+ struct iio_dev *indio_dev;
+ struct regmap *regmap;
+ struct admv8818_state *st;
+ int ret;
+
+ indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st));
+ if (!indio_dev)
+ return -ENOMEM;
+
+ regmap = devm_regmap_init_spi(spi, &admv8818_regmap_config);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ st = iio_priv(indio_dev);
+ st->regmap = regmap;
+
+ indio_dev->info = &admv8818_info;
+ indio_dev->name = "admv8818";
+ indio_dev->channels = admv8818_channels;
+ indio_dev->num_channels = ARRAY_SIZE(admv8818_channels);
+
+ st->spi = spi;
+
+ ret = admv8818_clk_setup(st);
+ if (ret)
+ return ret;
+
+ mutex_init(&st->lock);
+
+ ret = admv8818_init(st);
+ if (ret)
+ return ret;
+
+ return devm_iio_device_register(&spi->dev, indio_dev);
+}
+
+static const struct spi_device_id admv8818_id[] = {
+ { "admv8818", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(spi, admv8818_id);
+
+static const struct of_device_id admv8818_of_match[] = {
+ { .compatible = "adi,admv8818" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, admv8818_of_match);
+
+static struct spi_driver admv8818_driver = {
+ .driver = {
+ .name = "admv8818",
+ .of_match_table = admv8818_of_match,
+ },
+ .probe = admv8818_probe,
+ .id_table = admv8818_id,
+};
+module_spi_driver(admv8818_driver);
+
+MODULE_AUTHOR("Antoniu Miclaus <antoniu.miclaus@analog.com");
+MODULE_DESCRIPTION("Analog Devices ADMV8818");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/frequency/Kconfig b/drivers/iio/frequency/Kconfig
index 2c9e0559e8a4..b44036f843af 100644
--- a/drivers/iio/frequency/Kconfig
+++ b/drivers/iio/frequency/Kconfig
@@ -50,6 +50,16 @@ config ADF4371
To compile this driver as a module, choose M here: the
module will be called adf4371.
+config ADMV1013
+ tristate "Analog Devices ADMV1013 Microwave Upconverter"
+ depends on SPI && COMMON_CLK
+ help
+ Say yes here to build support for Analog Devices ADMV1013
+ 24 GHz to 44 GHz, Wideband, Microwave Upconverter.
+
+ To compile this driver as a module, choose M here: the
+ module will be called admv1013.
+
config ADRF6780
tristate "Analog Devices ADRF6780 Microwave Upconverter"
depends on SPI
diff --git a/drivers/iio/frequency/Makefile b/drivers/iio/frequency/Makefile
index ae3136c79202..ae6899856c99 100644
--- a/drivers/iio/frequency/Makefile
+++ b/drivers/iio/frequency/Makefile
@@ -7,4 +7,5 @@
obj-$(CONFIG_AD9523) += ad9523.o
obj-$(CONFIG_ADF4350) += adf4350.o
obj-$(CONFIG_ADF4371) += adf4371.o
+obj-$(CONFIG_ADMV1013) += admv1013.o
obj-$(CONFIG_ADRF6780) += adrf6780.o
diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c
new file mode 100644
index 000000000000..6cdeb50143af
--- /dev/null
+++ b/drivers/iio/frequency/admv1013.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ADMV1013 driver
+ *
+ * Copyright 2021 Analog Devices Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/iio/iio.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/notifier.h>
+#include <linux/property.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+#include <linux/units.h>
+
+#include <asm/unaligned.h>
+
+/* ADMV1013 Register Map */
+#define ADMV1013_REG_SPI_CONTROL 0x00
+#define ADMV1013_REG_ALARM 0x01
+#define ADMV1013_REG_ALARM_MASKS 0x02
+#define ADMV1013_REG_ENABLE 0x03
+#define ADMV1013_REG_LO_AMP_I 0x05
+#define ADMV1013_REG_LO_AMP_Q 0x06
+#define ADMV1013_REG_OFFSET_ADJUST_I 0x07
+#define ADMV1013_REG_OFFSET_ADJUST_Q 0x08
+#define ADMV1013_REG_QUAD 0x09
+#define ADMV1013_REG_VVA_TEMP_COMP 0x0A
+
+/* ADMV1013_REG_SPI_CONTROL Map */
+#define ADMV1013_PARITY_EN_MSK BIT(15)
+#define ADMV1013_SPI_SOFT_RESET_MSK BIT(14)
+#define ADMV1013_CHIP_ID_MSK GENMASK(11, 4)
+#define ADMV1013_CHIP_ID 0xA
+#define ADMV1013_REVISION_ID_MSK GENMASK(3, 0)
+
+/* ADMV1013_REG_ALARM Map */
+#define ADMV1013_PARITY_ERROR_MSK BIT(15)
+#define ADMV1013_TOO_FEW_ERRORS_MSK BIT(14)
+#define ADMV1013_TOO_MANY_ERRORS_MSK BIT(13)
+#define ADMV1013_ADDRESS_RANGE_ERROR_MSK BIT(12)
+
+/* ADMV1013_REG_ENABLE Map */
+#define ADMV1013_VGA_PD_MSK BIT(15)
+#define ADMV1013_MIXER_PD_MSK BIT(14)
+#define ADMV1013_QUAD_PD_MSK GENMASK(13, 11)
+#define ADMV1013_BG_PD_MSK BIT(10)
+#define ADMV1013_MIXER_IF_EN_MSK BIT(7)
+#define ADMV1013_DET_EN_MSK BIT(5)
+
+/* ADMV1013_REG_LO_AMP Map */
+#define ADMV1013_LOAMP_PH_ADJ_FINE_MSK GENMASK(13, 7)
+#define ADMV1013_MIXER_VGATE_MSK GENMASK(6, 0)
+
+/* ADMV1013_REG_OFFSET_ADJUST Map */
+#define ADMV1013_MIXER_OFF_ADJ_P_MSK GENMASK(15, 9)
+#define ADMV1013_MIXER_OFF_ADJ_N_MSK GENMASK(8, 2)
+
+/* ADMV1013_REG_QUAD Map */
+#define ADMV1013_QUAD_SE_MODE_MSK GENMASK(9, 6)
+#define ADMV1013_QUAD_FILTERS_MSK GENMASK(3, 0)
+
+/* ADMV1013_REG_VVA_TEMP_COMP Map */
+#define ADMV1013_VVA_TEMP_COMP_MSK GENMASK(15, 0)
+
+/* ADMV1013 Miscellaneous Defines */
+#define ADMV1013_READ BIT(7)
+#define ADMV1013_REG_ADDR_READ_MSK GENMASK(6, 1)
+#define ADMV1013_REG_ADDR_WRITE_MSK GENMASK(22, 17)
+#define ADMV1013_REG_DATA_MSK GENMASK(16, 1)
+
+enum {
+ ADMV1013_IQ_MODE,
+ ADMV1013_IF_MODE
+};
+
+enum {
+ ADMV1013_RFMOD_I_CALIBPHASE,
+ ADMV1013_RFMOD_Q_CALIBPHASE,
+};
+
+enum {
+ ADMV1013_SE_MODE_POS = 6,
+ ADMV1013_SE_MODE_NEG = 9,
+ ADMV1013_SE_MODE_DIFF = 12
+};
+
+struct admv1013_state {
+ struct spi_device *spi;
+ struct clk *clkin;
+ /* Protect against concurrent accesses to the device and to data */
+ struct mutex lock;
+ struct regulator *reg;
+ struct notifier_block nb;
+ unsigned int input_mode;
+ unsigned int quad_se_mode;
+ bool det_en;
+ u8 data[3] ____cacheline_aligned;
+};
+
+static int __admv1013_spi_read(struct admv1013_state *st, unsigned int reg,
+ unsigned int *val)
+{
+ int ret;
+ struct spi_transfer t = {0};
+
+ st->data[0] = ADMV1013_READ | FIELD_PREP(ADMV1013_REG_ADDR_READ_MSK, reg);
+ st->data[1] = 0x0;
+ st->data[2] = 0x0;
+
+ t.rx_buf = &st->data[0];
+ t.tx_buf = &st->data[0];
+ t.len = 3;
+
+ ret = spi_sync_transfer(st->spi, &t, 1);
+ if (ret)
+ return ret;
+
+ *val = FIELD_GET(ADMV1013_REG_DATA_MSK, get_unaligned_be24(&st->data[0]));
+
+ return ret;
+}
+
+static int admv1013_spi_read(struct admv1013_state *st, unsigned int reg,
+ unsigned int *val)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __admv1013_spi_read(st, reg, val);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int __admv1013_spi_write(struct admv1013_state *st,
+ unsigned int reg,
+ unsigned int val)
+{
+ put_unaligned_be24(FIELD_PREP(ADMV1013_REG_DATA_MSK, val) |
+ FIELD_PREP(ADMV1013_REG_ADDR_WRITE_MSK, reg), &st->data[0]);
+
+ return spi_write(st->spi, &st->data[0], 3);
+}
+
+static int admv1013_spi_write(struct admv1013_state *st, unsigned int reg,
+ unsigned int val)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __admv1013_spi_write(st, reg, val);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int __admv1013_spi_update_bits(struct admv1013_state *st, unsigned int reg,
+ unsigned int mask, unsigned int val)
+{
+ int ret;
+ unsigned int data, temp;
+
+ ret = __admv1013_spi_read(st, reg, &data);
+ if (ret)
+ return ret;
+
+ temp = (data & ~mask) | (val & mask);
+
+ return __admv1013_spi_write(st, reg, temp);
+}
+
+static int admv1013_spi_update_bits(struct admv1013_state *st, unsigned int reg,
+ unsigned int mask, unsigned int val)
+{
+ int ret;
+
+ mutex_lock(&st->lock);
+ ret = __admv1013_spi_update_bits(st, reg, mask, val);
+ mutex_unlock(&st->lock);
+
+ return ret;
+}
+
+static int admv1013_read_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int *val, int *val2, long info)
+{
+ struct admv1013_state *st = iio_priv(indio_dev);
+ unsigned int data, addr;
+ int ret;
+
+ switch (info) {
+ case IIO_CHAN_INFO_CALIBBIAS:
+ switch (chan->channel) {
+ case IIO_MOD_I:
+ addr = ADMV1013_REG_OFFSET_ADJUST_I;
+ break;
+ case IIO_MOD_Q:
+ addr = ADMV1013_REG_OFFSET_ADJUST_Q;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = admv1013_spi_read(st, addr, &data);
+ if (ret)
+ return ret;
+
+ if (!chan->channel)
+ *val = FIELD_GET(ADMV1013_MIXER_OFF_ADJ_P_MSK, data);
+ else
+ *val = FIELD_GET(ADMV1013_MIXER_OFF_ADJ_N_MSK, data);
+
+ return IIO_VAL_INT;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int admv1013_write_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ int val, int val2, long info)
+{
+ struct admv1013_state *st = iio_priv(indio_dev);
+ unsigned int addr, data, msk;
+
+ switch (info) {
+ case IIO_CHAN_INFO_CALIBBIAS:
+ switch (chan->channel2) {
+ case IIO_MOD_I:
+ addr = ADMV1013_REG_OFFSET_ADJUST_I;
+ break;
+ case IIO_MOD_Q:
+ addr = ADMV1013_REG_OFFSET_ADJUST_Q;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!chan->channel) {
+ msk = ADMV1013_MIXER_OFF_ADJ_P_MSK;
+ data = FIELD_PREP(ADMV1013_MIXER_OFF_ADJ_P_MSK, val);
+ } else {
+ msk = ADMV1013_MIXER_OFF_ADJ_N_MSK;
+ data = FIELD_PREP(ADMV1013_MIXER_OFF_ADJ_N_MSK, val);
+ }
+
+ return admv1013_spi_update_bits(st, addr, msk, data);
+ default:
+ return -EINVAL;
+ }
+}
+
+static ssize_t admv1013_read(struct iio_dev *indio_dev,
+ uintptr_t private,
+ const struct iio_chan_spec *chan,
+ char *buf)
+{
+ struct admv1013_state *st = iio_priv(indio_dev);
+ unsigned int data, addr;
+ int ret;
+
+ switch ((u32)private) {
+ case ADMV1013_RFMOD_I_CALIBPHASE:
+ addr = ADMV1013_REG_LO_AMP_I;
+ break;
+ case ADMV1013_RFMOD_Q_CALIBPHASE:
+ addr = ADMV1013_REG_LO_AMP_Q;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = admv1013_spi_read(st, addr, &data);
+ if (ret)
+ return ret;
+
+ data = FIELD_GET(ADMV1013_LOAMP_PH_ADJ_FINE_MSK, data);
+
+ return sysfs_emit(buf, "%u\n", data);
+}
+
+static ssize_t admv1013_write(struct iio_dev *indio_dev,
+ uintptr_t private,
+ const struct iio_chan_spec *chan,
+ const char *buf, size_t len)
+{
+ struct admv1013_state *st = iio_priv(indio_dev);
+ unsigned int data;
+ int ret;
+
+ ret = kstrtou32(buf, 10, &data);
+ if (ret)
+ return ret;
+
+ data = FIELD_PREP(ADMV1013_LOAMP_PH_ADJ_FINE_MSK, data);
+
+ switch ((u32)private) {
+ case ADMV1013_RFMOD_I_CALIBPHASE:
+ ret = admv1013_spi_update_bits(st, ADMV1013_REG_LO_AMP_I,
+ ADMV1013_LOAMP_PH_ADJ_FINE_MSK,
+ data);
+ if (ret)
+ return ret;
+ break;
+ case ADMV1013_RFMOD_Q_CALIBPHASE:
+ ret = admv1013_spi_update_bits(st, ADMV1013_REG_LO_AMP_Q,
+ ADMV1013_LOAMP_PH_ADJ_FINE_MSK,
+ data);
+ if (ret)
+ return ret;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret ? ret : len;
+}
+
+static int admv1013_update_quad_filters(struct admv1013_state *st)
+{
+ unsigned int filt_raw;
+ u64 rate = clk_get_rate(st->clkin);
+
+ if (rate >= (5400 * HZ_PER_MHZ) && rate <= (7000 * HZ_PER_MHZ))
+ filt_raw = 15;
+ else if (rate >= (5400 * HZ_PER_MHZ) && rate <= (8000 * HZ_PER_MHZ))
+ filt_raw = 10;
+ else if (rate >= (6600 * HZ_PER_MHZ) && rate <= (9200 * HZ_PER_MHZ))
+ filt_raw = 5;
+ else
+ filt_raw = 0;
+
+ return __admv1013_spi_update_bits(st, ADMV1013_REG_QUAD,
+ ADMV1013_QUAD_FILTERS_MSK,
+ FIELD_PREP(ADMV1013_QUAD_FILTERS_MSK, filt_raw));
+}
+
+static int admv1013_update_mixer_vgate(struct admv1013_state *st)
+{
+ unsigned int vcm, mixer_vgate;
+
+ vcm = regulator_get_voltage(st->reg);
+
+ if (vcm >= 0 && vcm < 1800000)
+ mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100;
+ else if (vcm > 1800000 && vcm < 2600000)
+ mixer_vgate = (2375 * vcm / 1000000 + 125) / 100;
+ else
+ return -EINVAL;
+
+ return __admv1013_spi_update_bits(st, ADMV1013_REG_LO_AMP_I,
+ ADMV1013_MIXER_VGATE_MSK,
+ FIELD_PREP(ADMV1013_MIXER_VGATE_MSK, mixer_vgate));
+}
+
+static int admv1013_reg_access(struct iio_dev *indio_dev,
+ unsigned int reg,
+ unsigned int write_val,
+ unsigned int *read_val)
+{
+ struct admv1013_state *st = iio_priv(indio_dev);
+
+ if (read_val)
+ return admv1013_spi_read(st, reg, read_val);
+ else
+ return admv1013_spi_write(st, reg, write_val);
+}
+
+static const struct iio_info admv1013_info = {
+ .read_raw = admv1013_read_raw,
+ .write_raw = admv1013_write_raw,
+ .debugfs_reg_access = &admv1013_reg_access,
+};
+
+static int admv1013_freq_change(struct notifier_block *nb, unsigned long action, void *data)
+{
+ struct admv1013_state *st = container_of(nb, struct admv1013_state, nb);
+ int ret;
+
+ if (action == POST_RATE_CHANGE) {
+ mutex_lock(&st->lock);
+ ret = notifier_from_errno(admv1013_update_quad_filters(st));
+ mutex_unlock(&st->lock);
+ return ret;
+ }
+
+ return NOTIFY_OK;
+}
+
+#define _ADMV1013_EXT_INFO(_name, _shared, _ident) { \
+ .name = _name, \
+ .read = admv1013_read, \
+ .write = admv1013_write, \
+ .private = _ident, \
+ .shared = _shared, \
+}
+
+static const struct iio_chan_spec_ext_info admv1013_ext_info[] = {
+ _ADMV1013_EXT_INFO("i_calibphase", IIO_SEPARATE, ADMV1013_RFMOD_I_CALIBPHASE),
+ _ADMV1013_EXT_INFO("q_calibphase", IIO_SEPARATE, ADMV1013_RFMOD_Q_CALIBPHASE),
+ { },
+};
+
+#define ADMV1013_CHAN_PHASE(_channel, _channel2, _admv1013_ext_info) { \
+ .type = IIO_ALTVOLTAGE, \
+ .output = 0, \
+ .indexed = 1, \
+ .channel2 = _channel2, \
+ .channel = _channel, \
+ .differential = 1, \
+ .ext_info = _admv1013_ext_info, \
+ }
+
+#define ADMV1013_CHAN_CALIB(_channel, rf_comp) { \
+ .type = IIO_ALTVOLTAGE, \
+ .output = 0, \
+ .indexed = 1, \
+ .channel = _channel, \
+ .channel2 = IIO_MOD_##rf_comp, \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_CALIBBIAS), \
+ }
+
+static const struct iio_chan_spec admv1013_channels[] = {
+ ADMV1013_CHAN_PHASE(0, 1, admv1013_ext_info),
+ ADMV1013_CHAN_CALIB(0, I),
+ ADMV1013_CHAN_CALIB(0, Q),
+ ADMV1013_CHAN_CALIB(1, I),
+ ADMV1013_CHAN_CALIB(1, Q),
+};
+
+static int admv1013_init(struct admv1013_state *st)
+{
+ int ret;
+ unsigned int data;
+ struct spi_device *spi = st->spi;
+
+ /* Perform a software reset */
+ ret = __admv1013_spi_update_bits(st, ADMV1013_REG_SPI_CONTROL,
+ ADMV1013_SPI_SOFT_RESET_MSK,
+ FIELD_PREP(ADMV1013_SPI_SOFT_RESET_MSK, 1));
+ if (ret)
+ return ret;
+
+ ret = __admv1013_spi_update_bits(st, ADMV1013_REG_SPI_CONTROL,
+ ADMV1013_SPI_SOFT_RESET_MSK,
+ FIELD_PREP(ADMV1013_SPI_SOFT_RESET_MSK, 0));
+ if (ret)
+ return ret;
+
+ ret = __admv1013_spi_read(st, ADMV1013_REG_SPI_CONTROL, &data);
+ if (ret)
+ return ret;
+
+ data = FIELD_GET(ADMV1013_CHIP_ID_MSK, data);
+ if (data != ADMV1013_CHIP_ID) {
+ dev_err(&spi->dev, "Invalid Chip ID.\n");
+ return -EINVAL;
+ }
+
+ ret = __admv1013_spi_write(st, ADMV1013_REG_VVA_TEMP_COMP, 0xE700);
+ if (ret)
+ return ret;
+
+ data = FIELD_PREP(ADMV1013_QUAD_SE_MODE_MSK, st->quad_se_mode);
+
+ ret = __admv1013_spi_update_bits(st, ADMV1013_REG_QUAD,
+ ADMV1013_QUAD_SE_MODE_MSK, data);
+ if (ret)
+ return ret;
+
+ ret = admv1013_update_mixer_vgate(st);
+ if (ret)
+ return ret;
+
+ ret = admv1013_update_quad_filters(st);
+ if (ret)
+ return ret;
+
+ return __admv1013_spi_update_bits(st, ADMV1013_REG_ENABLE,
+ ADMV1013_DET_EN_MSK |
+ ADMV1013_MIXER_IF_EN_MSK,
+ st->det_en |
+ st->input_mode);
+}
+
+static void admv1013_clk_disable(void *data)
+{
+ clk_disable_unprepare(data);
+}
+
+static void admv1013_reg_disable(void *data)
+{
+ regulator_disable(data);
+}
+
+static void admv1013_powerdown(void *data)
+{
+ unsigned int enable_reg, enable_reg_msk;
+
+ /* Disable all components in the Enable Register */
+ enable_reg_msk = ADMV1013_VGA_PD_MSK |
+ ADMV1013_MIXER_PD_MSK |
+ ADMV1013_QUAD_PD_MSK |
+ ADMV1013_BG_PD_MSK |
+ ADMV1013_MIXER_IF_EN_MSK |
+ ADMV1013_DET_EN_MSK;
+
+ enable_reg = FIELD_PREP(ADMV1013_VGA_PD_MSK, 1) |
+ FIELD_PREP(ADMV1013_MIXER_PD_MSK, 1) |
+ FIELD_PREP(ADMV1013_QUAD_PD_MSK, 7) |
+ FIELD_PREP(ADMV1013_BG_PD_MSK, 1) |
+ FIELD_PREP(ADMV1013_MIXER_IF_EN_MSK, 0) |
+ FIELD_PREP(ADMV1013_DET_EN_MSK, 0);
+
+ admv1013_spi_update_bits(data, ADMV1013_REG_ENABLE, enable_reg_msk, enable_reg);
+}
+
+static int admv1013_properties_parse(struct admv1013_state *st)
+{
+ int ret;
+ const char *str;
+ struct spi_device *spi = st->spi;
+
+ st->det_en = device_property_read_bool(&spi->dev, "adi,detector-enable");
+
+ ret = device_property_read_string(&spi->dev, "adi,input-mode", &str);
+ if (ret)
+ st->input_mode = ADMV1013_IQ_MODE;
+
+ if (!strcmp(str, "iq"))
+ st->input_mode = ADMV1013_IQ_MODE;
+ else if (!strcmp(str, "if"))
+ st->input_mode = ADMV1013_IF_MODE;
+ else
+ return -EINVAL;
+
+ ret = device_property_read_string(&spi->dev, "adi,quad-se-mode", &str);
+ if (ret)
+ st->quad_se_mode = ADMV1013_SE_MODE_DIFF;
+
+ if (!strcmp(str, "diff"))
+ st->quad_se_mode = ADMV1013_SE_MODE_DIFF;
+ else if (!strcmp(str, "se-pos"))
+ st->quad_se_mode = ADMV1013_SE_MODE_POS;
+ else if (!strcmp(str, "se-neg"))
+ st->quad_se_mode = ADMV1013_SE_MODE_NEG;
+ else
+ return -EINVAL;
+
+ st->reg = devm_regulator_get(&spi->dev, "vcm");
+ if (IS_ERR(st->reg))
+ return dev_err_probe(&spi->dev, PTR_ERR(st->reg),
+ "failed to get the common-mode voltage\n");
+
+ st->clkin = devm_clk_get(&spi->dev, "lo_in");
+ if (IS_ERR(st->clkin))
+ return dev_err_probe(&spi->dev, PTR_ERR(st->clkin),
+ "failed to get the LO input clock\n");
+
+ return 0;
+}
+
+static int admv1013_probe(struct spi_device *spi)
+{
+ struct iio_dev *indio_dev;
+ struct admv1013_state *st;
+ int ret;
+
+ indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st));
+ if (!indio_dev)
+ return -ENOMEM;
+
+ st = iio_priv(indio_dev);
+
+ indio_dev->info = &admv1013_info;
+ indio_dev->name = "admv1013";
+ indio_dev->channels = admv1013_channels;
+ indio_dev->num_channels = ARRAY_SIZE(admv1013_channels);
+
+ st->spi = spi;
+
+ ret = admv1013_properties_parse(st);
+ if (ret)
+ return ret;
+
+ ret = regulator_enable(st->reg);
+ if (ret) {
+ dev_err(&spi->dev, "Failed to enable specified Common-Mode Voltage!\n");
+ return ret;
+ }
+
+ ret = devm_add_action_or_reset(&spi->dev, admv1013_reg_disable,
+ st->reg);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare_enable(st->clkin);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(&spi->dev, admv1013_clk_disable, st->clkin);
+ if (ret)
+ return ret;
+
+ st->nb.notifier_call = admv1013_freq_change;
+ ret = devm_clk_notifier_register(&spi->dev, st->clkin, &st->nb);
+ if (ret)
+ return ret;
+
+ mutex_init(&st->lock);
+
+ ret = admv1013_init(st);
+ if (ret) {
+ dev_err(&spi->dev, "admv1013 init failed\n");
+ return ret;
+ }
+
+ ret = devm_add_action_or_reset(&spi->dev, admv1013_powerdown, st);
+ if (ret)
+ return ret;
+
+ return devm_iio_device_register(&spi->dev, indio_dev);
+}
+
+static const struct spi_device_id admv1013_id[] = {
+ { "admv1013", 0},
+ {}
+};
+MODULE_DEVICE_TABLE(spi, admv1013_id);
+
+static const struct of_device_id admv1013_of_match[] = {
+ { .compatible = "adi,admv1013" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, admv1013_of_match);
+
+static struct spi_driver admv1013_driver = {
+ .driver = {
+ .name = "admv1013",
+ .of_match_table = admv1013_of_match,
+ },
+ .probe = admv1013_probe,
+ .id_table = admv1013_id,
+};
+module_spi_driver(admv1013_driver);
+
+MODULE_AUTHOR("Antoniu Miclaus <antoniu.miclaus@analog.com");
+MODULE_DESCRIPTION("Analog Devices ADMV1013");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c
index 97b82f9a8e45..273f16dcaff8 100644
--- a/drivers/iio/health/afe4403.c
+++ b/drivers/iio/health/afe4403.c
@@ -345,9 +345,6 @@ err:
return IRQ_HANDLED;
}
-static const struct iio_trigger_ops afe4403_trigger_ops = {
-};
-
#define AFE4403_TIMING_PAIRS \
{ AFE440X_LED2STC, 0x000050 }, \
{ AFE440X_LED2ENDC, 0x0003e7 }, \
@@ -530,8 +527,6 @@ static int afe4403_probe(struct spi_device *spi)
iio_trigger_set_drvdata(afe->trig, indio_dev);
- afe->trig->ops = &afe4403_trigger_ops;
-
ret = iio_trigger_register(afe->trig);
if (ret) {
dev_err(afe->dev, "Unable to register IIO trigger\n");
diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c
index 7ef3f5e34de5..aa9311e1e655 100644
--- a/drivers/iio/health/afe4404.c
+++ b/drivers/iio/health/afe4404.c
@@ -347,9 +347,6 @@ err:
return IRQ_HANDLED;
}
-static const struct iio_trigger_ops afe4404_trigger_ops = {
-};
-
/* Default timings from data-sheet */
#define AFE4404_TIMING_PAIRS \
{ AFE440X_PRPCOUNT, 39999 }, \
@@ -537,8 +534,6 @@ static int afe4404_probe(struct i2c_client *client,
iio_trigger_set_drvdata(afe->trig, indio_dev);
- afe->trig->ops = &afe4404_trigger_ops;
-
ret = iio_trigger_register(afe->trig);
if (ret) {
dev_err(afe->dev, "Unable to register IIO trigger\n");
diff --git a/drivers/iio/iio_core.h b/drivers/iio/iio_core.h
index 61e318431de9..501e286702ef 100644
--- a/drivers/iio/iio_core.h
+++ b/drivers/iio/iio_core.h
@@ -16,7 +16,7 @@ struct iio_buffer;
struct iio_chan_spec;
struct iio_dev;
-extern struct device_type iio_device_type;
+extern const struct device_type iio_device_type;
struct iio_dev_buffer_pair {
struct iio_dev *indio_dev;
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
index 85b1934cec60..33d9afb1ba91 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
@@ -58,7 +58,7 @@ static int inv_icm42600_probe(struct i2c_client *client)
match = device_get_match_data(&client->dev);
if (!match)
return -EINVAL;
- chip = (enum inv_icm42600_chip)match;
+ chip = (uintptr_t)match;
regmap = devm_regmap_init_i2c(client, &inv_icm42600_regmap_config);
if (IS_ERR(regmap))
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c
index 323789697a08..e6305e5fa975 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c
@@ -57,7 +57,7 @@ static int inv_icm42600_probe(struct spi_device *spi)
match = device_get_match_data(&spi->dev);
if (!match)
return -EINVAL;
- chip = (enum inv_icm42600_chip)match;
+ chip = (uintptr_t)match;
regmap = devm_regmap_init_spi(spi, &inv_icm42600_regmap_config);
if (IS_ERR(regmap))
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
index 3ef17e3f50e2..fe03707ec2d3 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
@@ -110,7 +110,7 @@ static int inv_mpu_probe(struct i2c_client *client,
match = device_get_match_data(&client->dev);
if (match) {
- chip_type = (enum inv_devices)match;
+ chip_type = (uintptr_t)match;
name = client->name;
} else if (id) {
chip_type = (enum inv_devices)
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c
index b056f3fe2561..6800356b25fb 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c
@@ -45,7 +45,7 @@ static int inv_mpu_probe(struct spi_device *spi)
chip_type = (enum inv_devices)spi_id->driver_data;
name = spi_id->name;
} else if ((match = device_get_match_data(&spi->dev))) {
- chip_type = (enum inv_devices)match;
+ chip_type = (uintptr_t)match;
name = dev_name(&spi->dev);
} else {
return -ENODEV;
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index f2cbbc756459..727b4b6ac696 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -2244,7 +2244,9 @@ int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id,
return err;
hub_settings = &hw->settings->shub_settings;
- if (hub_settings->master_en.addr) {
+ if (hub_settings->master_en.addr &&
+ (!dev_fwnode(dev) ||
+ !device_property_read_bool(dev, "st,disable-sensor-hub"))) {
err = st_lsm6dsx_shub_probe(hw, name);
if (err < 0)
return err;
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index e180728914c0..94eb9f6cf128 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -1727,8 +1727,7 @@ int iio_buffers_alloc_sysfs_and_mask(struct iio_dev *indio_dev)
struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
const struct iio_chan_spec *channels;
struct iio_buffer *buffer;
- int unwind_idx;
- int ret, i;
+ int ret, i, idx;
size_t sz;
channels = indio_dev->channels;
@@ -1743,15 +1742,12 @@ int iio_buffers_alloc_sysfs_and_mask(struct iio_dev *indio_dev)
if (!iio_dev_opaque->attached_buffers_cnt)
return 0;
- for (i = 0; i < iio_dev_opaque->attached_buffers_cnt; i++) {
- buffer = iio_dev_opaque->attached_buffers[i];
- ret = __iio_buffer_alloc_sysfs_and_mask(buffer, indio_dev, i);
- if (ret) {
- unwind_idx = i - 1;
+ for (idx = 0; idx < iio_dev_opaque->attached_buffers_cnt; idx++) {
+ buffer = iio_dev_opaque->attached_buffers[idx];
+ ret = __iio_buffer_alloc_sysfs_and_mask(buffer, indio_dev, idx);
+ if (ret)
goto error_unwind_sysfs_and_mask;
- }
}
- unwind_idx = iio_dev_opaque->attached_buffers_cnt - 1;
sz = sizeof(*(iio_dev_opaque->buffer_ioctl_handler));
iio_dev_opaque->buffer_ioctl_handler = kzalloc(sz, GFP_KERNEL);
@@ -1767,9 +1763,9 @@ int iio_buffers_alloc_sysfs_and_mask(struct iio_dev *indio_dev)
return 0;
error_unwind_sysfs_and_mask:
- for (; unwind_idx >= 0; unwind_idx--) {
- buffer = iio_dev_opaque->attached_buffers[unwind_idx];
- __iio_buffer_free_sysfs_and_mask(buffer, indio_dev, unwind_idx);
+ while (idx--) {
+ buffer = iio_dev_opaque->attached_buffers[idx];
+ __iio_buffer_free_sysfs_and_mask(buffer, indio_dev, idx);
}
return ret;
}
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 463a63d5bf56..409c278a4c2c 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -702,6 +702,9 @@ static ssize_t __iio_format_value(char *buf, size_t offset, unsigned int type,
}
case IIO_VAL_CHAR:
return sysfs_emit_at(buf, offset, "%c", (char)vals[0]);
+ case IIO_VAL_INT_64:
+ tmp2 = (s64)((((u64)vals[1]) << 32) | (u32)vals[0]);
+ return sysfs_emit_at(buf, offset, "%lld", tmp2);
default:
return 0;
}
@@ -1619,7 +1622,7 @@ static void iio_dev_release(struct device *device)
kfree(iio_dev_opaque);
}
-struct device_type iio_device_type = {
+const struct device_type iio_device_type = {
.name = "iio_device",
.release = iio_dev_release,
};
@@ -1653,7 +1656,6 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv)
indio_dev->dev.type = &iio_device_type;
indio_dev->dev.bus = &iio_bus_type;
device_initialize(&indio_dev->dev);
- iio_device_set_drvdata(indio_dev, (void *)indio_dev);
mutex_init(&indio_dev->mlock);
mutex_init(&iio_dev_opaque->info_exist_lock);
INIT_LIST_HEAD(&iio_dev_opaque->channel_attr_list);
diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index 93990ff1dfe3..f504ed351b3e 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -162,6 +162,39 @@ static struct iio_trigger *iio_trigger_acquire_by_name(const char *name)
return trig;
}
+static void iio_reenable_work_fn(struct work_struct *work)
+{
+ struct iio_trigger *trig = container_of(work, struct iio_trigger,
+ reenable_work);
+
+ /*
+ * This 'might' occur after the trigger state is set to disabled -
+ * in that case the driver should skip reenabling.
+ */
+ trig->ops->reenable(trig);
+}
+
+/*
+ * In general, reenable callbacks may need to sleep and this path is
+ * not performance sensitive, so just queue up a work item
+ * to reneable the trigger for us.
+ *
+ * Races that can cause this.
+ * 1) A handler occurs entirely in interrupt context so the counter
+ * the final decrement is still in this interrupt.
+ * 2) The trigger has been removed, but one last interrupt gets through.
+ *
+ * For (1) we must call reenable, but not in atomic context.
+ * For (2) it should be safe to call reenanble, if drivers never blindly
+ * reenable after state is off.
+ */
+static void iio_trigger_notify_done_atomic(struct iio_trigger *trig)
+{
+ if (atomic_dec_and_test(&trig->use_count) && trig->ops &&
+ trig->ops->reenable)
+ schedule_work(&trig->reenable_work);
+}
+
void iio_trigger_poll(struct iio_trigger *trig)
{
int i;
@@ -173,7 +206,7 @@ void iio_trigger_poll(struct iio_trigger *trig)
if (trig->subirqs[i].enabled)
generic_handle_irq(trig->subirq_base + i);
else
- iio_trigger_notify_done(trig);
+ iio_trigger_notify_done_atomic(trig);
}
}
}
@@ -535,6 +568,7 @@ struct iio_trigger *viio_trigger_alloc(struct device *parent,
trig->dev.type = &iio_trig_type;
trig->dev.bus = &iio_bus_type;
device_initialize(&trig->dev);
+ INIT_WORK(&trig->reenable_work, iio_reenable_work_fn);
mutex_init(&trig->pool_lock);
trig->subirq_base = irq_alloc_descs(-1, 0,
diff --git a/drivers/iio/light/cm3605.c b/drivers/iio/light/cm3605.c
index 3e7fb16ab1f6..50d34a98839c 100644
--- a/drivers/iio/light/cm3605.c
+++ b/drivers/iio/light/cm3605.c
@@ -10,6 +10,7 @@
*/
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/iio/iio.h>
#include <linux/iio/sysfs.h>
#include <linux/iio/events.h>
@@ -18,7 +19,7 @@
#include <linux/init.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/of.h>
+#include <linux/property.h>
#include <linux/regulator/consumer.h>
#include <linux/gpio/consumer.h>
#include <linux/interrupt.h>
@@ -156,7 +157,6 @@ static int cm3605_probe(struct platform_device *pdev)
struct cm3605 *cm3605;
struct iio_dev *indio_dev;
struct device *dev = &pdev->dev;
- struct device_node *np = dev->of_node;
enum iio_chan_type ch_type;
u32 rset;
int irq;
@@ -171,7 +171,7 @@ static int cm3605_probe(struct platform_device *pdev)
cm3605->dev = dev;
cm3605->dir = IIO_EV_DIR_FALLING;
- ret = of_property_read_u32(np, "capella,aset-resistance-ohms", &rset);
+ ret = device_property_read_u32(dev, "capella,aset-resistance-ohms", &rset);
if (ret) {
dev_info(dev, "no RSET specified, assuming 100K\n");
rset = 100000;
diff --git a/drivers/iio/light/gp2ap020a00f.c b/drivers/iio/light/gp2ap020a00f.c
index d1d9f2d319e4..b820041159f7 100644
--- a/drivers/iio/light/gp2ap020a00f.c
+++ b/drivers/iio/light/gp2ap020a00f.c
@@ -1467,9 +1467,6 @@ static const struct iio_buffer_setup_ops gp2ap020a00f_buffer_setup_ops = {
.predisable = &gp2ap020a00f_buffer_predisable,
};
-static const struct iio_trigger_ops gp2ap020a00f_trigger_ops = {
-};
-
static int gp2ap020a00f_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
@@ -1550,8 +1547,6 @@ static int gp2ap020a00f_probe(struct i2c_client *client,
goto error_uninit_buffer;
}
- data->trig->ops = &gp2ap020a00f_trigger_ops;
-
init_irq_work(&data->work, gp2ap020a00f_iio_trigger_work);
err = iio_trigger_register(data->trig);
diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c
index b2983b1a9ed1..47d61ec2bb50 100644
--- a/drivers/iio/light/ltr501.c
+++ b/drivers/iio/light/ltr501.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * ltr501.c - Support for Lite-On LTR501 ambient light and proximity sensor
+ * Support for Lite-On LTR501 and similar ambient light and proximity sensors.
*
* Copyright 2014 Peter Meerwald <pmeerw@pmeerw.net>
*
@@ -98,6 +98,7 @@ enum {
ltr501 = 0,
ltr559,
ltr301,
+ ltr303,
};
struct ltr501_gain {
@@ -165,6 +166,7 @@ struct ltr501_data {
struct regmap_field *reg_ps_rate;
struct regmap_field *reg_als_prst;
struct regmap_field *reg_ps_prst;
+ uint32_t near_level;
};
static const struct ltr501_samp_table ltr501_als_samp_table[] = {
@@ -524,6 +526,25 @@ static int ltr501_write_intr_prst(struct ltr501_data *data,
return -EINVAL;
}
+static ssize_t ltr501_read_near_level(struct iio_dev *indio_dev,
+ uintptr_t priv,
+ const struct iio_chan_spec *chan,
+ char *buf)
+{
+ struct ltr501_data *data = iio_priv(indio_dev);
+
+ return sprintf(buf, "%u\n", data->near_level);
+}
+
+static const struct iio_chan_spec_ext_info ltr501_ext_info[] = {
+ {
+ .name = "nearlevel",
+ .shared = IIO_SEPARATE,
+ .read = ltr501_read_near_level,
+ },
+ { /* sentinel */ }
+};
+
static const struct iio_event_spec ltr501_als_event_spec[] = {
{
.type = IIO_EV_TYPE_THRESH,
@@ -608,6 +629,7 @@ static const struct iio_chan_spec ltr501_channels[] = {
},
.event_spec = ltr501_pxs_event_spec,
.num_event_specs = ARRAY_SIZE(ltr501_pxs_event_spec),
+ .ext_info = ltr501_ext_info,
},
IIO_CHAN_SOFT_TIMESTAMP(3),
};
@@ -1231,6 +1253,18 @@ static const struct ltr501_chip_info ltr501_chip_info_tbl[] = {
.channels = ltr301_channels,
.no_channels = ARRAY_SIZE(ltr301_channels),
},
+ [ltr303] = {
+ .partid = 0x0A,
+ .als_gain = ltr559_als_gain_tbl,
+ .als_gain_tbl_size = ARRAY_SIZE(ltr559_als_gain_tbl),
+ .als_mode_active = BIT(0),
+ .als_gain_mask = BIT(2) | BIT(3) | BIT(4),
+ .als_gain_shift = 2,
+ .info = &ltr301_info,
+ .info_no_irq = &ltr301_info_no_irq,
+ .channels = ltr301_channels,
+ .no_channels = ARRAY_SIZE(ltr301_channels),
+ },
};
static int ltr501_write_contr(struct ltr501_data *data, u8 als_val, u8 ps_val)
@@ -1518,6 +1552,10 @@ static int ltr501_probe(struct i2c_client *client,
if ((partid >> 4) != data->chip_info->partid)
return -ENODEV;
+ if (device_property_read_u32(&client->dev, "proximity-near-level",
+ &data->near_level))
+ data->near_level = 0;
+
indio_dev->info = data->chip_info->info;
indio_dev->channels = data->chip_info->channels;
indio_dev->num_channels = data->chip_info->no_channels;
@@ -1605,6 +1643,7 @@ static const struct i2c_device_id ltr501_id[] = {
{ "ltr501", ltr501},
{ "ltr559", ltr559},
{ "ltr301", ltr301},
+ { "ltr303", ltr303},
{ }
};
MODULE_DEVICE_TABLE(i2c, ltr501_id);
@@ -1613,6 +1652,7 @@ static const struct of_device_id ltr501_of_match[] = {
{ .compatible = "liteon,ltr501", },
{ .compatible = "liteon,ltr559", },
{ .compatible = "liteon,ltr301", },
+ { .compatible = "liteon,ltr303", },
{}
};
MODULE_DEVICE_TABLE(of, ltr501_of_match);
diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c
index 6e82dc54a417..55879a20ae52 100644
--- a/drivers/iio/magnetometer/ak8975.c
+++ b/drivers/iio/magnetometer/ak8975.c
@@ -929,7 +929,7 @@ static int ak8975_probe(struct i2c_client *client,
/* id will be NULL when enumerated via ACPI */
match = device_get_match_data(&client->dev);
if (match) {
- chipset = (enum asahi_compass_chipset)(match);
+ chipset = (uintptr_t)match;
name = dev_name(&client->dev);
} else if (id) {
chipset = (enum asahi_compass_chipset)(id->driver_data);
diff --git a/drivers/iio/magnetometer/hmc5843_core.c b/drivers/iio/magnetometer/hmc5843_core.c
index f08726bf5ec3..5a730d9bdbb0 100644
--- a/drivers/iio/magnetometer/hmc5843_core.c
+++ b/drivers/iio/magnetometer/hmc5843_core.c
@@ -246,7 +246,7 @@ static const struct iio_enum hmc5843_meas_conf_enum = {
static const struct iio_chan_spec_ext_info hmc5843_ext_info[] = {
IIO_ENUM("meas_conf", IIO_SHARED_BY_TYPE, &hmc5843_meas_conf_enum),
- IIO_ENUM_AVAILABLE("meas_conf", &hmc5843_meas_conf_enum),
+ IIO_ENUM_AVAILABLE("meas_conf", IIO_SHARED_BY_TYPE, &hmc5843_meas_conf_enum),
IIO_MOUNT_MATRIX(IIO_SHARED_BY_DIR, hmc5843_get_mount_matrix),
{ }
};
@@ -260,7 +260,7 @@ static const struct iio_enum hmc5983_meas_conf_enum = {
static const struct iio_chan_spec_ext_info hmc5983_ext_info[] = {
IIO_ENUM("meas_conf", IIO_SHARED_BY_TYPE, &hmc5983_meas_conf_enum),
- IIO_ENUM_AVAILABLE("meas_conf", &hmc5983_meas_conf_enum),
+ IIO_ENUM_AVAILABLE("meas_conf", IIO_SHARED_BY_TYPE, &hmc5983_meas_conf_enum),
IIO_MOUNT_MATRIX(IIO_SHARED_BY_DIR, hmc5843_get_mount_matrix),
{ }
};
diff --git a/drivers/iio/magnetometer/mag3110.c b/drivers/iio/magnetometer/mag3110.c
index c96415a1aead..17c62d806218 100644
--- a/drivers/iio/magnetometer/mag3110.c
+++ b/drivers/iio/magnetometer/mag3110.c
@@ -291,7 +291,8 @@ static int mag3110_read_raw(struct iio_dev *indio_dev,
if (ret < 0)
goto release;
*val = sign_extend32(
- be16_to_cpu(buffer[chan->scan_index]), 15);
+ be16_to_cpu(buffer[chan->scan_index]),
+ chan->scan_type.realbits - 1);
ret = IIO_VAL_INT;
break;
case IIO_TEMP: /* in 1 C / LSB */
@@ -306,7 +307,8 @@ static int mag3110_read_raw(struct iio_dev *indio_dev,
mutex_unlock(&data->lock);
if (ret < 0)
goto release;
- *val = sign_extend32(ret, 7);
+ *val = sign_extend32(ret,
+ chan->scan_type.realbits - 1);
ret = IIO_VAL_INT;
break;
default:
diff --git a/drivers/iio/potentiometer/mcp41010.c b/drivers/iio/potentiometer/mcp41010.c
index 79ccac6d4be0..30a4594d4e11 100644
--- a/drivers/iio/potentiometer/mcp41010.c
+++ b/drivers/iio/potentiometer/mcp41010.c
@@ -21,9 +21,9 @@
#include <linux/iio/iio.h>
#include <linux/iio/types.h>
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/mutex.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/property.h>
#include <linux/spi/spi.h>
#define MCP41010_MAX_WIPERS 2
@@ -146,7 +146,7 @@ static int mcp41010_probe(struct spi_device *spi)
data = iio_priv(indio_dev);
spi_set_drvdata(spi, indio_dev);
data->spi = spi;
- data->cfg = of_device_get_match_data(&spi->dev);
+ data->cfg = device_get_match_data(&spi->dev);
if (!data->cfg)
data->cfg = &mcp41010_cfg[spi_get_device_id(spi)->driver_data];
diff --git a/drivers/iio/potentiostat/lmp91000.c b/drivers/iio/potentiostat/lmp91000.c
index ed30bdaa10ec..fe514f0b5506 100644
--- a/drivers/iio/potentiostat/lmp91000.c
+++ b/drivers/iio/potentiostat/lmp91000.c
@@ -271,9 +271,6 @@ static int lmp91000_buffer_cb(const void *val, void *private)
return 0;
}
-static const struct iio_trigger_ops lmp91000_trigger_ops = {
-};
-
static int lmp91000_buffer_postenable(struct iio_dev *indio_dev)
{
struct lmp91000_data *data = iio_priv(indio_dev);
@@ -330,7 +327,6 @@ static int lmp91000_probe(struct i2c_client *client,
return -ENOMEM;
}
- data->trig->ops = &lmp91000_trigger_ops;
init_completion(&data->completion);
ret = lmp91000_read_config(data);
diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c
index 6b7da40f99c8..bf8167f43c56 100644
--- a/drivers/iio/pressure/bmp280-core.c
+++ b/drivers/iio/pressure/bmp280-core.c
@@ -1138,7 +1138,6 @@ int bmp280_common_probe(struct device *dev,
}
EXPORT_SYMBOL(bmp280_common_probe);
-#ifdef CONFIG_PM
static int bmp280_runtime_suspend(struct device *dev)
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
@@ -1159,15 +1158,9 @@ static int bmp280_runtime_resume(struct device *dev)
usleep_range(data->start_up_time, data->start_up_time + 100);
return data->chip_info->chip_config(data);
}
-#endif /* CONFIG_PM */
-const struct dev_pm_ops bmp280_dev_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
- pm_runtime_force_resume)
- SET_RUNTIME_PM_OPS(bmp280_runtime_suspend,
- bmp280_runtime_resume, NULL)
-};
-EXPORT_SYMBOL(bmp280_dev_pm_ops);
+EXPORT_RUNTIME_DEV_PM_OPS(bmp280_dev_pm_ops, bmp280_runtime_suspend,
+ bmp280_runtime_resume, NULL);
MODULE_AUTHOR("Vlad Dogaru <vlad.dogaru@intel.com>");
MODULE_DESCRIPTION("Driver for Bosch Sensortec BMP180/BMP280 pressure and temperature sensor");
diff --git a/drivers/iio/pressure/bmp280-i2c.c b/drivers/iio/pressure/bmp280-i2c.c
index 8b03ea15c0d0..35045bd92846 100644
--- a/drivers/iio/pressure/bmp280-i2c.c
+++ b/drivers/iio/pressure/bmp280-i2c.c
@@ -58,7 +58,7 @@ static struct i2c_driver bmp280_i2c_driver = {
.driver = {
.name = "bmp280",
.of_match_table = bmp280_of_i2c_match,
- .pm = &bmp280_dev_pm_ops,
+ .pm = pm_ptr(&bmp280_dev_pm_ops),
},
.probe = bmp280_i2c_probe,
.id_table = bmp280_i2c_id,
diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c
index 625b86878ad8..41f6cc56d229 100644
--- a/drivers/iio/pressure/bmp280-spi.c
+++ b/drivers/iio/pressure/bmp280-spi.c
@@ -109,7 +109,7 @@ static struct spi_driver bmp280_spi_driver = {
.driver = {
.name = "bmp280",
.of_match_table = bmp280_of_spi_match,
- .pm = &bmp280_dev_pm_ops,
+ .pm = pm_ptr(&bmp280_dev_pm_ops),
},
.id_table = bmp280_spi_id,
.probe = bmp280_spi_probe,
diff --git a/drivers/iio/pressure/mpl3115.c b/drivers/iio/pressure/mpl3115.c
index 1eb9e7b29e05..e95b9a5475b4 100644
--- a/drivers/iio/pressure/mpl3115.c
+++ b/drivers/iio/pressure/mpl3115.c
@@ -74,7 +74,6 @@ static int mpl3115_read_raw(struct iio_dev *indio_dev,
int *val, int *val2, long mask)
{
struct mpl3115_data *data = iio_priv(indio_dev);
- __be32 tmp = 0;
int ret;
switch (mask) {
@@ -84,7 +83,9 @@ static int mpl3115_read_raw(struct iio_dev *indio_dev,
return ret;
switch (chan->type) {
- case IIO_PRESSURE: /* in 0.25 pascal / LSB */
+ case IIO_PRESSURE: { /* in 0.25 pascal / LSB */
+ __be32 tmp = 0;
+
mutex_lock(&data->lock);
ret = mpl3115_request(data);
if (ret < 0) {
@@ -96,10 +97,13 @@ static int mpl3115_read_raw(struct iio_dev *indio_dev,
mutex_unlock(&data->lock);
if (ret < 0)
break;
- *val = be32_to_cpu(tmp) >> 12;
+ *val = be32_to_cpu(tmp) >> chan->scan_type.shift;
ret = IIO_VAL_INT;
break;
- case IIO_TEMP: /* in 0.0625 celsius / LSB */
+ }
+ case IIO_TEMP: { /* in 0.0625 celsius / LSB */
+ __be16 tmp;
+
mutex_lock(&data->lock);
ret = mpl3115_request(data);
if (ret < 0) {
@@ -111,9 +115,11 @@ static int mpl3115_read_raw(struct iio_dev *indio_dev,
mutex_unlock(&data->lock);
if (ret < 0)
break;
- *val = sign_extend32(be32_to_cpu(tmp) >> 20, 11);
+ *val = sign_extend32(be16_to_cpu(tmp) >> chan->scan_type.shift,
+ chan->scan_type.realbits - 1);
ret = IIO_VAL_INT;
break;
+ }
default:
ret = -EINVAL;
break;
diff --git a/drivers/iio/pressure/ms5611.h b/drivers/iio/pressure/ms5611.h
index 86b1c4b1820d..cbc9349c342a 100644
--- a/drivers/iio/pressure/ms5611.h
+++ b/drivers/iio/pressure/ms5611.h
@@ -50,9 +50,9 @@ struct ms5611_state {
const struct ms5611_osr *pressure_osr;
const struct ms5611_osr *temp_osr;
- int (*reset)(struct device *dev);
- int (*read_prom_word)(struct device *dev, int index, u16 *word);
- int (*read_adc_temp_and_pressure)(struct device *dev,
+ int (*reset)(struct ms5611_state *st);
+ int (*read_prom_word)(struct ms5611_state *st, int index, u16 *word);
+ int (*read_adc_temp_and_pressure)(struct ms5611_state *st,
s32 *temp, s32 *pressure);
struct ms5611_chip_info *chip_info;
diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c
index ee75f08655c9..a4d0b54cde9b 100644
--- a/drivers/iio/pressure/ms5611_core.c
+++ b/drivers/iio/pressure/ms5611_core.c
@@ -85,8 +85,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev)
struct ms5611_state *st = iio_priv(indio_dev);
for (i = 0; i < MS5611_PROM_WORDS_NB; i++) {
- ret = st->read_prom_word(&indio_dev->dev,
- i, &st->chip_info->prom[i]);
+ ret = st->read_prom_word(st, i, &st->chip_info->prom[i]);
if (ret < 0) {
dev_err(&indio_dev->dev,
"failed to read prom at %d\n", i);
@@ -108,7 +107,7 @@ static int ms5611_read_temp_and_pressure(struct iio_dev *indio_dev,
int ret;
struct ms5611_state *st = iio_priv(indio_dev);
- ret = st->read_adc_temp_and_pressure(&indio_dev->dev, temp, pressure);
+ ret = st->read_adc_temp_and_pressure(st, temp, pressure);
if (ret < 0) {
dev_err(&indio_dev->dev,
"failed to read temperature and pressure\n");
@@ -196,7 +195,7 @@ static int ms5611_reset(struct iio_dev *indio_dev)
int ret;
struct ms5611_state *st = iio_priv(indio_dev);
- ret = st->reset(&indio_dev->dev);
+ ret = st->reset(st);
if (ret < 0) {
dev_err(&indio_dev->dev, "failed to reset device\n");
return ret;
diff --git a/drivers/iio/pressure/ms5611_i2c.c b/drivers/iio/pressure/ms5611_i2c.c
index 5c82d80f85b6..1047a85527a9 100644
--- a/drivers/iio/pressure/ms5611_i2c.c
+++ b/drivers/iio/pressure/ms5611_i2c.c
@@ -20,17 +20,15 @@
#include "ms5611.h"
-static int ms5611_i2c_reset(struct device *dev)
+static int ms5611_i2c_reset(struct ms5611_state *st)
{
- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
-
return i2c_smbus_write_byte(st->client, MS5611_RESET);
}
-static int ms5611_i2c_read_prom_word(struct device *dev, int index, u16 *word)
+static int ms5611_i2c_read_prom_word(struct ms5611_state *st, int index,
+ u16 *word)
{
int ret;
- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
ret = i2c_smbus_read_word_swapped(st->client,
MS5611_READ_PROM_WORD + (index << 1));
@@ -57,11 +55,10 @@ static int ms5611_i2c_read_adc(struct ms5611_state *st, s32 *val)
return 0;
}
-static int ms5611_i2c_read_adc_temp_and_pressure(struct device *dev,
+static int ms5611_i2c_read_adc_temp_and_pressure(struct ms5611_state *st,
s32 *temp, s32 *pressure)
{
int ret;
- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
const struct ms5611_osr *osr = st->temp_osr;
ret = i2c_smbus_write_byte(st->client, osr->cmd);
diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c
index 79bed64c9b68..9fa2dcd71760 100644
--- a/drivers/iio/pressure/ms5611_spi.c
+++ b/drivers/iio/pressure/ms5611_spi.c
@@ -15,18 +15,17 @@
#include "ms5611.h"
-static int ms5611_spi_reset(struct device *dev)
+static int ms5611_spi_reset(struct ms5611_state *st)
{
u8 cmd = MS5611_RESET;
- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
return spi_write_then_read(st->client, &cmd, 1, NULL, 0);
}
-static int ms5611_spi_read_prom_word(struct device *dev, int index, u16 *word)
+static int ms5611_spi_read_prom_word(struct ms5611_state *st, int index,
+ u16 *word)
{
int ret;
- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
ret = spi_w8r16be(st->client, MS5611_READ_PROM_WORD + (index << 1));
if (ret < 0)
@@ -37,11 +36,10 @@ static int ms5611_spi_read_prom_word(struct device *dev, int index, u16 *word)
return 0;
}
-static int ms5611_spi_read_adc(struct device *dev, s32 *val)
+static int ms5611_spi_read_adc(struct ms5611_state *st, s32 *val)
{
int ret;
u8 buf[3] = { MS5611_READ_ADC };
- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
ret = spi_write_then_read(st->client, buf, 1, buf, 3);
if (ret < 0)
@@ -52,11 +50,10 @@ static int ms5611_spi_read_adc(struct device *dev, s32 *val)
return 0;
}
-static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev,
+static int ms5611_spi_read_adc_temp_and_pressure(struct ms5611_state *st,
s32 *temp, s32 *pressure)
{
int ret;
- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
const struct ms5611_osr *osr = st->temp_osr;
/*
@@ -68,7 +65,7 @@ static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev,
return ret;
usleep_range(osr->conv_usec, osr->conv_usec + (osr->conv_usec / 10UL));
- ret = ms5611_spi_read_adc(dev, temp);
+ ret = ms5611_spi_read_adc(st, temp);
if (ret < 0)
return ret;
@@ -78,7 +75,7 @@ static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev,
return ret;
usleep_range(osr->conv_usec, osr->conv_usec + (osr->conv_usec / 10UL));
- return ms5611_spi_read_adc(dev, pressure);
+ return ms5611_spi_read_adc(st, pressure);
}
static int ms5611_spi_probe(struct spi_device *spi)
diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c
index 3797a8f54276..51f4f92ae84a 100644
--- a/drivers/iio/proximity/as3935.c
+++ b/drivers/iio/proximity/as3935.c
@@ -133,7 +133,7 @@ static ssize_t as3935_sensor_sensitivity_store(struct device *dev,
unsigned long val;
int ret;
- ret = kstrtoul((const char *) buf, 10, &val);
+ ret = kstrtoul(buf, 10, &val);
if (ret)
return -EINVAL;
@@ -238,9 +238,6 @@ err_read:
return IRQ_HANDLED;
}
-static const struct iio_trigger_ops iio_interrupt_trigger_ops = {
-};
-
static void as3935_event_work(struct work_struct *work)
{
struct as3935_state *st;
@@ -417,7 +414,6 @@ static int as3935_probe(struct spi_device *spi)
st->trig = trig;
st->noise_tripped = jiffies - HZ;
iio_trigger_set_drvdata(trig, indio_dev);
- trig->ops = &iio_interrupt_trigger_ops;
ret = devm_iio_trigger_register(dev, trig);
if (ret) {
diff --git a/drivers/iio/test/iio-test-format.c b/drivers/iio/test/iio-test-format.c
index f1e951eddb43..237321436b83 100644
--- a/drivers/iio/test/iio-test-format.c
+++ b/drivers/iio/test/iio-test-format.c
@@ -14,10 +14,13 @@
static void iio_test_iio_format_value_integer(struct kunit *test)
{
- char *buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
+ char *buf;
int val;
int ret;
+ buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+
val = 42;
ret = iio_format_value(buf, IIO_VAL_INT, 1, &val);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "42\n");
@@ -41,153 +44,219 @@ static void iio_test_iio_format_value_integer(struct kunit *test)
static void iio_test_iio_format_value_fixedpoint(struct kunit *test)
{
- char *buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
int values[2];
+ char *buf;
int ret;
+ buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+
/* positive >= 1 */
values[0] = 1;
values[1] = 10;
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "1.000010\n");
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO_DB, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO_DB, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "1.000010 dB\n");
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_NANO, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_NANO, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "1.000000010\n");
/* positive < 1 */
values[0] = 0;
values[1] = 12;
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "0.000012\n");
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO_DB, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO_DB, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "0.000012 dB\n");
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_NANO, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_NANO, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "0.000000012\n");
/* negative <= -1 */
values[0] = -1;
values[1] = 10;
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-1.000010\n");
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO_DB, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO_DB, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-1.000010 dB\n");
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_NANO, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_NANO, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-1.000000010\n");
/* negative > -1 */
values[0] = 0;
values[1] = -123;
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-0.000123\n");
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO_DB, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO_DB, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-0.000123 dB\n");
- ret = iio_format_value(buf, IIO_VAL_INT_PLUS_NANO, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_INT_PLUS_NANO, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-0.000000123\n");
}
static void iio_test_iio_format_value_fractional(struct kunit *test)
{
- char *buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
int values[2];
+ char *buf;
int ret;
+ buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+
/* positive < 1 */
values[0] = 1;
values[1] = 10;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "0.100000000\n");
/* positive >= 1 */
values[0] = 100;
values[1] = 3;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "33.333333333\n");
/* negative > -1 */
values[0] = -1;
values[1] = 1000000000;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-0.000000001\n");
/* negative <= -1 */
values[0] = -200;
values[1] = 3;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-66.666666666\n");
/* Zero */
values[0] = 0;
values[1] = -10;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "0.000000000\n");
}
static void iio_test_iio_format_value_fractional_log2(struct kunit *test)
{
- char *buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
int values[2];
+ char *buf;
int ret;
+ buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+
/* positive < 1 */
values[0] = 123;
values[1] = 10;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "0.120117187\n");
/* positive >= 1 */
values[0] = 1234567;
values[1] = 10;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "1205.631835937\n");
/* negative > -1 */
values[0] = -123;
values[1] = 10;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-0.120117187\n");
/* negative <= -1 */
values[0] = -1234567;
values[1] = 10;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-1205.631835937\n");
/* Zero */
values[0] = 0;
values[1] = 10;
- ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, 2, values);
+ ret = iio_format_value(buf, IIO_VAL_FRACTIONAL_LOG2, ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "0.000000000\n");
}
static void iio_test_iio_format_value_multiple(struct kunit *test)
{
- char *buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
int values[] = {1, -2, 3, -4, 5};
+ char *buf;
int ret;
+ buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+
ret = iio_format_value(buf, IIO_VAL_INT_MULTIPLE,
ARRAY_SIZE(values), values);
IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "1 -2 3 -4 5 \n");
}
+static void iio_test_iio_format_value_integer_64(struct kunit *test)
+{
+ int values[2];
+ s64 value;
+ char *buf;
+ int ret;
+
+ buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+
+ value = 24;
+ values[0] = lower_32_bits(value);
+ values[1] = upper_32_bits(value);
+ ret = iio_format_value(buf, IIO_VAL_INT_64, ARRAY_SIZE(values), values);
+ IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "24\n");
+
+ value = -24;
+ values[0] = lower_32_bits(value);
+ values[1] = upper_32_bits(value);
+ ret = iio_format_value(buf, IIO_VAL_INT_64, ARRAY_SIZE(values), values);
+ IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-24\n");
+
+ value = 0;
+ values[0] = lower_32_bits(value);
+ values[1] = upper_32_bits(value);
+ ret = iio_format_value(buf, IIO_VAL_INT_64, ARRAY_SIZE(values), values);
+ IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "0\n");
+
+ value = UINT_MAX;
+ values[0] = lower_32_bits(value);
+ values[1] = upper_32_bits(value);
+ ret = iio_format_value(buf, IIO_VAL_INT_64, ARRAY_SIZE(values), values);
+ IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "4294967295\n");
+
+ value = -((s64)UINT_MAX);
+ values[0] = lower_32_bits(value);
+ values[1] = upper_32_bits(value);
+ ret = iio_format_value(buf, IIO_VAL_INT_64, ARRAY_SIZE(values), values);
+ IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-4294967295\n");
+
+ value = LLONG_MAX;
+ values[0] = lower_32_bits(value);
+ values[1] = upper_32_bits(value);
+ ret = iio_format_value(buf, IIO_VAL_INT_64, ARRAY_SIZE(values), values);
+ IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "9223372036854775807\n");
+
+ value = LLONG_MIN;
+ values[0] = lower_32_bits(value);
+ values[1] = upper_32_bits(value);
+ ret = iio_format_value(buf, IIO_VAL_INT_64, ARRAY_SIZE(values), values);
+ IIO_TEST_FORMAT_EXPECT_EQ(test, buf, ret, "-9223372036854775808\n");
+}
+
static struct kunit_case iio_format_test_cases[] = {
KUNIT_CASE(iio_test_iio_format_value_integer),
KUNIT_CASE(iio_test_iio_format_value_fixedpoint),
KUNIT_CASE(iio_test_iio_format_value_fractional),
KUNIT_CASE(iio_test_iio_format_value_fractional_log2),
KUNIT_CASE(iio_test_iio_format_value_multiple),
+ KUNIT_CASE(iio_test_iio_format_value_integer_64),
{}
};
diff --git a/drivers/iio/trigger/iio-trig-interrupt.c b/drivers/iio/trigger/iio-trig-interrupt.c
index f746c460bf2a..5f49cd105fae 100644
--- a/drivers/iio/trigger/iio-trig-interrupt.c
+++ b/drivers/iio/trigger/iio-trig-interrupt.c
@@ -25,9 +25,6 @@ static irqreturn_t iio_interrupt_trigger_poll(int irq, void *private)
return IRQ_HANDLED;
}
-static const struct iio_trigger_ops iio_interrupt_trigger_ops = {
-};
-
static int iio_interrupt_trigger_probe(struct platform_device *pdev)
{
struct iio_interrupt_trigger_info *trig_info;
@@ -58,7 +55,6 @@ static int iio_interrupt_trigger_probe(struct platform_device *pdev)
}
iio_trigger_set_drvdata(trig, trig_info);
trig_info->irq = irq;
- trig->ops = &iio_interrupt_trigger_ops;
ret = request_irq(irq, iio_interrupt_trigger_poll,
irqflags, trig->name, trig);
if (ret) {
diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c
index e9adfff45b39..2a4b75897910 100644
--- a/drivers/iio/trigger/iio-trig-sysfs.c
+++ b/drivers/iio/trigger/iio-trig-sysfs.c
@@ -124,9 +124,6 @@ static const struct attribute_group *iio_sysfs_trigger_attr_groups[] = {
NULL
};
-static const struct iio_trigger_ops iio_sysfs_trigger_ops = {
-};
-
static int iio_sysfs_trigger_probe(int id)
{
struct iio_sysfs_trig *t;
@@ -156,7 +153,6 @@ static int iio_sysfs_trigger_probe(int id)
}
t->trig->dev.groups = iio_sysfs_trigger_attr_groups;
- t->trig->ops = &iio_sysfs_trigger_ops;
iio_trigger_set_drvdata(t->trig, t);
t->work = IRQ_WORK_INIT_HARD(iio_sysfs_trigger_work);
diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
index 4353b749ecef..4f9461e1412c 100644
--- a/drivers/iio/trigger/stm32-timer-trigger.c
+++ b/drivers/iio/trigger/stm32-timer-trigger.c
@@ -696,9 +696,9 @@ static const struct iio_chan_spec_ext_info stm32_trigger_count_info[] = {
.write = stm32_count_set_preset
},
IIO_ENUM("enable_mode", IIO_SEPARATE, &stm32_enable_mode_enum),
- IIO_ENUM_AVAILABLE("enable_mode", &stm32_enable_mode_enum),
+ IIO_ENUM_AVAILABLE("enable_mode", IIO_SHARED_BY_TYPE, &stm32_enable_mode_enum),
IIO_ENUM("trigger_mode", IIO_SEPARATE, &stm32_trigger_mode_enum),
- IIO_ENUM_AVAILABLE("trigger_mode", &stm32_trigger_mode_enum),
+ IIO_ENUM_AVAILABLE("trigger_mode", IIO_SHARED_BY_TYPE, &stm32_trigger_mode_enum),
{}
};
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index 4b1b16e7a75b..89234d04cc65 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -1709,14 +1709,14 @@ clean_msixtbl:
*/
static void irdma_get_used_rsrc(struct irdma_device *iwdev)
{
- iwdev->rf->used_pds = find_next_zero_bit(iwdev->rf->allocated_pds,
- iwdev->rf->max_pd, 0);
- iwdev->rf->used_qps = find_next_zero_bit(iwdev->rf->allocated_qps,
- iwdev->rf->max_qp, 0);
- iwdev->rf->used_cqs = find_next_zero_bit(iwdev->rf->allocated_cqs,
- iwdev->rf->max_cq, 0);
- iwdev->rf->used_mrs = find_next_zero_bit(iwdev->rf->allocated_mrs,
- iwdev->rf->max_mr, 0);
+ iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds,
+ iwdev->rf->max_pd);
+ iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps,
+ iwdev->rf->max_qp);
+ iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs,
+ iwdev->rf->max_cq);
+ iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs,
+ iwdev->rf->max_mr);
}
void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf)
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 9363bccfc6e7..a8e1c30c370f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -196,7 +196,7 @@ struct qib_ctxtdata {
pid_t pid;
pid_t subpid[QLOGIC_IB_MAX_SUBCTXT];
/* same size as task_struct .comm[], command that opened context */
- char comm[16];
+ char comm[TASK_COMM_LEN];
/* pkeys set by this use of this ctxt */
u16 pkeys[4];
/* so file ops can get at unit */
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 63854f4b6524..aa290928cf96 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1321,7 +1321,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
rcd->tid_pg_list = ptmp;
rcd->pid = current->pid;
init_waitqueue_head(&dd->rcd[ctxt]->wait);
- strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
+ get_task_comm(rcd->comm, current);
ctxt_fp(fp) = rcd;
qib_stats.sps_ctxts++;
dd->freectxts--;
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index afe11f475b8c..5018b9387694 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -217,8 +217,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
* the port number must be in the Dynamic Ports range
* (0xc000 - 0xffff).
*/
- qp->src_port = RXE_ROCE_V2_SPORT +
- (hash_32_generic(qp_num(qp), 14) & 0x3fff);
+ qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff);
qp->sq.max_wr = init->cap.max_send_wr;
/* These caps are limited by rxe_qp_chk_cap() done by the caller */
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 9093e6a80b26..e5c586913d0b 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -98,15 +98,14 @@ static int siw_create_tx_threads(void)
continue;
siw_tx_thread[cpu] =
- kthread_create(siw_run_sq, (unsigned long *)(long)cpu,
- "siw_tx/%d", cpu);
+ kthread_run_on_cpu(siw_run_sq,
+ (unsigned long *)(long)cpu,
+ cpu, "siw_tx/%u");
if (IS_ERR(siw_tx_thread[cpu])) {
siw_tx_thread[cpu] = NULL;
continue;
}
- kthread_bind(siw_tx_thread[cpu], cpu);
- wake_up_process(siw_tx_thread[cpu]);
assigned++;
}
return assigned;
diff --git a/drivers/input/ff-core.c b/drivers/input/ff-core.c
index 1cf5deda06e1..fa8d1a466014 100644
--- a/drivers/input/ff-core.c
+++ b/drivers/input/ff-core.c
@@ -67,7 +67,7 @@ static int compat_effect(struct ff_device *ff, struct ff_effect *effect)
effect->type = FF_PERIODIC;
effect->u.periodic.waveform = FF_SINE;
effect->u.periodic.period = 50;
- effect->u.periodic.magnitude = max(magnitude, 0x7fff);
+ effect->u.periodic.magnitude = magnitude;
effect->u.periodic.offset = 0;
effect->u.periodic.phase = 0;
effect->u.periodic.envelope.attack_length = 0;
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 8dbf1e69c90a..d75a8b179a8a 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -247,7 +247,7 @@ static ssize_t gpio_keys_attr_store_helper(struct gpio_keys_drvdata *ddata,
ssize_t error;
int i;
- bits = bitmap_zalloc(n_events, GFP_KERNEL);
+ bits = bitmap_alloc(n_events, GFP_KERNEL);
if (!bits)
return -ENOMEM;
diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c
index e09b1fae42e1..04da7916eb70 100644
--- a/drivers/input/misc/axp20x-pek.c
+++ b/drivers/input/misc/axp20x-pek.c
@@ -206,11 +206,8 @@ ATTRIBUTE_GROUPS(axp20x);
static irqreturn_t axp20x_pek_irq(int irq, void *pwr)
{
- struct axp20x_pek *axp20x_pek = pwr;
- struct input_dev *idev = axp20x_pek->input;
-
- if (!idev)
- return IRQ_HANDLED;
+ struct input_dev *idev = pwr;
+ struct axp20x_pek *axp20x_pek = input_get_drvdata(idev);
/*
* The power-button is connected to ground so a falling edge (dbf)
@@ -229,9 +226,22 @@ static irqreturn_t axp20x_pek_irq(int irq, void *pwr)
static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek,
struct platform_device *pdev)
{
+ struct axp20x_dev *axp20x = axp20x_pek->axp20x;
struct input_dev *idev;
int error;
+ axp20x_pek->irq_dbr = platform_get_irq_byname(pdev, "PEK_DBR");
+ if (axp20x_pek->irq_dbr < 0)
+ return axp20x_pek->irq_dbr;
+ axp20x_pek->irq_dbr = regmap_irq_get_virq(axp20x->regmap_irqc,
+ axp20x_pek->irq_dbr);
+
+ axp20x_pek->irq_dbf = platform_get_irq_byname(pdev, "PEK_DBF");
+ if (axp20x_pek->irq_dbf < 0)
+ return axp20x_pek->irq_dbf;
+ axp20x_pek->irq_dbf = regmap_irq_get_virq(axp20x->regmap_irqc,
+ axp20x_pek->irq_dbf);
+
axp20x_pek->input = devm_input_allocate_device(&pdev->dev);
if (!axp20x_pek->input)
return -ENOMEM;
@@ -246,6 +256,24 @@ static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek,
input_set_drvdata(idev, axp20x_pek);
+ error = devm_request_any_context_irq(&pdev->dev, axp20x_pek->irq_dbr,
+ axp20x_pek_irq, 0,
+ "axp20x-pek-dbr", idev);
+ if (error < 0) {
+ dev_err(&pdev->dev, "Failed to request dbr IRQ#%d: %d\n",
+ axp20x_pek->irq_dbr, error);
+ return error;
+ }
+
+ error = devm_request_any_context_irq(&pdev->dev, axp20x_pek->irq_dbf,
+ axp20x_pek_irq, 0,
+ "axp20x-pek-dbf", idev);
+ if (error < 0) {
+ dev_err(&pdev->dev, "Failed to request dbf IRQ#%d: %d\n",
+ axp20x_pek->irq_dbf, error);
+ return error;
+ }
+
error = input_register_device(idev);
if (error) {
dev_err(&pdev->dev, "Can't register input device: %d\n",
@@ -253,6 +281,8 @@ static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek,
return error;
}
+ device_init_wakeup(&pdev->dev, true);
+
return 0;
}
@@ -293,18 +323,6 @@ static int axp20x_pek_probe(struct platform_device *pdev)
axp20x_pek->axp20x = dev_get_drvdata(pdev->dev.parent);
- axp20x_pek->irq_dbr = platform_get_irq_byname(pdev, "PEK_DBR");
- if (axp20x_pek->irq_dbr < 0)
- return axp20x_pek->irq_dbr;
- axp20x_pek->irq_dbr = regmap_irq_get_virq(
- axp20x_pek->axp20x->regmap_irqc, axp20x_pek->irq_dbr);
-
- axp20x_pek->irq_dbf = platform_get_irq_byname(pdev, "PEK_DBF");
- if (axp20x_pek->irq_dbf < 0)
- return axp20x_pek->irq_dbf;
- axp20x_pek->irq_dbf = regmap_irq_get_virq(
- axp20x_pek->axp20x->regmap_irqc, axp20x_pek->irq_dbf);
-
if (axp20x_pek_should_register_input(axp20x_pek)) {
error = axp20x_pek_probe_input_device(axp20x_pek, pdev);
if (error)
@@ -313,26 +331,6 @@ static int axp20x_pek_probe(struct platform_device *pdev)
axp20x_pek->info = (struct axp20x_info *)match->driver_data;
- error = devm_request_any_context_irq(&pdev->dev, axp20x_pek->irq_dbr,
- axp20x_pek_irq, 0,
- "axp20x-pek-dbr", axp20x_pek);
- if (error < 0) {
- dev_err(&pdev->dev, "Failed to request dbr IRQ#%d: %d\n",
- axp20x_pek->irq_dbr, error);
- return error;
- }
-
- error = devm_request_any_context_irq(&pdev->dev, axp20x_pek->irq_dbf,
- axp20x_pek_irq, 0,
- "axp20x-pek-dbf", axp20x_pek);
- if (error < 0) {
- dev_err(&pdev->dev, "Failed to request dbf IRQ#%d: %d\n",
- axp20x_pek->irq_dbf, error);
- return error;
- }
-
- device_init_wakeup(&pdev->dev, true);
-
platform_set_drvdata(pdev, axp20x_pek);
return 0;
diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c
index f9b05cf09ff5..2213e06b611d 100644
--- a/drivers/input/misc/palmas-pwrbutton.c
+++ b/drivers/input/misc/palmas-pwrbutton.c
@@ -15,6 +15,7 @@
* GNU General Public License for more details.
*/
+#include <linux/bitfield.h>
#include <linux/init.h>
#include <linux/input.h>
#include <linux/interrupt.h>
@@ -115,8 +116,8 @@ static void palmas_pwron_params_ofinit(struct device *dev,
struct device_node *np;
u32 val;
int i, error;
- u8 lpk_times[] = { 6, 8, 10, 12 };
- int pwr_on_deb_ms[] = { 15, 100, 500, 1000 };
+ static const u8 lpk_times[] = { 6, 8, 10, 12 };
+ static const int pwr_on_deb_ms[] = { 15, 100, 500, 1000 };
memset(config, 0, sizeof(*config));
@@ -192,8 +193,8 @@ static int palmas_pwron_probe(struct platform_device *pdev)
* Setup default hardware shutdown option (long key press)
* and debounce.
*/
- val = config.long_press_time_val << __ffs(PALMAS_LPK_TIME_MASK);
- val |= config.pwron_debounce_val << __ffs(PALMAS_PWRON_DEBOUNCE_MASK);
+ val = FIELD_PREP(PALMAS_LPK_TIME_MASK, config.long_press_time_val) |
+ FIELD_PREP(PALMAS_PWRON_DEBOUNCE_MASK, config.pwron_debounce_val);
error = palmas_update_bits(palmas, PALMAS_PMU_CONTROL_BASE,
PALMAS_LONG_PRESS_KEY,
PALMAS_LPK_TIME_MASK |
diff --git a/drivers/input/mouse/byd.c b/drivers/input/mouse/byd.c
index 6e0c5f5a2713..221a553f45cd 100644
--- a/drivers/input/mouse/byd.c
+++ b/drivers/input/mouse/byd.c
@@ -191,7 +191,7 @@
/*
* The touchpad generates a mixture of absolute and relative packets, indicated
- * by the the last byte of each packet being set to one of the following:
+ * by the last byte of each packet being set to one of the following:
*/
#define BYD_PACKET_ABSOLUTE 0xf8
#define BYD_PACKET_RELATIVE 0x00
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index aaa3c455e01e..a3bfc7a41679 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -297,6 +297,108 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data)
return -ENOMSG;
}
+static struct input_dev *goodix_create_pen_input(struct goodix_ts_data *ts)
+{
+ struct device *dev = &ts->client->dev;
+ struct input_dev *input;
+
+ input = devm_input_allocate_device(dev);
+ if (!input)
+ return NULL;
+
+ input_alloc_absinfo(input);
+ if (!input->absinfo) {
+ input_free_device(input);
+ return NULL;
+ }
+
+ input->absinfo[ABS_X] = ts->input_dev->absinfo[ABS_MT_POSITION_X];
+ input->absinfo[ABS_Y] = ts->input_dev->absinfo[ABS_MT_POSITION_Y];
+ __set_bit(ABS_X, input->absbit);
+ __set_bit(ABS_Y, input->absbit);
+ input_set_abs_params(input, ABS_PRESSURE, 0, 255, 0, 0);
+
+ input_set_capability(input, EV_KEY, BTN_TOUCH);
+ input_set_capability(input, EV_KEY, BTN_TOOL_PEN);
+ input_set_capability(input, EV_KEY, BTN_STYLUS);
+ input_set_capability(input, EV_KEY, BTN_STYLUS2);
+ __set_bit(INPUT_PROP_DIRECT, input->propbit);
+ /*
+ * The resolution of these touchscreens is about 10 units/mm, the actual
+ * resolution does not matter much since we set INPUT_PROP_DIRECT.
+ * Userspace wants something here though, so just set it to 10 units/mm.
+ */
+ input_abs_set_res(input, ABS_X, 10);
+ input_abs_set_res(input, ABS_Y, 10);
+
+ input->name = "Goodix Active Pen";
+ input->phys = "input/pen";
+ input->id.bustype = BUS_I2C;
+ input->id.vendor = 0x0416;
+ if (kstrtou16(ts->id, 10, &input->id.product))
+ input->id.product = 0x1001;
+ input->id.version = ts->version;
+
+ if (input_register_device(input) != 0) {
+ input_free_device(input);
+ return NULL;
+ }
+
+ return input;
+}
+
+static void goodix_ts_report_pen_down(struct goodix_ts_data *ts, u8 *data)
+{
+ int input_x, input_y, input_w;
+ u8 key_value;
+
+ if (!ts->input_pen) {
+ ts->input_pen = goodix_create_pen_input(ts);
+ if (!ts->input_pen)
+ return;
+ }
+
+ if (ts->contact_size == 9) {
+ input_x = get_unaligned_le16(&data[4]);
+ input_y = get_unaligned_le16(&data[6]);
+ input_w = get_unaligned_le16(&data[8]);
+ } else {
+ input_x = get_unaligned_le16(&data[2]);
+ input_y = get_unaligned_le16(&data[4]);
+ input_w = get_unaligned_le16(&data[6]);
+ }
+
+ touchscreen_report_pos(ts->input_pen, &ts->prop, input_x, input_y, false);
+ input_report_abs(ts->input_pen, ABS_PRESSURE, input_w);
+
+ input_report_key(ts->input_pen, BTN_TOUCH, 1);
+ input_report_key(ts->input_pen, BTN_TOOL_PEN, 1);
+
+ if (data[0] & GOODIX_HAVE_KEY) {
+ key_value = data[1 + ts->contact_size];
+ input_report_key(ts->input_pen, BTN_STYLUS, key_value & 0x10);
+ input_report_key(ts->input_pen, BTN_STYLUS2, key_value & 0x20);
+ } else {
+ input_report_key(ts->input_pen, BTN_STYLUS, 0);
+ input_report_key(ts->input_pen, BTN_STYLUS2, 0);
+ }
+
+ input_sync(ts->input_pen);
+}
+
+static void goodix_ts_report_pen_up(struct goodix_ts_data *ts)
+{
+ if (!ts->input_pen)
+ return;
+
+ input_report_key(ts->input_pen, BTN_TOUCH, 0);
+ input_report_key(ts->input_pen, BTN_TOOL_PEN, 0);
+ input_report_key(ts->input_pen, BTN_STYLUS, 0);
+ input_report_key(ts->input_pen, BTN_STYLUS2, 0);
+
+ input_sync(ts->input_pen);
+}
+
static void goodix_ts_report_touch_8b(struct goodix_ts_data *ts, u8 *coor_data)
{
int id = coor_data[0] & 0x0F;
@@ -327,6 +429,14 @@ static void goodix_ts_report_touch_9b(struct goodix_ts_data *ts, u8 *coor_data)
input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w);
}
+static void goodix_ts_release_keys(struct goodix_ts_data *ts)
+{
+ int i;
+
+ for (i = 0; i < GOODIX_MAX_KEYS; i++)
+ input_report_key(ts->input_dev, ts->keymap[i], 0);
+}
+
static void goodix_ts_report_key(struct goodix_ts_data *ts, u8 *data)
{
int touch_num;
@@ -341,8 +451,7 @@ static void goodix_ts_report_key(struct goodix_ts_data *ts, u8 *data)
input_report_key(ts->input_dev,
ts->keymap[i], 1);
} else {
- for (i = 0; i < GOODIX_MAX_KEYS; i++)
- input_report_key(ts->input_dev, ts->keymap[i], 0);
+ goodix_ts_release_keys(ts);
}
}
@@ -364,6 +473,15 @@ static void goodix_process_events(struct goodix_ts_data *ts)
if (touch_num < 0)
return;
+ /* The pen being down is always reported as a single touch */
+ if (touch_num == 1 && (point_data[1] & 0x80)) {
+ goodix_ts_report_pen_down(ts, point_data);
+ goodix_ts_release_keys(ts);
+ goto sync; /* Release any previously registered touches */
+ } else {
+ goodix_ts_report_pen_up(ts);
+ }
+
goodix_ts_report_key(ts, point_data);
for (i = 0; i < touch_num; i++)
@@ -374,6 +492,7 @@ static void goodix_process_events(struct goodix_ts_data *ts)
goodix_ts_report_touch_8b(ts,
&point_data[1 + ts->contact_size * i]);
+sync:
input_mt_sync_frame(ts->input_dev);
input_sync(ts->input_dev);
}
@@ -857,7 +976,7 @@ retry_get_irq_gpio:
if (IS_ERR(gpiod)) {
error = PTR_ERR(gpiod);
if (error != -EPROBE_DEFER)
- dev_dbg(dev, "Failed to get %s GPIO: %d\n",
+ dev_err(dev, "Failed to get %s GPIO: %d\n",
GOODIX_GPIO_INT_NAME, error);
return error;
}
@@ -874,7 +993,7 @@ retry_get_irq_gpio:
if (IS_ERR(gpiod)) {
error = PTR_ERR(gpiod);
if (error != -EPROBE_DEFER)
- dev_dbg(dev, "Failed to get %s GPIO: %d\n",
+ dev_err(dev, "Failed to get %s GPIO: %d\n",
GOODIX_GPIO_RST_NAME, error);
return error;
}
diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h
index 02065d1c3263..fa8602e78a64 100644
--- a/drivers/input/touchscreen/goodix.h
+++ b/drivers/input/touchscreen/goodix.h
@@ -76,6 +76,7 @@ struct goodix_chip_data {
struct goodix_ts_data {
struct i2c_client *client;
struct input_dev *input_dev;
+ struct input_dev *input_pen;
const struct goodix_chip_data *chip;
const char *firmware_name;
struct touchscreen_properties prop;
diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c
index 1ee760bac0cf..3eef8c01090f 100644
--- a/drivers/input/touchscreen/silead.c
+++ b/drivers/input/touchscreen/silead.c
@@ -67,6 +67,7 @@ struct silead_ts_data {
struct i2c_client *client;
struct gpio_desc *gpio_power;
struct input_dev *input;
+ struct input_dev *pen_input;
struct regulator_bulk_data regulators[2];
char fw_name[64];
struct touchscreen_properties prop;
@@ -75,6 +76,13 @@ struct silead_ts_data {
struct input_mt_pos pos[SILEAD_MAX_FINGERS];
int slots[SILEAD_MAX_FINGERS];
int id[SILEAD_MAX_FINGERS];
+ u32 efi_fw_min_max[4];
+ bool efi_fw_min_max_set;
+ bool pen_supported;
+ bool pen_down;
+ u32 pen_x_res;
+ u32 pen_y_res;
+ int pen_up_count;
};
struct silead_fw_data {
@@ -82,6 +90,35 @@ struct silead_fw_data {
u32 val;
};
+static void silead_apply_efi_fw_min_max(struct silead_ts_data *data)
+{
+ struct input_absinfo *absinfo_x = &data->input->absinfo[ABS_MT_POSITION_X];
+ struct input_absinfo *absinfo_y = &data->input->absinfo[ABS_MT_POSITION_Y];
+
+ if (!data->efi_fw_min_max_set)
+ return;
+
+ absinfo_x->minimum = data->efi_fw_min_max[0];
+ absinfo_x->maximum = data->efi_fw_min_max[1];
+ absinfo_y->minimum = data->efi_fw_min_max[2];
+ absinfo_y->maximum = data->efi_fw_min_max[3];
+
+ if (data->prop.invert_x) {
+ absinfo_x->maximum -= absinfo_x->minimum;
+ absinfo_x->minimum = 0;
+ }
+
+ if (data->prop.invert_y) {
+ absinfo_y->maximum -= absinfo_y->minimum;
+ absinfo_y->minimum = 0;
+ }
+
+ if (data->prop.swap_x_y) {
+ swap(absinfo_x->minimum, absinfo_y->minimum);
+ swap(absinfo_x->maximum, absinfo_y->maximum);
+ }
+}
+
static int silead_ts_request_input_dev(struct silead_ts_data *data)
{
struct device *dev = &data->client->dev;
@@ -97,6 +134,7 @@ static int silead_ts_request_input_dev(struct silead_ts_data *data)
input_set_abs_params(data->input, ABS_MT_POSITION_X, 0, 4095, 0, 0);
input_set_abs_params(data->input, ABS_MT_POSITION_Y, 0, 4095, 0, 0);
touchscreen_parse_properties(data->input, true, &data->prop);
+ silead_apply_efi_fw_min_max(data);
input_mt_init_slots(data->input, data->max_fingers,
INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED |
@@ -112,6 +150,40 @@ static int silead_ts_request_input_dev(struct silead_ts_data *data)
error = input_register_device(data->input);
if (error) {
dev_err(dev, "Failed to register input device: %d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static int silead_ts_request_pen_input_dev(struct silead_ts_data *data)
+{
+ struct device *dev = &data->client->dev;
+ int error;
+
+ if (!data->pen_supported)
+ return 0;
+
+ data->pen_input = devm_input_allocate_device(dev);
+ if (!data->pen_input)
+ return -ENOMEM;
+
+ input_set_abs_params(data->pen_input, ABS_X, 0, 4095, 0, 0);
+ input_set_abs_params(data->pen_input, ABS_Y, 0, 4095, 0, 0);
+ input_set_capability(data->pen_input, EV_KEY, BTN_TOUCH);
+ input_set_capability(data->pen_input, EV_KEY, BTN_TOOL_PEN);
+ set_bit(INPUT_PROP_DIRECT, data->pen_input->propbit);
+ touchscreen_parse_properties(data->pen_input, false, &data->prop);
+ input_abs_set_res(data->pen_input, ABS_X, data->pen_x_res);
+ input_abs_set_res(data->pen_input, ABS_Y, data->pen_y_res);
+
+ data->pen_input->name = SILEAD_TS_NAME " pen";
+ data->pen_input->phys = "input/pen";
+ data->input->id.bustype = BUS_I2C;
+
+ error = input_register_device(data->pen_input);
+ if (error) {
+ dev_err(dev, "Failed to register pen input device: %d\n", error);
return error;
}
@@ -129,6 +201,45 @@ static void silead_ts_set_power(struct i2c_client *client,
}
}
+static bool silead_ts_handle_pen_data(struct silead_ts_data *data, u8 *buf)
+{
+ u8 *coord = buf + SILEAD_POINT_DATA_LEN;
+ struct input_mt_pos pos;
+
+ if (!data->pen_supported || buf[2] != 0x00 || buf[3] != 0x00)
+ return false;
+
+ if (buf[0] == 0x00 && buf[1] == 0x00 && data->pen_down) {
+ data->pen_up_count++;
+ if (data->pen_up_count == 6) {
+ data->pen_down = false;
+ goto sync;
+ }
+ return true;
+ }
+
+ if (buf[0] == 0x01 && buf[1] == 0x08) {
+ touchscreen_set_mt_pos(&pos, &data->prop,
+ get_unaligned_le16(&coord[SILEAD_POINT_X_OFF]) & 0xfff,
+ get_unaligned_le16(&coord[SILEAD_POINT_Y_OFF]) & 0xfff);
+
+ input_report_abs(data->pen_input, ABS_X, pos.x);
+ input_report_abs(data->pen_input, ABS_Y, pos.y);
+
+ data->pen_up_count = 0;
+ data->pen_down = true;
+ goto sync;
+ }
+
+ return false;
+
+sync:
+ input_report_key(data->pen_input, BTN_TOOL_PEN, data->pen_down);
+ input_report_key(data->pen_input, BTN_TOUCH, data->pen_down);
+ input_sync(data->pen_input);
+ return true;
+}
+
static void silead_ts_read_data(struct i2c_client *client)
{
struct silead_ts_data *data = i2c_get_clientdata(client);
@@ -151,6 +262,9 @@ static void silead_ts_read_data(struct i2c_client *client)
buf[0] = data->max_fingers;
}
+ if (silead_ts_handle_pen_data(data, buf))
+ goto sync; /* Pen is down, release all previous touches */
+
touch_nr = 0;
bufp = buf + SILEAD_POINT_DATA_LEN;
for (i = 0; i < buf[0]; i++, bufp += SILEAD_POINT_DATA_LEN) {
@@ -193,6 +307,7 @@ static void silead_ts_read_data(struct i2c_client *client)
data->pos[i].y, data->id[i], data->slots[i]);
}
+sync:
input_mt_sync_frame(input);
input_report_key(input, KEY_LEFTMETA, softbutton_pressed);
input_sync(input);
@@ -282,17 +397,56 @@ static int silead_ts_load_fw(struct i2c_client *client)
{
struct device *dev = &client->dev;
struct silead_ts_data *data = i2c_get_clientdata(client);
- unsigned int fw_size, i;
- const struct firmware *fw;
+ const struct firmware *fw = NULL;
struct silead_fw_data *fw_data;
+ unsigned int fw_size, i;
int error;
dev_dbg(dev, "Firmware file name: %s", data->fw_name);
- error = firmware_request_platform(&fw, data->fw_name, dev);
+ /*
+ * Unfortunately, at the time of writing this comment, we have been unable to
+ * get permission from Silead, or from device OEMs, to distribute the necessary
+ * Silead firmware files in linux-firmware.
+ *
+ * On a whole bunch of devices the UEFI BIOS code contains a touchscreen driver,
+ * which contains an embedded copy of the firmware. The fw-loader code has a
+ * "platform" fallback mechanism, which together with info on the firmware
+ * from drivers/platform/x86/touchscreen_dmi.c will use the firmware from the
+ * UEFI driver when the firmware is missing from /lib/firmware. This makes the
+ * touchscreen work OOTB without users needing to manually download the firmware.
+ *
+ * The firmware bundled with the original Windows/Android is usually newer then
+ * the firmware in the UEFI driver and it is better calibrated. This better
+ * calibration can lead to significant differences in the reported min/max
+ * coordinates.
+ *
+ * To deal with this we first try to load the firmware without "platform"
+ * fallback. If that fails we retry with "platform" fallback and if that
+ * succeeds we apply an (optional) set of alternative min/max values from the
+ * "silead,efi-fw-min-max" property.
+ */
+ error = firmware_request_nowarn(&fw, data->fw_name, dev);
if (error) {
- dev_err(dev, "Firmware request error %d\n", error);
- return error;
+ error = firmware_request_platform(&fw, data->fw_name, dev);
+ if (error) {
+ dev_err(dev, "Firmware request error %d\n", error);
+ return error;
+ }
+
+ error = device_property_read_u32_array(dev, "silead,efi-fw-min-max",
+ data->efi_fw_min_max,
+ ARRAY_SIZE(data->efi_fw_min_max));
+ if (!error)
+ data->efi_fw_min_max_set = true;
+
+ /* The EFI (platform) embedded fw does not have pen support */
+ if (data->pen_supported) {
+ dev_warn(dev, "Warning loading '%s' from filesystem failed, using EFI embedded copy.\n",
+ data->fw_name);
+ dev_warn(dev, "Warning pen support is known to be broken in the EFI embedded fw version\n");
+ data->pen_supported = false;
+ }
}
fw_size = fw->size / sizeof(*fw_data);
@@ -450,6 +604,10 @@ static void silead_ts_read_props(struct i2c_client *client)
"silead/%s", str);
else
dev_dbg(dev, "Firmware file name read error. Using default.");
+
+ data->pen_supported = device_property_read_bool(dev, "silead,pen-supported");
+ device_property_read_u32(dev, "silead,pen-resolution-x", &data->pen_x_res);
+ device_property_read_u32(dev, "silead,pen-resolution-y", &data->pen_y_res);
}
#ifdef CONFIG_ACPI
@@ -562,6 +720,10 @@ static int silead_ts_probe(struct i2c_client *client,
if (error)
return error;
+ error = silead_ts_request_pen_input_dev(data);
+ if (error)
+ return error;
+
error = devm_request_threaded_irq(dev, client->irq,
NULL, silead_ts_threaded_irq_handler,
IRQF_ONESHOT, client->name, data);
diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
index 83e685557a19..f2fb6a9a1a57 100644
--- a/drivers/input/touchscreen/ti_am335x_tsc.c
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -126,12 +126,13 @@ static int titsc_config_wires(struct titsc *ts_dev)
static void titsc_step_config(struct titsc *ts_dev)
{
unsigned int config;
- int i;
+ int i, n;
int end_step, first_step, tsc_steps;
u32 stepenable;
config = STEPCONFIG_MODE_HWSYNC |
- STEPCONFIG_AVG_16 | ts_dev->bit_xp;
+ STEPCONFIG_AVG_16 | ts_dev->bit_xp |
+ STEPCONFIG_INM_ADCREFM;
switch (ts_dev->wires) {
case 4:
config |= STEPCONFIG_INP(ts_dev->inp_yp) | ts_dev->bit_xn;
@@ -150,9 +151,11 @@ static void titsc_step_config(struct titsc *ts_dev)
first_step = TOTAL_STEPS - tsc_steps;
/* Steps 16 to 16-coordinate_readouts is for X */
end_step = first_step + tsc_steps;
+ n = 0;
for (i = end_step - ts_dev->coordinate_readouts; i < end_step; i++) {
titsc_writel(ts_dev, REG_STEPCONFIG(i), config);
- titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
+ titsc_writel(ts_dev, REG_STEPDELAY(i),
+ n++ == 0 ? STEPCONFIG_OPENDLY : 0);
}
config = 0;
@@ -174,9 +177,11 @@ static void titsc_step_config(struct titsc *ts_dev)
/* 1 ... coordinate_readouts is for Y */
end_step = first_step + ts_dev->coordinate_readouts;
+ n = 0;
for (i = first_step; i < end_step; i++) {
titsc_writel(ts_dev, REG_STEPCONFIG(i), config);
- titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
+ titsc_writel(ts_dev, REG_STEPDELAY(i),
+ n++ == 0 ? STEPCONFIG_OPENDLY : 0);
}
/* Make CHARGECONFIG same as IDLECONFIG */
@@ -195,7 +200,10 @@ static void titsc_step_config(struct titsc *ts_dev)
STEPCONFIG_OPENDLY);
end_step++;
- config |= STEPCONFIG_INP(ts_dev->inp_yn);
+ config = STEPCONFIG_MODE_HWSYNC |
+ STEPCONFIG_AVG_16 | ts_dev->bit_yp |
+ ts_dev->bit_xn | STEPCONFIG_INM_ADCREFM |
+ STEPCONFIG_INP(ts_dev->inp_yn);
titsc_writel(ts_dev, REG_STEPCONFIG(end_step), config);
titsc_writel(ts_dev, REG_STEPDELAY(end_step),
STEPCONFIG_OPENDLY);
@@ -310,7 +318,7 @@ static irqreturn_t titsc_irq(int irq, void *dev)
/*
* Calculate pressure using formula
* Resistance(touch) = x plate resistance *
- * x postion/4096 * ((z2 / z1) - 1)
+ * x position/4096 * ((z2 / z1) - 1)
*/
z = z1 - z2;
z *= x;
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index e3f2c940ef3d..dfd3b35590c3 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -186,7 +186,6 @@ static irqreturn_t ucb1400_irq(int irqnr, void *devid)
{
struct ucb1400_ts *ucb = devid;
unsigned int x, y, p;
- bool penup;
if (unlikely(irqnr != ucb->irq))
return IRQ_NONE;
@@ -196,8 +195,7 @@ static irqreturn_t ucb1400_irq(int irqnr, void *devid)
/* Start with a small delay before checking pendown state */
msleep(UCB1400_TS_POLL_PERIOD);
- while (!ucb->stopped && !(penup = ucb1400_ts_pen_up(ucb))) {
-
+ while (!ucb->stopped && !ucb1400_ts_pen_up(ucb)) {
ucb1400_adc_enable(ucb->ac97);
x = ucb1400_ts_read_xpos(ucb);
y = ucb1400_ts_read_ypos(ucb);
diff --git a/drivers/input/touchscreen/wacom_i2c.c b/drivers/input/touchscreen/wacom_i2c.c
index fe4ea6204a4e..141754b2764c 100644
--- a/drivers/input/touchscreen/wacom_i2c.c
+++ b/drivers/input/touchscreen/wacom_i2c.c
@@ -24,12 +24,19 @@
#define WACOM_IN_PROXIMITY BIT(5)
/* Registers */
-#define WACOM_CMD_QUERY0 0x04
-#define WACOM_CMD_QUERY1 0x00
-#define WACOM_CMD_QUERY2 0x33
-#define WACOM_CMD_QUERY3 0x02
-#define WACOM_CMD_THROW0 0x05
-#define WACOM_CMD_THROW1 0x00
+#define WACOM_COMMAND_LSB 0x04
+#define WACOM_COMMAND_MSB 0x00
+
+#define WACOM_DATA_LSB 0x05
+#define WACOM_DATA_MSB 0x00
+
+/* Report types */
+#define REPORT_FEATURE 0x30
+
+/* Requests / operations */
+#define OPCODE_GET_REPORT 0x02
+
+#define WACOM_QUERY_REPORT 3
#define WACOM_QUERY_SIZE 19
struct wacom_features {
@@ -50,23 +57,24 @@ struct wacom_i2c {
static int wacom_query_device(struct i2c_client *client,
struct wacom_features *features)
{
- int ret;
- u8 cmd1[] = { WACOM_CMD_QUERY0, WACOM_CMD_QUERY1,
- WACOM_CMD_QUERY2, WACOM_CMD_QUERY3 };
- u8 cmd2[] = { WACOM_CMD_THROW0, WACOM_CMD_THROW1 };
+ u8 get_query_data_cmd[] = {
+ WACOM_COMMAND_LSB,
+ WACOM_COMMAND_MSB,
+ REPORT_FEATURE | WACOM_QUERY_REPORT,
+ OPCODE_GET_REPORT,
+ WACOM_DATA_LSB,
+ WACOM_DATA_MSB,
+ };
u8 data[WACOM_QUERY_SIZE];
+ int ret;
+
struct i2c_msg msgs[] = {
+ /* Request reading of feature ReportID: 3 (Pen Query Data) */
{
.addr = client->addr,
.flags = 0,
- .len = sizeof(cmd1),
- .buf = cmd1,
- },
- {
- .addr = client->addr,
- .flags = 0,
- .len = sizeof(cmd2),
- .buf = cmd2,
+ .len = sizeof(get_query_data_cmd),
+ .buf = get_query_data_cmd,
},
{
.addr = client->addr,
diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c
index 1e70b8d2a8d7..7c82c4f5fa6b 100644
--- a/drivers/input/touchscreen/zinitix.c
+++ b/drivers/input/touchscreen/zinitix.c
@@ -252,16 +252,27 @@ static int zinitix_init_touch(struct bt541_ts_data *bt541)
static int zinitix_init_regulators(struct bt541_ts_data *bt541)
{
- struct i2c_client *client = bt541->client;
+ struct device *dev = &bt541->client->dev;
int error;
- bt541->supplies[0].supply = "vdd";
- bt541->supplies[1].supply = "vddo";
- error = devm_regulator_bulk_get(&client->dev,
+ /*
+ * Some older device trees have erroneous names for the regulators,
+ * so check if "vddo" is present and in that case use these names.
+ * Else use the proper supply names on the component.
+ */
+ if (of_find_property(dev->of_node, "vddo-supply", NULL)) {
+ bt541->supplies[0].supply = "vdd";
+ bt541->supplies[1].supply = "vddo";
+ } else {
+ /* Else use the proper supply names */
+ bt541->supplies[0].supply = "vcca";
+ bt541->supplies[1].supply = "vdd";
+ }
+ error = devm_regulator_bulk_get(dev,
ARRAY_SIZE(bt541->supplies),
bt541->supplies);
if (error < 0) {
- dev_err(&client->dev, "Failed to get regulators: %d\n", error);
+ dev_err(dev, "Failed to get regulators: %d\n", error);
return error;
}
@@ -560,6 +571,7 @@ static SIMPLE_DEV_PM_OPS(zinitix_pm_ops, zinitix_suspend, zinitix_resume);
#ifdef CONFIG_OF
static const struct of_device_id zinitix_of_match[] = {
+ { .compatible = "zinitix,bt532" },
{ .compatible = "zinitix,bt541" },
{ }
};
diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig
index daf1e25f6042..91353e651a52 100644
--- a/drivers/interconnect/qcom/Kconfig
+++ b/drivers/interconnect/qcom/Kconfig
@@ -35,6 +35,15 @@ config INTERCONNECT_QCOM_MSM8974
This is a driver for the Qualcomm Network-on-Chip on msm8974-based
platforms.
+config INTERCONNECT_QCOM_MSM8996
+ tristate "Qualcomm MSM8996 interconnect driver"
+ depends on INTERCONNECT_QCOM
+ depends on QCOM_SMD_RPM
+ select INTERCONNECT_QCOM_SMD_RPM
+ help
+ This is a driver for the Qualcomm Network-on-Chip on msm8996-based
+ platforms.
+
config INTERCONNECT_QCOM_OSM_L3
tristate "Qualcomm OSM L3 interconnect driver"
depends on INTERCONNECT_QCOM || COMPILE_TEST
@@ -42,6 +51,15 @@ config INTERCONNECT_QCOM_OSM_L3
Say y here to support the Operating State Manager (OSM) interconnect
driver which controls the scaling of L3 caches on Qualcomm SoCs.
+config INTERCONNECT_QCOM_QCM2290
+ tristate "Qualcomm QCM2290 interconnect driver"
+ depends on INTERCONNECT_QCOM
+ depends on QCOM_SMD_RPM
+ select INTERCONNECT_QCOM_SMD_RPM
+ help
+ This is a driver for the Qualcomm Network-on-Chip on qcm2290-based
+ platforms.
+
config INTERCONNECT_QCOM_QCS404
tristate "Qualcomm QCS404 interconnect driver"
depends on INTERCONNECT_QCOM
@@ -146,5 +164,14 @@ config INTERCONNECT_QCOM_SM8350
This is a driver for the Qualcomm Network-on-Chip on SM8350-based
platforms.
+config INTERCONNECT_QCOM_SM8450
+ tristate "Qualcomm SM8450 interconnect driver"
+ depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
+ select INTERCONNECT_QCOM_RPMH
+ select INTERCONNECT_QCOM_BCM_VOTER
+ help
+ This is a driver for the Qualcomm Network-on-Chip on SM8450-based
+ platforms.
+
config INTERCONNECT_QCOM_SMD_RPM
tristate
diff --git a/drivers/interconnect/qcom/Makefile b/drivers/interconnect/qcom/Makefile
index 69300b1d48ef..ceae9bb566c6 100644
--- a/drivers/interconnect/qcom/Makefile
+++ b/drivers/interconnect/qcom/Makefile
@@ -4,7 +4,9 @@ icc-bcm-voter-objs := bcm-voter.o
qnoc-msm8916-objs := msm8916.o
qnoc-msm8939-objs := msm8939.o
qnoc-msm8974-objs := msm8974.o
+qnoc-msm8996-objs := msm8996.o
icc-osm-l3-objs := osm-l3.o
+qnoc-qcm2290-objs := qcm2290.o
qnoc-qcs404-objs := qcs404.o
icc-rpmh-obj := icc-rpmh.o
qnoc-sc7180-objs := sc7180.o
@@ -16,13 +18,16 @@ qnoc-sdx55-objs := sdx55.o
qnoc-sm8150-objs := sm8150.o
qnoc-sm8250-objs := sm8250.o
qnoc-sm8350-objs := sm8350.o
+qnoc-sm8450-objs := sm8450.o
icc-smd-rpm-objs := smd-rpm.o icc-rpm.o
obj-$(CONFIG_INTERCONNECT_QCOM_BCM_VOTER) += icc-bcm-voter.o
obj-$(CONFIG_INTERCONNECT_QCOM_MSM8916) += qnoc-msm8916.o
obj-$(CONFIG_INTERCONNECT_QCOM_MSM8939) += qnoc-msm8939.o
obj-$(CONFIG_INTERCONNECT_QCOM_MSM8974) += qnoc-msm8974.o
+obj-$(CONFIG_INTERCONNECT_QCOM_MSM8996) += qnoc-msm8996.o
obj-$(CONFIG_INTERCONNECT_QCOM_OSM_L3) += icc-osm-l3.o
+obj-$(CONFIG_INTERCONNECT_QCOM_QCM2290) += qnoc-qcm2290.o
obj-$(CONFIG_INTERCONNECT_QCOM_QCS404) += qnoc-qcs404.o
obj-$(CONFIG_INTERCONNECT_QCOM_RPMH) += icc-rpmh.o
obj-$(CONFIG_INTERCONNECT_QCOM_SC7180) += qnoc-sc7180.o
@@ -34,4 +39,5 @@ obj-$(CONFIG_INTERCONNECT_QCOM_SDX55) += qnoc-sdx55.o
obj-$(CONFIG_INTERCONNECT_QCOM_SM8150) += qnoc-sm8150.o
obj-$(CONFIG_INTERCONNECT_QCOM_SM8250) += qnoc-sm8250.o
obj-$(CONFIG_INTERCONNECT_QCOM_SM8350) += qnoc-sm8350.o
+obj-$(CONFIG_INTERCONNECT_QCOM_SM8450) += qnoc-sm8450.o
obj-$(CONFIG_INTERCONNECT_QCOM_SMD_RPM) += icc-smd-rpm.o
diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
index ef7999a08c8b..34125e8f8b60 100644
--- a/drivers/interconnect/qcom/icc-rpm.c
+++ b/drivers/interconnect/qcom/icc-rpm.c
@@ -11,12 +11,20 @@
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/regmap.h>
#include <linux/slab.h>
#include "smd-rpm.h"
#include "icc-rpm.h"
+/* QNOC QoS */
+#define QNOC_QOS_MCTL_LOWn_ADDR(n) (0x8 + (n * 0x1000))
+#define QNOC_QOS_MCTL_DFLT_PRIO_MASK 0x70
+#define QNOC_QOS_MCTL_DFLT_PRIO_SHIFT 4
+#define QNOC_QOS_MCTL_URGFWD_EN_MASK 0x8
+#define QNOC_QOS_MCTL_URGFWD_EN_SHIFT 3
+
/* BIMC QoS */
#define M_BKE_REG_BASE(n) (0x300 + (0x4000 * n))
#define M_BKE_EN_ADDR(n) (M_BKE_REG_BASE(n))
@@ -39,6 +47,27 @@
#define NOC_QOS_MODEn_ADDR(n) (0xc + (n * 0x1000))
#define NOC_QOS_MODEn_MASK 0x3
+static int qcom_icc_set_qnoc_qos(struct icc_node *src, u64 max_bw)
+{
+ struct icc_provider *provider = src->provider;
+ struct qcom_icc_provider *qp = to_qcom_provider(provider);
+ struct qcom_icc_node *qn = src->data;
+ struct qcom_icc_qos *qos = &qn->qos;
+ int rc;
+
+ rc = regmap_update_bits(qp->regmap,
+ qp->qos_offset + QNOC_QOS_MCTL_LOWn_ADDR(qos->qos_port),
+ QNOC_QOS_MCTL_DFLT_PRIO_MASK,
+ qos->areq_prio << QNOC_QOS_MCTL_DFLT_PRIO_SHIFT);
+ if (rc)
+ return rc;
+
+ return regmap_update_bits(qp->regmap,
+ qp->qos_offset + QNOC_QOS_MCTL_LOWn_ADDR(qos->qos_port),
+ QNOC_QOS_MCTL_URGFWD_EN_MASK,
+ !!qos->urg_fwd_en << QNOC_QOS_MCTL_URGFWD_EN_SHIFT);
+}
+
static int qcom_icc_bimc_set_qos_health(struct qcom_icc_provider *qp,
struct qcom_icc_qos *qos,
int regnum)
@@ -76,7 +105,7 @@ static int qcom_icc_set_bimc_qos(struct icc_node *src, u64 max_bw)
provider = src->provider;
qp = to_qcom_provider(provider);
- if (qn->qos.qos_mode != -1)
+ if (qn->qos.qos_mode != NOC_QOS_MODE_INVALID)
mode = qn->qos.qos_mode;
/* QoS Priority: The QoS Health parameters are getting considered
@@ -137,7 +166,7 @@ static int qcom_icc_set_noc_qos(struct icc_node *src, u64 max_bw)
return 0;
}
- if (qn->qos.qos_mode != -1)
+ if (qn->qos.qos_mode != NOC_QOS_MODE_INVALID)
mode = qn->qos.qos_mode;
if (mode == NOC_QOS_MODE_FIXED) {
@@ -163,10 +192,14 @@ static int qcom_icc_qos_set(struct icc_node *node, u64 sum_bw)
dev_dbg(node->provider->dev, "Setting QoS for %s\n", qn->name);
- if (qp->is_bimc_node)
+ switch (qp->type) {
+ case QCOM_ICC_BIMC:
return qcom_icc_set_bimc_qos(node, sum_bw);
-
- return qcom_icc_set_noc_qos(node, sum_bw);
+ case QCOM_ICC_QNOC:
+ return qcom_icc_set_qnoc_qos(node, sum_bw);
+ default:
+ return qcom_icc_set_noc_qos(node, sum_bw);
+ }
}
static int qcom_icc_rpm_set(int mas_rpm_id, int slv_rpm_id, u64 sum_bw)
@@ -239,6 +272,7 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
rate = max(sum_bw, max_peak_bw);
do_div(rate, qn->buswidth);
+ rate = min_t(u64, rate, LONG_MAX);
if (qn->rate == rate)
return 0;
@@ -307,7 +341,7 @@ int qnoc_probe(struct platform_device *pdev)
qp->bus_clks[i].id = cds[i];
qp->num_clks = cd_num;
- qp->is_bimc_node = desc->is_bimc_node;
+ qp->type = desc->type;
qp->qos_offset = desc->qos_offset;
if (desc->regmap_cfg) {
@@ -315,8 +349,13 @@ int qnoc_probe(struct platform_device *pdev)
void __iomem *mmio;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
+ if (!res) {
+ /* Try parent's regmap */
+ qp->regmap = dev_get_regmap(dev->parent, NULL);
+ if (qp->regmap)
+ goto regmap_done;
return -ENODEV;
+ }
mmio = devm_ioremap_resource(dev, res);
@@ -332,6 +371,7 @@ int qnoc_probe(struct platform_device *pdev)
}
}
+regmap_done:
ret = devm_clk_bulk_get(dev, qp->num_clks, qp->bus_clks);
if (ret)
return ret;
@@ -340,6 +380,12 @@ int qnoc_probe(struct platform_device *pdev)
if (ret)
return ret;
+ if (desc->has_bus_pd) {
+ ret = dev_pm_domain_attach(dev, true);
+ if (ret)
+ return ret;
+ }
+
provider = &qp->provider;
INIT_LIST_HEAD(&provider->nodes);
provider->dev = dev;
@@ -377,6 +423,10 @@ int qnoc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, qp);
+ /* Populate child NoC devices if any */
+ if (of_get_child_count(dev->of_node) > 0)
+ return of_platform_populate(dev->of_node, NULL, NULL, dev);
+
return 0;
err:
icc_nodes_remove(provider);
diff --git a/drivers/interconnect/qcom/icc-rpm.h b/drivers/interconnect/qcom/icc-rpm.h
index f5744de4da19..26dad006034f 100644
--- a/drivers/interconnect/qcom/icc-rpm.h
+++ b/drivers/interconnect/qcom/icc-rpm.h
@@ -12,19 +12,25 @@
#define to_qcom_provider(_provider) \
container_of(_provider, struct qcom_icc_provider, provider)
+enum qcom_icc_type {
+ QCOM_ICC_NOC,
+ QCOM_ICC_BIMC,
+ QCOM_ICC_QNOC,
+};
+
/**
* struct qcom_icc_provider - Qualcomm specific interconnect provider
* @provider: generic interconnect provider
* @bus_clks: the clk_bulk_data table of bus clocks
* @num_clks: the total number of clk_bulk_data entries
- * @is_bimc_node: indicates whether to use bimc specific setting
+ * @type: the ICC provider type
* @qos_offset: offset to QoS registers
* @regmap: regmap for QoS registers read/write access
*/
struct qcom_icc_provider {
struct icc_provider provider;
int num_clks;
- bool is_bimc_node;
+ enum qcom_icc_type type;
struct regmap *regmap;
unsigned int qos_offset;
struct clk_bulk_data bus_clks[];
@@ -38,6 +44,7 @@ struct qcom_icc_provider {
* @ap_owned: indicates if the node is owned by the AP or by the RPM
* @qos_mode: default qos mode for this node
* @qos_port: qos port number for finding qos registers of this node
+ * @urg_fwd_en: enable urgent forwarding
*/
struct qcom_icc_qos {
u32 areq_prio;
@@ -46,6 +53,7 @@ struct qcom_icc_qos {
bool ap_owned;
int qos_mode;
int qos_port;
+ bool urg_fwd_en;
};
/**
@@ -77,7 +85,8 @@ struct qcom_icc_desc {
size_t num_nodes;
const char * const *clocks;
size_t num_clocks;
- bool is_bimc_node;
+ bool has_bus_pd;
+ enum qcom_icc_type type;
const struct regmap_config *regmap_cfg;
unsigned int qos_offset;
};
diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c
index 3eb7936d2cf6..2c8e12549804 100644
--- a/drivers/interconnect/qcom/icc-rpmh.c
+++ b/drivers/interconnect/qcom/icc-rpmh.c
@@ -21,13 +21,18 @@ void qcom_icc_pre_aggregate(struct icc_node *node)
{
size_t i;
struct qcom_icc_node *qn;
+ struct qcom_icc_provider *qp;
qn = node->data;
+ qp = to_qcom_provider(node->provider);
for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) {
qn->sum_avg[i] = 0;
qn->max_peak[i] = 0;
}
+
+ for (i = 0; i < qn->num_bcms; i++)
+ qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
}
EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate);
@@ -45,10 +50,8 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
{
size_t i;
struct qcom_icc_node *qn;
- struct qcom_icc_provider *qp;
qn = node->data;
- qp = to_qcom_provider(node->provider);
if (!tag)
tag = QCOM_ICC_TAG_ALWAYS;
@@ -68,9 +71,6 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
*agg_avg += avg_bw;
*agg_peak = max_t(u32, *agg_peak, peak_bw);
- for (i = 0; i < qn->num_bcms; i++)
- qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
-
return 0;
}
EXPORT_SYMBOL_GPL(qcom_icc_aggregate);
diff --git a/drivers/interconnect/qcom/msm8916.c b/drivers/interconnect/qcom/msm8916.c
index e3c995b11357..2f397a7c3322 100644
--- a/drivers/interconnect/qcom/msm8916.c
+++ b/drivers/interconnect/qcom/msm8916.c
@@ -1229,6 +1229,7 @@ static const struct regmap_config msm8916_snoc_regmap_config = {
};
static struct qcom_icc_desc msm8916_snoc = {
+ .type = QCOM_ICC_NOC,
.nodes = msm8916_snoc_nodes,
.num_nodes = ARRAY_SIZE(msm8916_snoc_nodes),
.regmap_cfg = &msm8916_snoc_regmap_config,
@@ -1256,9 +1257,9 @@ static const struct regmap_config msm8916_bimc_regmap_config = {
};
static struct qcom_icc_desc msm8916_bimc = {
+ .type = QCOM_ICC_BIMC,
.nodes = msm8916_bimc_nodes,
.num_nodes = ARRAY_SIZE(msm8916_bimc_nodes),
- .is_bimc_node = true,
.regmap_cfg = &msm8916_bimc_regmap_config,
.qos_offset = 0x8000,
};
@@ -1325,6 +1326,7 @@ static const struct regmap_config msm8916_pcnoc_regmap_config = {
};
static struct qcom_icc_desc msm8916_pcnoc = {
+ .type = QCOM_ICC_NOC,
.nodes = msm8916_pcnoc_nodes,
.num_nodes = ARRAY_SIZE(msm8916_pcnoc_nodes),
.regmap_cfg = &msm8916_pcnoc_regmap_config,
diff --git a/drivers/interconnect/qcom/msm8939.c b/drivers/interconnect/qcom/msm8939.c
index 16272a477bd8..d188f3636e4c 100644
--- a/drivers/interconnect/qcom/msm8939.c
+++ b/drivers/interconnect/qcom/msm8939.c
@@ -1282,6 +1282,7 @@ static const struct regmap_config msm8939_snoc_regmap_config = {
};
static struct qcom_icc_desc msm8939_snoc = {
+ .type = QCOM_ICC_NOC,
.nodes = msm8939_snoc_nodes,
.num_nodes = ARRAY_SIZE(msm8939_snoc_nodes),
.regmap_cfg = &msm8939_snoc_regmap_config,
@@ -1309,6 +1310,7 @@ static const struct regmap_config msm8939_snoc_mm_regmap_config = {
};
static struct qcom_icc_desc msm8939_snoc_mm = {
+ .type = QCOM_ICC_NOC,
.nodes = msm8939_snoc_mm_nodes,
.num_nodes = ARRAY_SIZE(msm8939_snoc_mm_nodes),
.regmap_cfg = &msm8939_snoc_mm_regmap_config,
@@ -1336,9 +1338,9 @@ static const struct regmap_config msm8939_bimc_regmap_config = {
};
static struct qcom_icc_desc msm8939_bimc = {
+ .type = QCOM_ICC_BIMC,
.nodes = msm8939_bimc_nodes,
.num_nodes = ARRAY_SIZE(msm8939_bimc_nodes),
- .is_bimc_node = true,
.regmap_cfg = &msm8939_bimc_regmap_config,
.qos_offset = 0x8000,
};
@@ -1407,6 +1409,7 @@ static const struct regmap_config msm8939_pcnoc_regmap_config = {
};
static struct qcom_icc_desc msm8939_pcnoc = {
+ .type = QCOM_ICC_NOC,
.nodes = msm8939_pcnoc_nodes,
.num_nodes = ARRAY_SIZE(msm8939_pcnoc_nodes),
.regmap_cfg = &msm8939_pcnoc_regmap_config,
diff --git a/drivers/interconnect/qcom/msm8996.c b/drivers/interconnect/qcom/msm8996.c
new file mode 100644
index 000000000000..499e11fbbd2e
--- /dev/null
+++ b/drivers/interconnect/qcom/msm8996.c
@@ -0,0 +1,2110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Qualcomm MSM8996 Network-on-Chip (NoC) QoS driver
+ *
+ * Copyright (c) 2021 Yassine Oudjana <y.oudjana@protonmail.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/interconnect-provider.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/interconnect/qcom,msm8996.h>
+
+#include "icc-rpm.h"
+#include "smd-rpm.h"
+#include "msm8996.h"
+
+static const char * const bus_mm_clocks[] = {
+ "bus",
+ "bus_a",
+ "iface"
+};
+
+static const char * const bus_a0noc_clocks[] = {
+ "aggre0_snoc_axi",
+ "aggre0_cnoc_ahb",
+ "aggre0_noc_mpu_cfg"
+};
+
+static const u16 mas_a0noc_common_links[] = {
+ MSM8996_SLAVE_A0NOC_SNOC
+};
+
+static struct qcom_icc_node mas_pcie_0 = {
+ .name = "mas_pcie_0",
+ .id = MSM8996_MASTER_PCIE_0,
+ .buswidth = 8,
+ .mas_rpm_id = 65,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 1,
+ .qos.prio_level = 1,
+ .qos.qos_port = 0,
+ .num_links = ARRAY_SIZE(mas_a0noc_common_links),
+ .links = mas_a0noc_common_links
+};
+
+static struct qcom_icc_node mas_pcie_1 = {
+ .name = "mas_pcie_1",
+ .id = MSM8996_MASTER_PCIE_1,
+ .buswidth = 8,
+ .mas_rpm_id = 66,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 1,
+ .qos.prio_level = 1,
+ .qos.qos_port = 1,
+ .num_links = ARRAY_SIZE(mas_a0noc_common_links),
+ .links = mas_a0noc_common_links
+};
+
+static struct qcom_icc_node mas_pcie_2 = {
+ .name = "mas_pcie_2",
+ .id = MSM8996_MASTER_PCIE_2,
+ .buswidth = 8,
+ .mas_rpm_id = 119,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 1,
+ .qos.prio_level = 1,
+ .qos.qos_port = 2,
+ .num_links = ARRAY_SIZE(mas_a0noc_common_links),
+ .links = mas_a0noc_common_links
+};
+
+static const u16 mas_a1noc_common_links[] = {
+ MSM8996_SLAVE_A1NOC_SNOC
+};
+
+static struct qcom_icc_node mas_cnoc_a1noc = {
+ .name = "mas_cnoc_a1noc",
+ .id = MSM8996_MASTER_CNOC_A1NOC,
+ .buswidth = 8,
+ .mas_rpm_id = 116,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_a1noc_common_links),
+ .links = mas_a1noc_common_links
+};
+
+static struct qcom_icc_node mas_crypto_c0 = {
+ .name = "mas_crypto_c0",
+ .id = MSM8996_MASTER_CRYPTO_CORE0,
+ .buswidth = 8,
+ .mas_rpm_id = 23,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 1,
+ .qos.prio_level = 1,
+ .qos.qos_port = 0,
+ .num_links = ARRAY_SIZE(mas_a1noc_common_links),
+ .links = mas_a1noc_common_links
+};
+
+static struct qcom_icc_node mas_pnoc_a1noc = {
+ .name = "mas_pnoc_a1noc",
+ .id = MSM8996_MASTER_PNOC_A1NOC,
+ .buswidth = 8,
+ .mas_rpm_id = 117,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = false,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 1,
+ .num_links = ARRAY_SIZE(mas_a1noc_common_links),
+ .links = mas_a1noc_common_links
+};
+
+static const u16 mas_a2noc_common_links[] = {
+ MSM8996_SLAVE_A2NOC_SNOC
+};
+
+static struct qcom_icc_node mas_usb3 = {
+ .name = "mas_usb3",
+ .id = MSM8996_MASTER_USB3,
+ .buswidth = 8,
+ .mas_rpm_id = 32,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 1,
+ .qos.prio_level = 1,
+ .qos.qos_port = 3,
+ .num_links = ARRAY_SIZE(mas_a2noc_common_links),
+ .links = mas_a2noc_common_links
+};
+
+static struct qcom_icc_node mas_ipa = {
+ .name = "mas_ipa",
+ .id = MSM8996_MASTER_IPA,
+ .buswidth = 8,
+ .mas_rpm_id = 59,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = -1,
+ .num_links = ARRAY_SIZE(mas_a2noc_common_links),
+ .links = mas_a2noc_common_links
+};
+
+static struct qcom_icc_node mas_ufs = {
+ .name = "mas_ufs",
+ .id = MSM8996_MASTER_UFS,
+ .buswidth = 8,
+ .mas_rpm_id = 68,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 1,
+ .qos.prio_level = 1,
+ .qos.qos_port = 2,
+ .num_links = ARRAY_SIZE(mas_a2noc_common_links),
+ .links = mas_a2noc_common_links
+};
+
+static const u16 mas_apps_proc_links[] = {
+ MSM8996_SLAVE_BIMC_SNOC_1,
+ MSM8996_SLAVE_EBI_CH0,
+ MSM8996_SLAVE_BIMC_SNOC_0
+};
+
+static struct qcom_icc_node mas_apps_proc = {
+ .name = "mas_apps_proc",
+ .id = MSM8996_MASTER_AMPSS_M0,
+ .buswidth = 8,
+ .mas_rpm_id = 0,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 0,
+ .num_links = ARRAY_SIZE(mas_apps_proc_links),
+ .links = mas_apps_proc_links
+};
+
+static const u16 mas_oxili_common_links[] = {
+ MSM8996_SLAVE_BIMC_SNOC_1,
+ MSM8996_SLAVE_HMSS_L3,
+ MSM8996_SLAVE_EBI_CH0,
+ MSM8996_SLAVE_BIMC_SNOC_0
+};
+
+static struct qcom_icc_node mas_oxili = {
+ .name = "mas_oxili",
+ .id = MSM8996_MASTER_GRAPHICS_3D,
+ .buswidth = 8,
+ .mas_rpm_id = 6,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 1,
+ .num_links = ARRAY_SIZE(mas_oxili_common_links),
+ .links = mas_oxili_common_links
+};
+
+static struct qcom_icc_node mas_mnoc_bimc = {
+ .name = "mas_mnoc_bimc",
+ .id = MSM8996_MASTER_MNOC_BIMC,
+ .buswidth = 8,
+ .mas_rpm_id = 2,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 2,
+ .num_links = ARRAY_SIZE(mas_oxili_common_links),
+ .links = mas_oxili_common_links
+};
+
+static const u16 mas_snoc_bimc_links[] = {
+ MSM8996_SLAVE_HMSS_L3,
+ MSM8996_SLAVE_EBI_CH0
+};
+
+static struct qcom_icc_node mas_snoc_bimc = {
+ .name = "mas_snoc_bimc",
+ .id = MSM8996_MASTER_SNOC_BIMC,
+ .buswidth = 8,
+ .mas_rpm_id = 3,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = false,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = -1,
+ .num_links = ARRAY_SIZE(mas_snoc_bimc_links),
+ .links = mas_snoc_bimc_links
+};
+
+static const u16 mas_snoc_cnoc_links[] = {
+ MSM8996_SLAVE_CLK_CTL,
+ MSM8996_SLAVE_RBCPR_CX,
+ MSM8996_SLAVE_A2NOC_SMMU_CFG,
+ MSM8996_SLAVE_A0NOC_MPU_CFG,
+ MSM8996_SLAVE_MESSAGE_RAM,
+ MSM8996_SLAVE_CNOC_MNOC_MMSS_CFG,
+ MSM8996_SLAVE_PCIE_0_CFG,
+ MSM8996_SLAVE_TLMM,
+ MSM8996_SLAVE_MPM,
+ MSM8996_SLAVE_A0NOC_SMMU_CFG,
+ MSM8996_SLAVE_EBI1_PHY_CFG,
+ MSM8996_SLAVE_BIMC_CFG,
+ MSM8996_SLAVE_PIMEM_CFG,
+ MSM8996_SLAVE_RBCPR_MX,
+ MSM8996_SLAVE_PRNG,
+ MSM8996_SLAVE_PCIE20_AHB2PHY,
+ MSM8996_SLAVE_A2NOC_MPU_CFG,
+ MSM8996_SLAVE_QDSS_CFG,
+ MSM8996_SLAVE_A2NOC_CFG,
+ MSM8996_SLAVE_A0NOC_CFG,
+ MSM8996_SLAVE_UFS_CFG,
+ MSM8996_SLAVE_CRYPTO_0_CFG,
+ MSM8996_SLAVE_PCIE_1_CFG,
+ MSM8996_SLAVE_SNOC_CFG,
+ MSM8996_SLAVE_SNOC_MPU_CFG,
+ MSM8996_SLAVE_A1NOC_MPU_CFG,
+ MSM8996_SLAVE_A1NOC_SMMU_CFG,
+ MSM8996_SLAVE_PCIE_2_CFG,
+ MSM8996_SLAVE_CNOC_MNOC_CFG,
+ MSM8996_SLAVE_QDSS_RBCPR_APU_CFG,
+ MSM8996_SLAVE_PMIC_ARB,
+ MSM8996_SLAVE_IMEM_CFG,
+ MSM8996_SLAVE_A1NOC_CFG,
+ MSM8996_SLAVE_SSC_CFG,
+ MSM8996_SLAVE_TCSR,
+ MSM8996_SLAVE_LPASS_SMMU_CFG,
+ MSM8996_SLAVE_DCC_CFG
+};
+
+static struct qcom_icc_node mas_snoc_cnoc = {
+ .name = "mas_snoc_cnoc",
+ .id = MSM8996_MASTER_SNOC_CNOC,
+ .buswidth = 8,
+ .mas_rpm_id = 52,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_snoc_cnoc_links),
+ .links = mas_snoc_cnoc_links
+};
+
+static const u16 mas_qdss_dap_links[] = {
+ MSM8996_SLAVE_QDSS_RBCPR_APU_CFG,
+ MSM8996_SLAVE_RBCPR_CX,
+ MSM8996_SLAVE_A2NOC_SMMU_CFG,
+ MSM8996_SLAVE_A0NOC_MPU_CFG,
+ MSM8996_SLAVE_MESSAGE_RAM,
+ MSM8996_SLAVE_PCIE_0_CFG,
+ MSM8996_SLAVE_TLMM,
+ MSM8996_SLAVE_MPM,
+ MSM8996_SLAVE_A0NOC_SMMU_CFG,
+ MSM8996_SLAVE_EBI1_PHY_CFG,
+ MSM8996_SLAVE_BIMC_CFG,
+ MSM8996_SLAVE_PIMEM_CFG,
+ MSM8996_SLAVE_RBCPR_MX,
+ MSM8996_SLAVE_CLK_CTL,
+ MSM8996_SLAVE_PRNG,
+ MSM8996_SLAVE_PCIE20_AHB2PHY,
+ MSM8996_SLAVE_A2NOC_MPU_CFG,
+ MSM8996_SLAVE_QDSS_CFG,
+ MSM8996_SLAVE_A2NOC_CFG,
+ MSM8996_SLAVE_A0NOC_CFG,
+ MSM8996_SLAVE_UFS_CFG,
+ MSM8996_SLAVE_CRYPTO_0_CFG,
+ MSM8996_SLAVE_CNOC_A1NOC,
+ MSM8996_SLAVE_PCIE_1_CFG,
+ MSM8996_SLAVE_SNOC_CFG,
+ MSM8996_SLAVE_SNOC_MPU_CFG,
+ MSM8996_SLAVE_A1NOC_MPU_CFG,
+ MSM8996_SLAVE_A1NOC_SMMU_CFG,
+ MSM8996_SLAVE_PCIE_2_CFG,
+ MSM8996_SLAVE_CNOC_MNOC_CFG,
+ MSM8996_SLAVE_CNOC_MNOC_MMSS_CFG,
+ MSM8996_SLAVE_PMIC_ARB,
+ MSM8996_SLAVE_IMEM_CFG,
+ MSM8996_SLAVE_A1NOC_CFG,
+ MSM8996_SLAVE_SSC_CFG,
+ MSM8996_SLAVE_TCSR,
+ MSM8996_SLAVE_LPASS_SMMU_CFG,
+ MSM8996_SLAVE_DCC_CFG
+};
+
+static struct qcom_icc_node mas_qdss_dap = {
+ .name = "mas_qdss_dap",
+ .id = MSM8996_MASTER_QDSS_DAP,
+ .buswidth = 8,
+ .mas_rpm_id = 49,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_qdss_dap_links),
+ .links = mas_qdss_dap_links
+};
+
+static const u16 mas_cnoc_mnoc_mmss_cfg_links[] = {
+ MSM8996_SLAVE_MMAGIC_CFG,
+ MSM8996_SLAVE_DSA_MPU_CFG,
+ MSM8996_SLAVE_MMSS_CLK_CFG,
+ MSM8996_SLAVE_CAMERA_THROTTLE_CFG,
+ MSM8996_SLAVE_VENUS_CFG,
+ MSM8996_SLAVE_SMMU_VFE_CFG,
+ MSM8996_SLAVE_MISC_CFG,
+ MSM8996_SLAVE_SMMU_CPP_CFG,
+ MSM8996_SLAVE_GRAPHICS_3D_CFG,
+ MSM8996_SLAVE_DISPLAY_THROTTLE_CFG,
+ MSM8996_SLAVE_VENUS_THROTTLE_CFG,
+ MSM8996_SLAVE_CAMERA_CFG,
+ MSM8996_SLAVE_DISPLAY_CFG,
+ MSM8996_SLAVE_CPR_CFG,
+ MSM8996_SLAVE_SMMU_ROTATOR_CFG,
+ MSM8996_SLAVE_DSA_CFG,
+ MSM8996_SLAVE_SMMU_VENUS_CFG,
+ MSM8996_SLAVE_VMEM_CFG,
+ MSM8996_SLAVE_SMMU_JPEG_CFG,
+ MSM8996_SLAVE_SMMU_MDP_CFG,
+ MSM8996_SLAVE_MNOC_MPU_CFG
+};
+
+static struct qcom_icc_node mas_cnoc_mnoc_mmss_cfg = {
+ .name = "mas_cnoc_mnoc_mmss_cfg",
+ .id = MSM8996_MASTER_CNOC_MNOC_MMSS_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = 4,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_cnoc_mnoc_mmss_cfg_links),
+ .links = mas_cnoc_mnoc_mmss_cfg_links
+};
+
+static const u16 mas_cnoc_mnoc_cfg_links[] = {
+ MSM8996_SLAVE_SERVICE_MNOC
+};
+
+static struct qcom_icc_node mas_cnoc_mnoc_cfg = {
+ .name = "mas_cnoc_mnoc_cfg",
+ .id = MSM8996_MASTER_CNOC_MNOC_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = 5,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_cnoc_mnoc_cfg_links),
+ .links = mas_cnoc_mnoc_cfg_links
+};
+
+static const u16 mas_mnoc_bimc_common_links[] = {
+ MSM8996_SLAVE_MNOC_BIMC
+};
+
+static struct qcom_icc_node mas_cpp = {
+ .name = "mas_cpp",
+ .id = MSM8996_MASTER_CPP,
+ .buswidth = 32,
+ .mas_rpm_id = 115,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 5,
+ .num_links = ARRAY_SIZE(mas_mnoc_bimc_common_links),
+ .links = mas_mnoc_bimc_common_links
+};
+
+static struct qcom_icc_node mas_jpeg = {
+ .name = "mas_jpeg",
+ .id = MSM8996_MASTER_JPEG,
+ .buswidth = 32,
+ .mas_rpm_id = 7,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 7,
+ .num_links = ARRAY_SIZE(mas_mnoc_bimc_common_links),
+ .links = mas_mnoc_bimc_common_links
+};
+
+static struct qcom_icc_node mas_mdp_p0 = {
+ .name = "mas_mdp_p0",
+ .id = MSM8996_MASTER_MDP_PORT0,
+ .buswidth = 32,
+ .mas_rpm_id = 8,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 1,
+ .num_links = ARRAY_SIZE(mas_mnoc_bimc_common_links),
+ .links = mas_mnoc_bimc_common_links
+};
+
+static struct qcom_icc_node mas_mdp_p1 = {
+ .name = "mas_mdp_p1",
+ .id = MSM8996_MASTER_MDP_PORT1,
+ .buswidth = 32,
+ .mas_rpm_id = 61,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 2,
+ .num_links = ARRAY_SIZE(mas_mnoc_bimc_common_links),
+ .links = mas_mnoc_bimc_common_links
+};
+
+static struct qcom_icc_node mas_rotator = {
+ .name = "mas_rotator",
+ .id = MSM8996_MASTER_ROTATOR,
+ .buswidth = 32,
+ .mas_rpm_id = 120,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 0,
+ .num_links = ARRAY_SIZE(mas_mnoc_bimc_common_links),
+ .links = mas_mnoc_bimc_common_links
+};
+
+static struct qcom_icc_node mas_venus = {
+ .name = "mas_venus",
+ .id = MSM8996_MASTER_VIDEO_P0,
+ .buswidth = 32,
+ .mas_rpm_id = 9,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 3,
+ .num_links = ARRAY_SIZE(mas_mnoc_bimc_common_links),
+ .links = mas_mnoc_bimc_common_links
+};
+
+static struct qcom_icc_node mas_vfe = {
+ .name = "mas_vfe",
+ .id = MSM8996_MASTER_VFE,
+ .buswidth = 32,
+ .mas_rpm_id = 11,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .qos.areq_prio = 0,
+ .qos.prio_level = 0,
+ .qos.qos_port = 6,
+ .num_links = ARRAY_SIZE(mas_mnoc_bimc_common_links),
+ .links = mas_mnoc_bimc_common_links
+};
+
+static const u16 mas_vmem_common_links[] = {
+ MSM8996_SLAVE_VMEM
+};
+
+static struct qcom_icc_node mas_snoc_vmem = {
+ .name = "mas_snoc_vmem",
+ .id = MSM8996_MASTER_SNOC_VMEM,
+ .buswidth = 32,
+ .mas_rpm_id = 114,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_vmem_common_links),
+ .links = mas_vmem_common_links
+};
+
+static struct qcom_icc_node mas_venus_vmem = {
+ .name = "mas_venus_vmem",
+ .id = MSM8996_MASTER_VIDEO_P0_OCMEM,
+ .buswidth = 32,
+ .mas_rpm_id = 121,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_vmem_common_links),
+ .links = mas_vmem_common_links
+};
+
+static const u16 mas_snoc_pnoc_links[] = {
+ MSM8996_SLAVE_BLSP_1,
+ MSM8996_SLAVE_BLSP_2,
+ MSM8996_SLAVE_SDCC_1,
+ MSM8996_SLAVE_SDCC_2,
+ MSM8996_SLAVE_SDCC_4,
+ MSM8996_SLAVE_TSIF,
+ MSM8996_SLAVE_PDM,
+ MSM8996_SLAVE_AHB2PHY
+};
+
+static struct qcom_icc_node mas_snoc_pnoc = {
+ .name = "mas_snoc_pnoc",
+ .id = MSM8996_MASTER_SNOC_PNOC,
+ .buswidth = 8,
+ .mas_rpm_id = 44,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_snoc_pnoc_links),
+ .links = mas_snoc_pnoc_links
+};
+
+static const u16 mas_pnoc_a1noc_common_links[] = {
+ MSM8996_SLAVE_PNOC_A1NOC
+};
+
+static struct qcom_icc_node mas_sdcc_1 = {
+ .name = "mas_sdcc_1",
+ .id = MSM8996_MASTER_SDCC_1,
+ .buswidth = 8,
+ .mas_rpm_id = 33,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_pnoc_a1noc_common_links),
+ .links = mas_pnoc_a1noc_common_links
+};
+
+static struct qcom_icc_node mas_sdcc_2 = {
+ .name = "mas_sdcc_2",
+ .id = MSM8996_MASTER_SDCC_2,
+ .buswidth = 8,
+ .mas_rpm_id = 35,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_pnoc_a1noc_common_links),
+ .links = mas_pnoc_a1noc_common_links
+};
+
+static struct qcom_icc_node mas_sdcc_4 = {
+ .name = "mas_sdcc_4",
+ .id = MSM8996_MASTER_SDCC_4,
+ .buswidth = 8,
+ .mas_rpm_id = 36,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_pnoc_a1noc_common_links),
+ .links = mas_pnoc_a1noc_common_links
+};
+
+static struct qcom_icc_node mas_usb_hs = {
+ .name = "mas_usb_hs",
+ .id = MSM8996_MASTER_USB_HS,
+ .buswidth = 8,
+ .mas_rpm_id = 42,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_pnoc_a1noc_common_links),
+ .links = mas_pnoc_a1noc_common_links
+};
+
+static struct qcom_icc_node mas_blsp_1 = {
+ .name = "mas_blsp_1",
+ .id = MSM8996_MASTER_BLSP_1,
+ .buswidth = 4,
+ .mas_rpm_id = 41,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_pnoc_a1noc_common_links),
+ .links = mas_pnoc_a1noc_common_links
+};
+
+static struct qcom_icc_node mas_blsp_2 = {
+ .name = "mas_blsp_2",
+ .id = MSM8996_MASTER_BLSP_2,
+ .buswidth = 4,
+ .mas_rpm_id = 39,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_pnoc_a1noc_common_links),
+ .links = mas_pnoc_a1noc_common_links
+};
+
+static struct qcom_icc_node mas_tsif = {
+ .name = "mas_tsif",
+ .id = MSM8996_MASTER_TSIF,
+ .buswidth = 4,
+ .mas_rpm_id = 37,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_pnoc_a1noc_common_links),
+ .links = mas_pnoc_a1noc_common_links
+};
+
+static const u16 mas_hmss_links[] = {
+ MSM8996_SLAVE_PIMEM,
+ MSM8996_SLAVE_OCIMEM,
+ MSM8996_SLAVE_SNOC_BIMC
+};
+
+static struct qcom_icc_node mas_hmss = {
+ .name = "mas_hmss",
+ .id = MSM8996_MASTER_HMSS,
+ .buswidth = 8,
+ .mas_rpm_id = 118,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 1,
+ .qos.prio_level = 1,
+ .qos.qos_port = 4,
+ .num_links = ARRAY_SIZE(mas_hmss_links),
+ .links = mas_hmss_links
+};
+
+static const u16 mas_qdss_common_links[] = {
+ MSM8996_SLAVE_PIMEM,
+ MSM8996_SLAVE_USB3,
+ MSM8996_SLAVE_OCIMEM,
+ MSM8996_SLAVE_SNOC_BIMC,
+ MSM8996_SLAVE_SNOC_PNOC
+};
+
+static struct qcom_icc_node mas_qdss_bam = {
+ .name = "mas_qdss_bam",
+ .id = MSM8996_MASTER_QDSS_BAM,
+ .buswidth = 16,
+ .mas_rpm_id = 19,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 1,
+ .qos.prio_level = 1,
+ .qos.qos_port = 2,
+ .num_links = ARRAY_SIZE(mas_qdss_common_links),
+ .links = mas_qdss_common_links
+};
+
+static const u16 mas_snoc_cfg_links[] = {
+ MSM8996_SLAVE_SERVICE_SNOC
+};
+
+static struct qcom_icc_node mas_snoc_cfg = {
+ .name = "mas_snoc_cfg",
+ .id = MSM8996_MASTER_SNOC_CFG,
+ .buswidth = 16,
+ .mas_rpm_id = 20,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_snoc_cfg_links),
+ .links = mas_snoc_cfg_links
+};
+
+static const u16 mas_bimc_snoc_0_links[] = {
+ MSM8996_SLAVE_SNOC_VMEM,
+ MSM8996_SLAVE_USB3,
+ MSM8996_SLAVE_PIMEM,
+ MSM8996_SLAVE_LPASS,
+ MSM8996_SLAVE_APPSS,
+ MSM8996_SLAVE_SNOC_CNOC,
+ MSM8996_SLAVE_SNOC_PNOC,
+ MSM8996_SLAVE_OCIMEM,
+ MSM8996_SLAVE_QDSS_STM
+};
+
+static struct qcom_icc_node mas_bimc_snoc_0 = {
+ .name = "mas_bimc_snoc_0",
+ .id = MSM8996_MASTER_BIMC_SNOC_0,
+ .buswidth = 16,
+ .mas_rpm_id = 21,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_bimc_snoc_0_links),
+ .links = mas_bimc_snoc_0_links
+};
+
+static const u16 mas_bimc_snoc_1_links[] = {
+ MSM8996_SLAVE_PCIE_2,
+ MSM8996_SLAVE_PCIE_1,
+ MSM8996_SLAVE_PCIE_0
+};
+
+static struct qcom_icc_node mas_bimc_snoc_1 = {
+ .name = "mas_bimc_snoc_1",
+ .id = MSM8996_MASTER_BIMC_SNOC_1,
+ .buswidth = 16,
+ .mas_rpm_id = 109,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_bimc_snoc_1_links),
+ .links = mas_bimc_snoc_1_links
+};
+
+static const u16 mas_a0noc_snoc_links[] = {
+ MSM8996_SLAVE_SNOC_PNOC,
+ MSM8996_SLAVE_OCIMEM,
+ MSM8996_SLAVE_APPSS,
+ MSM8996_SLAVE_SNOC_BIMC,
+ MSM8996_SLAVE_PIMEM
+};
+
+static struct qcom_icc_node mas_a0noc_snoc = {
+ .name = "mas_a0noc_snoc",
+ .id = MSM8996_MASTER_A0NOC_SNOC,
+ .buswidth = 16,
+ .mas_rpm_id = 110,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(mas_a0noc_snoc_links),
+ .links = mas_a0noc_snoc_links
+};
+
+static const u16 mas_a1noc_snoc_links[] = {
+ MSM8996_SLAVE_SNOC_VMEM,
+ MSM8996_SLAVE_USB3,
+ MSM8996_SLAVE_PCIE_0,
+ MSM8996_SLAVE_PIMEM,
+ MSM8996_SLAVE_PCIE_2,
+ MSM8996_SLAVE_LPASS,
+ MSM8996_SLAVE_PCIE_1,
+ MSM8996_SLAVE_APPSS,
+ MSM8996_SLAVE_SNOC_BIMC,
+ MSM8996_SLAVE_SNOC_CNOC,
+ MSM8996_SLAVE_SNOC_PNOC,
+ MSM8996_SLAVE_OCIMEM,
+ MSM8996_SLAVE_QDSS_STM
+};
+
+static struct qcom_icc_node mas_a1noc_snoc = {
+ .name = "mas_a1noc_snoc",
+ .id = MSM8996_MASTER_A1NOC_SNOC,
+ .buswidth = 16,
+ .mas_rpm_id = 111,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_a1noc_snoc_links),
+ .links = mas_a1noc_snoc_links
+};
+
+static const u16 mas_a2noc_snoc_links[] = {
+ MSM8996_SLAVE_SNOC_VMEM,
+ MSM8996_SLAVE_USB3,
+ MSM8996_SLAVE_PCIE_1,
+ MSM8996_SLAVE_PIMEM,
+ MSM8996_SLAVE_PCIE_2,
+ MSM8996_SLAVE_QDSS_STM,
+ MSM8996_SLAVE_LPASS,
+ MSM8996_SLAVE_SNOC_BIMC,
+ MSM8996_SLAVE_SNOC_CNOC,
+ MSM8996_SLAVE_SNOC_PNOC,
+ MSM8996_SLAVE_OCIMEM,
+ MSM8996_SLAVE_PCIE_0
+};
+
+static struct qcom_icc_node mas_a2noc_snoc = {
+ .name = "mas_a2noc_snoc",
+ .id = MSM8996_MASTER_A2NOC_SNOC,
+ .buswidth = 16,
+ .mas_rpm_id = 112,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_a2noc_snoc_links),
+ .links = mas_a2noc_snoc_links
+};
+
+static struct qcom_icc_node mas_qdss_etr = {
+ .name = "mas_qdss_etr",
+ .id = MSM8996_MASTER_QDSS_ETR,
+ .buswidth = 16,
+ .mas_rpm_id = 31,
+ .slv_rpm_id = -1,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 1,
+ .qos.prio_level = 1,
+ .qos.qos_port = 3,
+ .num_links = ARRAY_SIZE(mas_qdss_common_links),
+ .links = mas_qdss_common_links
+};
+
+static const u16 slv_a0noc_snoc_links[] = {
+ MSM8996_MASTER_A0NOC_SNOC
+};
+
+static struct qcom_icc_node slv_a0noc_snoc = {
+ .name = "slv_a0noc_snoc",
+ .id = MSM8996_SLAVE_A0NOC_SNOC,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 141,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(slv_a0noc_snoc_links),
+ .links = slv_a0noc_snoc_links
+};
+
+static const u16 slv_a1noc_snoc_links[] = {
+ MSM8996_MASTER_A1NOC_SNOC
+};
+
+static struct qcom_icc_node slv_a1noc_snoc = {
+ .name = "slv_a1noc_snoc",
+ .id = MSM8996_SLAVE_A1NOC_SNOC,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 142,
+ .num_links = ARRAY_SIZE(slv_a1noc_snoc_links),
+ .links = slv_a1noc_snoc_links
+};
+
+static const u16 slv_a2noc_snoc_links[] = {
+ MSM8996_MASTER_A2NOC_SNOC
+};
+
+static struct qcom_icc_node slv_a2noc_snoc = {
+ .name = "slv_a2noc_snoc",
+ .id = MSM8996_SLAVE_A2NOC_SNOC,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 143,
+ .num_links = ARRAY_SIZE(slv_a2noc_snoc_links),
+ .links = slv_a2noc_snoc_links
+};
+
+static struct qcom_icc_node slv_ebi = {
+ .name = "slv_ebi",
+ .id = MSM8996_SLAVE_EBI_CH0,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 0
+};
+
+static struct qcom_icc_node slv_hmss_l3 = {
+ .name = "slv_hmss_l3",
+ .id = MSM8996_SLAVE_HMSS_L3,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 160
+};
+
+static const u16 slv_bimc_snoc_0_links[] = {
+ MSM8996_MASTER_BIMC_SNOC_0
+};
+
+static struct qcom_icc_node slv_bimc_snoc_0 = {
+ .name = "slv_bimc_snoc_0",
+ .id = MSM8996_SLAVE_BIMC_SNOC_0,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 2,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(slv_bimc_snoc_0_links),
+ .links = slv_bimc_snoc_0_links
+};
+
+static const u16 slv_bimc_snoc_1_links[] = {
+ MSM8996_MASTER_BIMC_SNOC_1
+};
+
+static struct qcom_icc_node slv_bimc_snoc_1 = {
+ .name = "slv_bimc_snoc_1",
+ .id = MSM8996_SLAVE_BIMC_SNOC_1,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 138,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(slv_bimc_snoc_1_links),
+ .links = slv_bimc_snoc_1_links
+};
+
+static const u16 slv_cnoc_a1noc_links[] = {
+ MSM8996_MASTER_CNOC_A1NOC
+};
+
+static struct qcom_icc_node slv_cnoc_a1noc = {
+ .name = "slv_cnoc_a1noc",
+ .id = MSM8996_SLAVE_CNOC_A1NOC,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 75,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(slv_cnoc_a1noc_links),
+ .links = slv_cnoc_a1noc_links
+};
+
+static struct qcom_icc_node slv_clk_ctl = {
+ .name = "slv_clk_ctl",
+ .id = MSM8996_SLAVE_CLK_CTL,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 47
+};
+
+static struct qcom_icc_node slv_tcsr = {
+ .name = "slv_tcsr",
+ .id = MSM8996_SLAVE_TCSR,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 50
+};
+
+static struct qcom_icc_node slv_tlmm = {
+ .name = "slv_tlmm",
+ .id = MSM8996_SLAVE_TLMM,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 51
+};
+
+static struct qcom_icc_node slv_crypto0_cfg = {
+ .name = "slv_crypto0_cfg",
+ .id = MSM8996_SLAVE_CRYPTO_0_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 52,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_mpm = {
+ .name = "slv_mpm",
+ .id = MSM8996_SLAVE_MPM,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 62,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_pimem_cfg = {
+ .name = "slv_pimem_cfg",
+ .id = MSM8996_SLAVE_PIMEM_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 167,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_imem_cfg = {
+ .name = "slv_imem_cfg",
+ .id = MSM8996_SLAVE_IMEM_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 54,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_message_ram = {
+ .name = "slv_message_ram",
+ .id = MSM8996_SLAVE_MESSAGE_RAM,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 55
+};
+
+static struct qcom_icc_node slv_bimc_cfg = {
+ .name = "slv_bimc_cfg",
+ .id = MSM8996_SLAVE_BIMC_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 56,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_pmic_arb = {
+ .name = "slv_pmic_arb",
+ .id = MSM8996_SLAVE_PMIC_ARB,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 59
+};
+
+static struct qcom_icc_node slv_prng = {
+ .name = "slv_prng",
+ .id = MSM8996_SLAVE_PRNG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 127,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_dcc_cfg = {
+ .name = "slv_dcc_cfg",
+ .id = MSM8996_SLAVE_DCC_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 155,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_rbcpr_mx = {
+ .name = "slv_rbcpr_mx",
+ .id = MSM8996_SLAVE_RBCPR_MX,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 170,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_qdss_cfg = {
+ .name = "slv_qdss_cfg",
+ .id = MSM8996_SLAVE_QDSS_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 63,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_rbcpr_cx = {
+ .name = "slv_rbcpr_cx",
+ .id = MSM8996_SLAVE_RBCPR_CX,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 169,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_cpu_apu_cfg = {
+ .name = "slv_cpu_apu_cfg",
+ .id = MSM8996_SLAVE_QDSS_RBCPR_APU_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 168,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static const u16 slv_cnoc_mnoc_cfg_links[] = {
+ MSM8996_MASTER_CNOC_MNOC_CFG
+};
+
+static struct qcom_icc_node slv_cnoc_mnoc_cfg = {
+ .name = "slv_cnoc_mnoc_cfg",
+ .id = MSM8996_SLAVE_CNOC_MNOC_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 66,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(slv_cnoc_mnoc_cfg_links),
+ .links = slv_cnoc_mnoc_cfg_links
+};
+
+static struct qcom_icc_node slv_snoc_cfg = {
+ .name = "slv_snoc_cfg",
+ .id = MSM8996_SLAVE_SNOC_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 70,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_snoc_mpu_cfg = {
+ .name = "slv_snoc_mpu_cfg",
+ .id = MSM8996_SLAVE_SNOC_MPU_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 67,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_ebi1_phy_cfg = {
+ .name = "slv_ebi1_phy_cfg",
+ .id = MSM8996_SLAVE_EBI1_PHY_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 73,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_a0noc_cfg = {
+ .name = "slv_a0noc_cfg",
+ .id = MSM8996_SLAVE_A0NOC_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 144,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_pcie_1_cfg = {
+ .name = "slv_pcie_1_cfg",
+ .id = MSM8996_SLAVE_PCIE_1_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 89,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_pcie_2_cfg = {
+ .name = "slv_pcie_2_cfg",
+ .id = MSM8996_SLAVE_PCIE_2_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 165,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_pcie_0_cfg = {
+ .name = "slv_pcie_0_cfg",
+ .id = MSM8996_SLAVE_PCIE_0_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 88,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_pcie20_ahb2phy = {
+ .name = "slv_pcie20_ahb2phy",
+ .id = MSM8996_SLAVE_PCIE20_AHB2PHY,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 163,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_a0noc_mpu_cfg = {
+ .name = "slv_a0noc_mpu_cfg",
+ .id = MSM8996_SLAVE_A0NOC_MPU_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 145,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_ufs_cfg = {
+ .name = "slv_ufs_cfg",
+ .id = MSM8996_SLAVE_UFS_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 92,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_a1noc_cfg = {
+ .name = "slv_a1noc_cfg",
+ .id = MSM8996_SLAVE_A1NOC_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 147,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_a1noc_mpu_cfg = {
+ .name = "slv_a1noc_mpu_cfg",
+ .id = MSM8996_SLAVE_A1NOC_MPU_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 148,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_a2noc_cfg = {
+ .name = "slv_a2noc_cfg",
+ .id = MSM8996_SLAVE_A2NOC_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 150,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_a2noc_mpu_cfg = {
+ .name = "slv_a2noc_mpu_cfg",
+ .id = MSM8996_SLAVE_A2NOC_MPU_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 151,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_ssc_cfg = {
+ .name = "slv_ssc_cfg",
+ .id = MSM8996_SLAVE_SSC_CFG,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 177,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_a0noc_smmu_cfg = {
+ .name = "slv_a0noc_smmu_cfg",
+ .id = MSM8996_SLAVE_A0NOC_SMMU_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 146,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_a1noc_smmu_cfg = {
+ .name = "slv_a1noc_smmu_cfg",
+ .id = MSM8996_SLAVE_A1NOC_SMMU_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 149,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_a2noc_smmu_cfg = {
+ .name = "slv_a2noc_smmu_cfg",
+ .id = MSM8996_SLAVE_A2NOC_SMMU_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 152,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_lpass_smmu_cfg = {
+ .name = "slv_lpass_smmu_cfg",
+ .id = MSM8996_SLAVE_LPASS_SMMU_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 161,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static const u16 slv_cnoc_mnoc_mmss_cfg_links[] = {
+ MSM8996_MASTER_CNOC_MNOC_MMSS_CFG
+};
+
+static struct qcom_icc_node slv_cnoc_mnoc_mmss_cfg = {
+ .name = "slv_cnoc_mnoc_mmss_cfg",
+ .id = MSM8996_SLAVE_CNOC_MNOC_MMSS_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 58,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(slv_cnoc_mnoc_mmss_cfg_links),
+ .links = slv_cnoc_mnoc_mmss_cfg_links
+};
+
+static struct qcom_icc_node slv_mmagic_cfg = {
+ .name = "slv_mmagic_cfg",
+ .id = MSM8996_SLAVE_MMAGIC_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 162,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_cpr_cfg = {
+ .name = "slv_cpr_cfg",
+ .id = MSM8996_SLAVE_CPR_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 6,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_misc_cfg = {
+ .name = "slv_misc_cfg",
+ .id = MSM8996_SLAVE_MISC_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 8,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_venus_throttle_cfg = {
+ .name = "slv_venus_throttle_cfg",
+ .id = MSM8996_SLAVE_VENUS_THROTTLE_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 178,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_venus_cfg = {
+ .name = "slv_venus_cfg",
+ .id = MSM8996_SLAVE_VENUS_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 10,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_vmem_cfg = {
+ .name = "slv_vmem_cfg",
+ .id = MSM8996_SLAVE_VMEM_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 180,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_dsa_cfg = {
+ .name = "slv_dsa_cfg",
+ .id = MSM8996_SLAVE_DSA_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 157,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_mnoc_clocks_cfg = {
+ .name = "slv_mnoc_clocks_cfg",
+ .id = MSM8996_SLAVE_MMSS_CLK_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 12,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_dsa_mpu_cfg = {
+ .name = "slv_dsa_mpu_cfg",
+ .id = MSM8996_SLAVE_DSA_MPU_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 158,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_mnoc_mpu_cfg = {
+ .name = "slv_mnoc_mpu_cfg",
+ .id = MSM8996_SLAVE_MNOC_MPU_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 14,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_display_cfg = {
+ .name = "slv_display_cfg",
+ .id = MSM8996_SLAVE_DISPLAY_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_display_throttle_cfg = {
+ .name = "slv_display_throttle_cfg",
+ .id = MSM8996_SLAVE_DISPLAY_THROTTLE_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 156,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_camera_cfg = {
+ .name = "slv_camera_cfg",
+ .id = MSM8996_SLAVE_CAMERA_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 3,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_camera_throttle_cfg = {
+ .name = "slv_camera_throttle_cfg",
+ .id = MSM8996_SLAVE_CAMERA_THROTTLE_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 154,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_oxili_cfg = {
+ .name = "slv_oxili_cfg",
+ .id = MSM8996_SLAVE_GRAPHICS_3D_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 11,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_smmu_mdp_cfg = {
+ .name = "slv_smmu_mdp_cfg",
+ .id = MSM8996_SLAVE_SMMU_MDP_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 173,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_smmu_rot_cfg = {
+ .name = "slv_smmu_rot_cfg",
+ .id = MSM8996_SLAVE_SMMU_ROTATOR_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 174,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_smmu_venus_cfg = {
+ .name = "slv_smmu_venus_cfg",
+ .id = MSM8996_SLAVE_SMMU_VENUS_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 175,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_smmu_cpp_cfg = {
+ .name = "slv_smmu_cpp_cfg",
+ .id = MSM8996_SLAVE_SMMU_CPP_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 171,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_smmu_jpeg_cfg = {
+ .name = "slv_smmu_jpeg_cfg",
+ .id = MSM8996_SLAVE_SMMU_JPEG_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 172,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_smmu_vfe_cfg = {
+ .name = "slv_smmu_vfe_cfg",
+ .id = MSM8996_SLAVE_SMMU_VFE_CFG,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 176,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static const u16 slv_mnoc_bimc_links[] = {
+ MSM8996_MASTER_MNOC_BIMC
+};
+
+static struct qcom_icc_node slv_mnoc_bimc = {
+ .name = "slv_mnoc_bimc",
+ .id = MSM8996_SLAVE_MNOC_BIMC,
+ .buswidth = 32,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 16,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(slv_mnoc_bimc_links),
+ .links = slv_mnoc_bimc_links
+};
+
+static struct qcom_icc_node slv_vmem = {
+ .name = "slv_vmem",
+ .id = MSM8996_SLAVE_VMEM,
+ .buswidth = 32,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 179,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_srvc_mnoc = {
+ .name = "slv_srvc_mnoc",
+ .id = MSM8996_SLAVE_SERVICE_MNOC,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 17,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static const u16 slv_pnoc_a1noc_links[] = {
+ MSM8996_MASTER_PNOC_A1NOC
+};
+
+static struct qcom_icc_node slv_pnoc_a1noc = {
+ .name = "slv_pnoc_a1noc",
+ .id = MSM8996_SLAVE_PNOC_A1NOC,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 139,
+ .num_links = ARRAY_SIZE(slv_pnoc_a1noc_links),
+ .links = slv_pnoc_a1noc_links
+};
+
+static struct qcom_icc_node slv_usb_hs = {
+ .name = "slv_usb_hs",
+ .id = MSM8996_SLAVE_USB_HS,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 40
+};
+
+static struct qcom_icc_node slv_sdcc_2 = {
+ .name = "slv_sdcc_2",
+ .id = MSM8996_SLAVE_SDCC_2,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 33
+};
+
+static struct qcom_icc_node slv_sdcc_4 = {
+ .name = "slv_sdcc_4",
+ .id = MSM8996_SLAVE_SDCC_4,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 34
+};
+
+static struct qcom_icc_node slv_tsif = {
+ .name = "slv_tsif",
+ .id = MSM8996_SLAVE_TSIF,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 35
+};
+
+static struct qcom_icc_node slv_blsp_2 = {
+ .name = "slv_blsp_2",
+ .id = MSM8996_SLAVE_BLSP_2,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 37
+};
+
+static struct qcom_icc_node slv_sdcc_1 = {
+ .name = "slv_sdcc_1",
+ .id = MSM8996_SLAVE_SDCC_1,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 31
+};
+
+static struct qcom_icc_node slv_blsp_1 = {
+ .name = "slv_blsp_1",
+ .id = MSM8996_SLAVE_BLSP_1,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 39
+};
+
+static struct qcom_icc_node slv_pdm = {
+ .name = "slv_pdm",
+ .id = MSM8996_SLAVE_PDM,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 41
+};
+
+static struct qcom_icc_node slv_ahb2phy = {
+ .name = "slv_ahb2phy",
+ .id = MSM8996_SLAVE_AHB2PHY,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 153,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_hmss = {
+ .name = "slv_hmss",
+ .id = MSM8996_SLAVE_APPSS,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 20,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_lpass = {
+ .name = "slv_lpass",
+ .id = MSM8996_SLAVE_LPASS,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 21,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_usb3 = {
+ .name = "slv_usb3",
+ .id = MSM8996_SLAVE_USB3,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 22,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static const u16 slv_snoc_bimc_links[] = {
+ MSM8996_MASTER_SNOC_BIMC
+};
+
+static struct qcom_icc_node slv_snoc_bimc = {
+ .name = "slv_snoc_bimc",
+ .id = MSM8996_SLAVE_SNOC_BIMC,
+ .buswidth = 32,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 24,
+ .num_links = ARRAY_SIZE(slv_snoc_bimc_links),
+ .links = slv_snoc_bimc_links
+};
+
+static const u16 slv_snoc_cnoc_links[] = {
+ MSM8996_MASTER_SNOC_CNOC
+};
+
+static struct qcom_icc_node slv_snoc_cnoc = {
+ .name = "slv_snoc_cnoc",
+ .id = MSM8996_SLAVE_SNOC_CNOC,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 25,
+ .num_links = ARRAY_SIZE(slv_snoc_cnoc_links),
+ .links = slv_snoc_cnoc_links
+};
+
+static struct qcom_icc_node slv_imem = {
+ .name = "slv_imem",
+ .id = MSM8996_SLAVE_OCIMEM,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 26
+};
+
+static struct qcom_icc_node slv_pimem = {
+ .name = "slv_pimem",
+ .id = MSM8996_SLAVE_PIMEM,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 166
+};
+
+static const u16 slv_snoc_vmem_links[] = {
+ MSM8996_MASTER_SNOC_VMEM
+};
+
+static struct qcom_icc_node slv_snoc_vmem = {
+ .name = "slv_snoc_vmem",
+ .id = MSM8996_SLAVE_SNOC_VMEM,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 140,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .num_links = ARRAY_SIZE(slv_snoc_vmem_links),
+ .links = slv_snoc_vmem_links
+};
+
+static const u16 slv_snoc_pnoc_links[] = {
+ MSM8996_MASTER_SNOC_PNOC
+};
+
+static struct qcom_icc_node slv_snoc_pnoc = {
+ .name = "slv_snoc_pnoc",
+ .id = MSM8996_SLAVE_SNOC_PNOC,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 28,
+ .num_links = ARRAY_SIZE(slv_snoc_pnoc_links),
+ .links = slv_snoc_pnoc_links
+};
+
+static struct qcom_icc_node slv_qdss_stm = {
+ .name = "slv_qdss_stm",
+ .id = MSM8996_SLAVE_QDSS_STM,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 30
+};
+
+static struct qcom_icc_node slv_pcie_0 = {
+ .name = "slv_pcie_0",
+ .id = MSM8996_SLAVE_PCIE_0,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 84,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_pcie_1 = {
+ .name = "slv_pcie_1",
+ .id = MSM8996_SLAVE_PCIE_1,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 85,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_pcie_2 = {
+ .name = "slv_pcie_2",
+ .id = MSM8996_SLAVE_PCIE_2,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 164,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node slv_srvc_snoc = {
+ .name = "slv_srvc_snoc",
+ .id = MSM8996_SLAVE_SERVICE_SNOC,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 29,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID
+};
+
+static struct qcom_icc_node *a0noc_nodes[] = {
+ [MASTER_PCIE_0] = &mas_pcie_0,
+ [MASTER_PCIE_1] = &mas_pcie_1,
+ [MASTER_PCIE_2] = &mas_pcie_2
+};
+
+static const struct regmap_config msm8996_a0noc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x9000,
+ .fast_io = true
+};
+
+static const struct qcom_icc_desc msm8996_a0noc = {
+ .type = QCOM_ICC_NOC,
+ .nodes = a0noc_nodes,
+ .num_nodes = ARRAY_SIZE(a0noc_nodes),
+ .clocks = bus_a0noc_clocks,
+ .num_clocks = ARRAY_SIZE(bus_a0noc_clocks),
+ .has_bus_pd = true,
+ .regmap_cfg = &msm8996_a0noc_regmap_config
+};
+
+static struct qcom_icc_node *a1noc_nodes[] = {
+ [MASTER_CNOC_A1NOC] = &mas_cnoc_a1noc,
+ [MASTER_CRYPTO_CORE0] = &mas_crypto_c0,
+ [MASTER_PNOC_A1NOC] = &mas_pnoc_a1noc
+};
+
+static const struct regmap_config msm8996_a1noc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x7000,
+ .fast_io = true
+};
+
+static const struct qcom_icc_desc msm8996_a1noc = {
+ .type = QCOM_ICC_NOC,
+ .nodes = a1noc_nodes,
+ .num_nodes = ARRAY_SIZE(a1noc_nodes),
+ .regmap_cfg = &msm8996_a1noc_regmap_config
+};
+
+static struct qcom_icc_node *a2noc_nodes[] = {
+ [MASTER_USB3] = &mas_usb3,
+ [MASTER_IPA] = &mas_ipa,
+ [MASTER_UFS] = &mas_ufs
+};
+
+static const struct regmap_config msm8996_a2noc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0xa000,
+ .fast_io = true
+};
+
+static const struct qcom_icc_desc msm8996_a2noc = {
+ .type = QCOM_ICC_NOC,
+ .nodes = a2noc_nodes,
+ .num_nodes = ARRAY_SIZE(a2noc_nodes),
+ .regmap_cfg = &msm8996_a2noc_regmap_config
+};
+
+static struct qcom_icc_node *bimc_nodes[] = {
+ [MASTER_AMPSS_M0] = &mas_apps_proc,
+ [MASTER_GRAPHICS_3D] = &mas_oxili,
+ [MASTER_MNOC_BIMC] = &mas_mnoc_bimc,
+ [MASTER_SNOC_BIMC] = &mas_snoc_bimc,
+ [SLAVE_EBI_CH0] = &slv_ebi,
+ [SLAVE_HMSS_L3] = &slv_hmss_l3,
+ [SLAVE_BIMC_SNOC_0] = &slv_bimc_snoc_0,
+ [SLAVE_BIMC_SNOC_1] = &slv_bimc_snoc_1
+};
+
+static const struct regmap_config msm8996_bimc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x62000,
+ .fast_io = true
+};
+
+static const struct qcom_icc_desc msm8996_bimc = {
+ .type = QCOM_ICC_BIMC,
+ .nodes = bimc_nodes,
+ .num_nodes = ARRAY_SIZE(bimc_nodes),
+ .regmap_cfg = &msm8996_bimc_regmap_config
+};
+
+static struct qcom_icc_node *cnoc_nodes[] = {
+ [MASTER_SNOC_CNOC] = &mas_snoc_cnoc,
+ [MASTER_QDSS_DAP] = &mas_qdss_dap,
+ [SLAVE_CNOC_A1NOC] = &slv_cnoc_a1noc,
+ [SLAVE_CLK_CTL] = &slv_clk_ctl,
+ [SLAVE_TCSR] = &slv_tcsr,
+ [SLAVE_TLMM] = &slv_tlmm,
+ [SLAVE_CRYPTO_0_CFG] = &slv_crypto0_cfg,
+ [SLAVE_MPM] = &slv_mpm,
+ [SLAVE_PIMEM_CFG] = &slv_pimem_cfg,
+ [SLAVE_IMEM_CFG] = &slv_imem_cfg,
+ [SLAVE_MESSAGE_RAM] = &slv_message_ram,
+ [SLAVE_BIMC_CFG] = &slv_bimc_cfg,
+ [SLAVE_PMIC_ARB] = &slv_pmic_arb,
+ [SLAVE_PRNG] = &slv_prng,
+ [SLAVE_DCC_CFG] = &slv_dcc_cfg,
+ [SLAVE_RBCPR_MX] = &slv_rbcpr_mx,
+ [SLAVE_QDSS_CFG] = &slv_qdss_cfg,
+ [SLAVE_RBCPR_CX] = &slv_rbcpr_cx,
+ [SLAVE_QDSS_RBCPR_APU] = &slv_cpu_apu_cfg,
+ [SLAVE_CNOC_MNOC_CFG] = &slv_cnoc_mnoc_cfg,
+ [SLAVE_SNOC_CFG] = &slv_snoc_cfg,
+ [SLAVE_SNOC_MPU_CFG] = &slv_snoc_mpu_cfg,
+ [SLAVE_EBI1_PHY_CFG] = &slv_ebi1_phy_cfg,
+ [SLAVE_A0NOC_CFG] = &slv_a0noc_cfg,
+ [SLAVE_PCIE_1_CFG] = &slv_pcie_1_cfg,
+ [SLAVE_PCIE_2_CFG] = &slv_pcie_2_cfg,
+ [SLAVE_PCIE_0_CFG] = &slv_pcie_0_cfg,
+ [SLAVE_PCIE20_AHB2PHY] = &slv_pcie20_ahb2phy,
+ [SLAVE_A0NOC_MPU_CFG] = &slv_a0noc_mpu_cfg,
+ [SLAVE_UFS_CFG] = &slv_ufs_cfg,
+ [SLAVE_A1NOC_CFG] = &slv_a1noc_cfg,
+ [SLAVE_A1NOC_MPU_CFG] = &slv_a1noc_mpu_cfg,
+ [SLAVE_A2NOC_CFG] = &slv_a2noc_cfg,
+ [SLAVE_A2NOC_MPU_CFG] = &slv_a2noc_mpu_cfg,
+ [SLAVE_SSC_CFG] = &slv_ssc_cfg,
+ [SLAVE_A0NOC_SMMU_CFG] = &slv_a0noc_smmu_cfg,
+ [SLAVE_A1NOC_SMMU_CFG] = &slv_a1noc_smmu_cfg,
+ [SLAVE_A2NOC_SMMU_CFG] = &slv_a2noc_smmu_cfg,
+ [SLAVE_LPASS_SMMU_CFG] = &slv_lpass_smmu_cfg,
+ [SLAVE_CNOC_MNOC_MMSS_CFG] = &slv_cnoc_mnoc_mmss_cfg
+};
+
+static const struct regmap_config msm8996_cnoc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x1000,
+ .fast_io = true
+};
+
+static const struct qcom_icc_desc msm8996_cnoc = {
+ .type = QCOM_ICC_NOC,
+ .nodes = cnoc_nodes,
+ .num_nodes = ARRAY_SIZE(cnoc_nodes),
+ .regmap_cfg = &msm8996_cnoc_regmap_config
+};
+
+static struct qcom_icc_node *mnoc_nodes[] = {
+ [MASTER_CNOC_MNOC_CFG] = &mas_cnoc_mnoc_cfg,
+ [MASTER_CPP] = &mas_cpp,
+ [MASTER_JPEG] = &mas_jpeg,
+ [MASTER_MDP_PORT0] = &mas_mdp_p0,
+ [MASTER_MDP_PORT1] = &mas_mdp_p1,
+ [MASTER_ROTATOR] = &mas_rotator,
+ [MASTER_VIDEO_P0] = &mas_venus,
+ [MASTER_VFE] = &mas_vfe,
+ [MASTER_SNOC_VMEM] = &mas_snoc_vmem,
+ [MASTER_VIDEO_P0_OCMEM] = &mas_venus_vmem,
+ [MASTER_CNOC_MNOC_MMSS_CFG] = &mas_cnoc_mnoc_mmss_cfg,
+ [SLAVE_MNOC_BIMC] = &slv_mnoc_bimc,
+ [SLAVE_VMEM] = &slv_vmem,
+ [SLAVE_SERVICE_MNOC] = &slv_srvc_mnoc,
+ [SLAVE_MMAGIC_CFG] = &slv_mmagic_cfg,
+ [SLAVE_CPR_CFG] = &slv_cpr_cfg,
+ [SLAVE_MISC_CFG] = &slv_misc_cfg,
+ [SLAVE_VENUS_THROTTLE_CFG] = &slv_venus_throttle_cfg,
+ [SLAVE_VENUS_CFG] = &slv_venus_cfg,
+ [SLAVE_VMEM_CFG] = &slv_vmem_cfg,
+ [SLAVE_DSA_CFG] = &slv_dsa_cfg,
+ [SLAVE_MMSS_CLK_CFG] = &slv_mnoc_clocks_cfg,
+ [SLAVE_DSA_MPU_CFG] = &slv_dsa_mpu_cfg,
+ [SLAVE_MNOC_MPU_CFG] = &slv_mnoc_mpu_cfg,
+ [SLAVE_DISPLAY_CFG] = &slv_display_cfg,
+ [SLAVE_DISPLAY_THROTTLE_CFG] = &slv_display_throttle_cfg,
+ [SLAVE_CAMERA_CFG] = &slv_camera_cfg,
+ [SLAVE_CAMERA_THROTTLE_CFG] = &slv_camera_throttle_cfg,
+ [SLAVE_GRAPHICS_3D_CFG] = &slv_oxili_cfg,
+ [SLAVE_SMMU_MDP_CFG] = &slv_smmu_mdp_cfg,
+ [SLAVE_SMMU_ROT_CFG] = &slv_smmu_rot_cfg,
+ [SLAVE_SMMU_VENUS_CFG] = &slv_smmu_venus_cfg,
+ [SLAVE_SMMU_CPP_CFG] = &slv_smmu_cpp_cfg,
+ [SLAVE_SMMU_JPEG_CFG] = &slv_smmu_jpeg_cfg,
+ [SLAVE_SMMU_VFE_CFG] = &slv_smmu_vfe_cfg
+};
+
+static const struct regmap_config msm8996_mnoc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x20000,
+ .fast_io = true
+};
+
+static const struct qcom_icc_desc msm8996_mnoc = {
+ .type = QCOM_ICC_NOC,
+ .nodes = mnoc_nodes,
+ .num_nodes = ARRAY_SIZE(mnoc_nodes),
+ .clocks = bus_mm_clocks,
+ .num_clocks = ARRAY_SIZE(bus_mm_clocks),
+ .regmap_cfg = &msm8996_mnoc_regmap_config
+};
+
+static struct qcom_icc_node *pnoc_nodes[] = {
+ [MASTER_SNOC_PNOC] = &mas_snoc_pnoc,
+ [MASTER_SDCC_1] = &mas_sdcc_1,
+ [MASTER_SDCC_2] = &mas_sdcc_2,
+ [MASTER_SDCC_4] = &mas_sdcc_4,
+ [MASTER_USB_HS] = &mas_usb_hs,
+ [MASTER_BLSP_1] = &mas_blsp_1,
+ [MASTER_BLSP_2] = &mas_blsp_2,
+ [MASTER_TSIF] = &mas_tsif,
+ [SLAVE_PNOC_A1NOC] = &slv_pnoc_a1noc,
+ [SLAVE_USB_HS] = &slv_usb_hs,
+ [SLAVE_SDCC_2] = &slv_sdcc_2,
+ [SLAVE_SDCC_4] = &slv_sdcc_4,
+ [SLAVE_TSIF] = &slv_tsif,
+ [SLAVE_BLSP_2] = &slv_blsp_2,
+ [SLAVE_SDCC_1] = &slv_sdcc_1,
+ [SLAVE_BLSP_1] = &slv_blsp_1,
+ [SLAVE_PDM] = &slv_pdm,
+ [SLAVE_AHB2PHY] = &slv_ahb2phy
+};
+
+static const struct regmap_config msm8996_pnoc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x3000,
+ .fast_io = true
+};
+
+static const struct qcom_icc_desc msm8996_pnoc = {
+ .type = QCOM_ICC_NOC,
+ .nodes = pnoc_nodes,
+ .num_nodes = ARRAY_SIZE(pnoc_nodes),
+ .regmap_cfg = &msm8996_pnoc_regmap_config
+};
+
+static struct qcom_icc_node *snoc_nodes[] = {
+ [MASTER_HMSS] = &mas_hmss,
+ [MASTER_QDSS_BAM] = &mas_qdss_bam,
+ [MASTER_SNOC_CFG] = &mas_snoc_cfg,
+ [MASTER_BIMC_SNOC_0] = &mas_bimc_snoc_0,
+ [MASTER_BIMC_SNOC_1] = &mas_bimc_snoc_1,
+ [MASTER_A0NOC_SNOC] = &mas_a0noc_snoc,
+ [MASTER_A1NOC_SNOC] = &mas_a1noc_snoc,
+ [MASTER_A2NOC_SNOC] = &mas_a2noc_snoc,
+ [MASTER_QDSS_ETR] = &mas_qdss_etr,
+ [SLAVE_A0NOC_SNOC] = &slv_a0noc_snoc,
+ [SLAVE_A1NOC_SNOC] = &slv_a1noc_snoc,
+ [SLAVE_A2NOC_SNOC] = &slv_a2noc_snoc,
+ [SLAVE_HMSS] = &slv_hmss,
+ [SLAVE_LPASS] = &slv_lpass,
+ [SLAVE_USB3] = &slv_usb3,
+ [SLAVE_SNOC_BIMC] = &slv_snoc_bimc,
+ [SLAVE_SNOC_CNOC] = &slv_snoc_cnoc,
+ [SLAVE_IMEM] = &slv_imem,
+ [SLAVE_PIMEM] = &slv_pimem,
+ [SLAVE_SNOC_VMEM] = &slv_snoc_vmem,
+ [SLAVE_SNOC_PNOC] = &slv_snoc_pnoc,
+ [SLAVE_QDSS_STM] = &slv_qdss_stm,
+ [SLAVE_PCIE_0] = &slv_pcie_0,
+ [SLAVE_PCIE_1] = &slv_pcie_1,
+ [SLAVE_PCIE_2] = &slv_pcie_2,
+ [SLAVE_SERVICE_SNOC] = &slv_srvc_snoc
+};
+
+static const struct regmap_config msm8996_snoc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x20000,
+ .fast_io = true
+};
+
+static const struct qcom_icc_desc msm8996_snoc = {
+ .type = QCOM_ICC_NOC,
+ .nodes = snoc_nodes,
+ .num_nodes = ARRAY_SIZE(snoc_nodes),
+ .regmap_cfg = &msm8996_snoc_regmap_config
+};
+
+static const struct of_device_id qnoc_of_match[] = {
+ { .compatible = "qcom,msm8996-a0noc", .data = &msm8996_a0noc},
+ { .compatible = "qcom,msm8996-a1noc", .data = &msm8996_a1noc},
+ { .compatible = "qcom,msm8996-a2noc", .data = &msm8996_a2noc},
+ { .compatible = "qcom,msm8996-bimc", .data = &msm8996_bimc},
+ { .compatible = "qcom,msm8996-cnoc", .data = &msm8996_cnoc},
+ { .compatible = "qcom,msm8996-mnoc", .data = &msm8996_mnoc},
+ { .compatible = "qcom,msm8996-pnoc", .data = &msm8996_pnoc},
+ { .compatible = "qcom,msm8996-snoc", .data = &msm8996_snoc},
+ { }
+};
+MODULE_DEVICE_TABLE(of, qnoc_of_match);
+
+static struct platform_driver qnoc_driver = {
+ .probe = qnoc_probe,
+ .remove = qnoc_remove,
+ .driver = {
+ .name = "qnoc-msm8996",
+ .of_match_table = qnoc_of_match,
+ .sync_state = icc_sync_state,
+ }
+};
+module_platform_driver(qnoc_driver);
+
+MODULE_AUTHOR("Yassine Oudjana <y.oudjana@protonmail.com>");
+MODULE_DESCRIPTION("Qualcomm MSM8996 NoC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/interconnect/qcom/msm8996.h b/drivers/interconnect/qcom/msm8996.h
new file mode 100644
index 000000000000..42b54ffcaa7b
--- /dev/null
+++ b/drivers/interconnect/qcom/msm8996.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Qualcomm MSM8996 interconnect IDs
+ *
+ * Copyright (c) 2021 Yassine Oudjana <y.oudjana@protonmail.com>
+ */
+
+#ifndef __DRIVERS_INTERCONNECT_QCOM_MSM8996_H__
+#define __DRIVERS_INTERCONNECT_QCOM_MSM8996_H__
+
+#define MSM8996_MASTER_PCIE_0 1
+#define MSM8996_MASTER_PCIE_1 2
+#define MSM8996_MASTER_PCIE_2 3
+#define MSM8996_MASTER_CNOC_A1NOC 4
+#define MSM8996_MASTER_CRYPTO_CORE0 5
+#define MSM8996_MASTER_PNOC_A1NOC 6
+#define MSM8996_MASTER_USB3 7
+#define MSM8996_MASTER_IPA 8
+#define MSM8996_MASTER_UFS 9
+#define MSM8996_MASTER_AMPSS_M0 10
+#define MSM8996_MASTER_GRAPHICS_3D 11
+#define MSM8996_MASTER_MNOC_BIMC 12
+#define MSM8996_MASTER_SNOC_BIMC 13
+#define MSM8996_MASTER_SNOC_CNOC 14
+#define MSM8996_MASTER_QDSS_DAP 15
+#define MSM8996_MASTER_CNOC_MNOC_MMSS_CFG 16
+#define MSM8996_MASTER_CNOC_MNOC_CFG 17
+#define MSM8996_MASTER_CPP 18
+#define MSM8996_MASTER_JPEG 19
+#define MSM8996_MASTER_MDP_PORT0 20
+#define MSM8996_MASTER_MDP_PORT1 21
+#define MSM8996_MASTER_ROTATOR 22
+#define MSM8996_MASTER_VIDEO_P0 23
+#define MSM8996_MASTER_VFE 24
+#define MSM8996_MASTER_SNOC_VMEM 25
+#define MSM8996_MASTER_VIDEO_P0_OCMEM 26
+#define MSM8996_MASTER_SNOC_PNOC 27
+#define MSM8996_MASTER_SDCC_1 28
+#define MSM8996_MASTER_SDCC_2 29
+#define MSM8996_MASTER_SDCC_4 30
+#define MSM8996_MASTER_USB_HS 31
+#define MSM8996_MASTER_BLSP_1 32
+#define MSM8996_MASTER_BLSP_2 33
+#define MSM8996_MASTER_TSIF 34
+#define MSM8996_MASTER_HMSS 35
+#define MSM8996_MASTER_QDSS_BAM 36
+#define MSM8996_MASTER_SNOC_CFG 37
+#define MSM8996_MASTER_BIMC_SNOC_0 38
+#define MSM8996_MASTER_BIMC_SNOC_1 39
+#define MSM8996_MASTER_A0NOC_SNOC 40
+#define MSM8996_MASTER_A1NOC_SNOC 41
+#define MSM8996_MASTER_A2NOC_SNOC 42
+#define MSM8996_MASTER_QDSS_ETR 43
+
+#define MSM8996_SLAVE_A0NOC_SNOC 44
+#define MSM8996_SLAVE_A1NOC_SNOC 45
+#define MSM8996_SLAVE_A2NOC_SNOC 46
+#define MSM8996_SLAVE_EBI_CH0 47
+#define MSM8996_SLAVE_HMSS_L3 48
+#define MSM8996_SLAVE_BIMC_SNOC_0 49
+#define MSM8996_SLAVE_BIMC_SNOC_1 50
+#define MSM8996_SLAVE_CNOC_A1NOC 51
+#define MSM8996_SLAVE_CLK_CTL 52
+#define MSM8996_SLAVE_TCSR 53
+#define MSM8996_SLAVE_TLMM 54
+#define MSM8996_SLAVE_CRYPTO_0_CFG 55
+#define MSM8996_SLAVE_MPM 56
+#define MSM8996_SLAVE_PIMEM_CFG 57
+#define MSM8996_SLAVE_IMEM_CFG 58
+#define MSM8996_SLAVE_MESSAGE_RAM 59
+#define MSM8996_SLAVE_BIMC_CFG 60
+#define MSM8996_SLAVE_PMIC_ARB 61
+#define MSM8996_SLAVE_PRNG 62
+#define MSM8996_SLAVE_DCC_CFG 63
+#define MSM8996_SLAVE_RBCPR_MX 64
+#define MSM8996_SLAVE_QDSS_CFG 65
+#define MSM8996_SLAVE_RBCPR_CX 66
+#define MSM8996_SLAVE_QDSS_RBCPR_APU_CFG 67
+#define MSM8996_SLAVE_CNOC_MNOC_CFG 68
+#define MSM8996_SLAVE_SNOC_CFG 69
+#define MSM8996_SLAVE_SNOC_MPU_CFG 70
+#define MSM8996_SLAVE_EBI1_PHY_CFG 71
+#define MSM8996_SLAVE_A0NOC_CFG 72
+#define MSM8996_SLAVE_PCIE_1_CFG 73
+#define MSM8996_SLAVE_PCIE_2_CFG 74
+#define MSM8996_SLAVE_PCIE_0_CFG 75
+#define MSM8996_SLAVE_PCIE20_AHB2PHY 76
+#define MSM8996_SLAVE_A0NOC_MPU_CFG 77
+#define MSM8996_SLAVE_UFS_CFG 78
+#define MSM8996_SLAVE_A1NOC_CFG 79
+#define MSM8996_SLAVE_A1NOC_MPU_CFG 80
+#define MSM8996_SLAVE_A2NOC_CFG 81
+#define MSM8996_SLAVE_A2NOC_MPU_CFG 82
+#define MSM8996_SLAVE_SSC_CFG 83
+#define MSM8996_SLAVE_A0NOC_SMMU_CFG 84
+#define MSM8996_SLAVE_A1NOC_SMMU_CFG 85
+#define MSM8996_SLAVE_A2NOC_SMMU_CFG 86
+#define MSM8996_SLAVE_LPASS_SMMU_CFG 87
+#define MSM8996_SLAVE_CNOC_MNOC_MMSS_CFG 88
+#define MSM8996_SLAVE_MMAGIC_CFG 89
+#define MSM8996_SLAVE_CPR_CFG 90
+#define MSM8996_SLAVE_MISC_CFG 91
+#define MSM8996_SLAVE_VENUS_THROTTLE_CFG 92
+#define MSM8996_SLAVE_VENUS_CFG 93
+#define MSM8996_SLAVE_VMEM_CFG 94
+#define MSM8996_SLAVE_DSA_CFG 95
+#define MSM8996_SLAVE_MMSS_CLK_CFG 96
+#define MSM8996_SLAVE_DSA_MPU_CFG 97
+#define MSM8996_SLAVE_MNOC_MPU_CFG 98
+#define MSM8996_SLAVE_DISPLAY_CFG 99
+#define MSM8996_SLAVE_DISPLAY_THROTTLE_CFG 100
+#define MSM8996_SLAVE_CAMERA_CFG 101
+#define MSM8996_SLAVE_CAMERA_THROTTLE_CFG 102
+#define MSM8996_SLAVE_GRAPHICS_3D_CFG 103
+#define MSM8996_SLAVE_SMMU_MDP_CFG 104
+#define MSM8996_SLAVE_SMMU_ROTATOR_CFG 105
+#define MSM8996_SLAVE_SMMU_VENUS_CFG 106
+#define MSM8996_SLAVE_SMMU_CPP_CFG 107
+#define MSM8996_SLAVE_SMMU_JPEG_CFG 108
+#define MSM8996_SLAVE_SMMU_VFE_CFG 109
+#define MSM8996_SLAVE_MNOC_BIMC 110
+#define MSM8996_SLAVE_VMEM 111
+#define MSM8996_SLAVE_SERVICE_MNOC 112
+#define MSM8996_SLAVE_PNOC_A1NOC 113
+#define MSM8996_SLAVE_USB_HS 114
+#define MSM8996_SLAVE_SDCC_2 115
+#define MSM8996_SLAVE_SDCC_4 116
+#define MSM8996_SLAVE_TSIF 117
+#define MSM8996_SLAVE_BLSP_2 118
+#define MSM8996_SLAVE_SDCC_1 119
+#define MSM8996_SLAVE_BLSP_1 120
+#define MSM8996_SLAVE_PDM 121
+#define MSM8996_SLAVE_AHB2PHY 122
+#define MSM8996_SLAVE_APPSS 123
+#define MSM8996_SLAVE_LPASS 124
+#define MSM8996_SLAVE_USB3 125
+#define MSM8996_SLAVE_SNOC_BIMC 126
+#define MSM8996_SLAVE_SNOC_CNOC 127
+#define MSM8996_SLAVE_OCIMEM 128
+#define MSM8996_SLAVE_PIMEM 129
+#define MSM8996_SLAVE_SNOC_VMEM 130
+#define MSM8996_SLAVE_SNOC_PNOC 131
+#define MSM8996_SLAVE_QDSS_STM 132
+#define MSM8996_SLAVE_PCIE_0 133
+#define MSM8996_SLAVE_PCIE_1 134
+#define MSM8996_SLAVE_PCIE_2 135
+#define MSM8996_SLAVE_SERVICE_SNOC 136
+
+#endif /* __DRIVERS_INTERCONNECT_QCOM_MSM8996_H__ */
diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
index c7af143980de..eec13099a6a3 100644
--- a/drivers/interconnect/qcom/osm-l3.c
+++ b/drivers/interconnect/qcom/osm-l3.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
*/
#include <linux/bitfield.h>
@@ -15,6 +15,7 @@
#include <dt-bindings/interconnect/qcom,osm-l3.h>
#include "sc7180.h"
+#include "sc7280.h"
#include "sc8180x.h"
#include "sdm845.h"
#include "sm8150.h"
@@ -114,6 +115,22 @@ static const struct qcom_osm_l3_desc sc7180_icc_osm_l3 = {
.reg_perf_state = OSM_REG_PERF_STATE,
};
+DEFINE_QNODE(sc7280_epss_apps_l3, SC7280_MASTER_EPSS_L3_APPS, 32, SC7280_SLAVE_EPSS_L3);
+DEFINE_QNODE(sc7280_epss_l3, SC7280_SLAVE_EPSS_L3, 32);
+
+static const struct qcom_osm_l3_node *sc7280_epss_l3_nodes[] = {
+ [MASTER_EPSS_L3_APPS] = &sc7280_epss_apps_l3,
+ [SLAVE_EPSS_L3_SHARED] = &sc7280_epss_l3,
+};
+
+static const struct qcom_osm_l3_desc sc7280_icc_epss_l3 = {
+ .nodes = sc7280_epss_l3_nodes,
+ .num_nodes = ARRAY_SIZE(sc7280_epss_l3_nodes),
+ .lut_row_size = EPSS_LUT_ROW_SIZE,
+ .reg_freq_lut = EPSS_REG_FREQ_LUT,
+ .reg_perf_state = EPSS_REG_PERF_STATE,
+};
+
DEFINE_QNODE(sc8180x_osm_apps_l3, SC8180X_MASTER_OSM_L3_APPS, 32, SC8180X_SLAVE_OSM_L3);
DEFINE_QNODE(sc8180x_osm_l3, SC8180X_SLAVE_OSM_L3, 32);
@@ -326,6 +343,7 @@ err:
static const struct of_device_id osm_l3_of_match[] = {
{ .compatible = "qcom,sc7180-osm-l3", .data = &sc7180_icc_osm_l3 },
+ { .compatible = "qcom,sc7280-epss-l3", .data = &sc7280_icc_epss_l3 },
{ .compatible = "qcom,sdm845-osm-l3", .data = &sdm845_icc_osm_l3 },
{ .compatible = "qcom,sm8150-osm-l3", .data = &sm8150_icc_osm_l3 },
{ .compatible = "qcom,sc8180x-osm-l3", .data = &sc8180x_icc_osm_l3 },
diff --git a/drivers/interconnect/qcom/qcm2290.c b/drivers/interconnect/qcom/qcm2290.c
new file mode 100644
index 000000000000..74404e0b2080
--- /dev/null
+++ b/drivers/interconnect/qcom/qcm2290.c
@@ -0,0 +1,1363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Qualcomm QCM2290 Network-on-Chip (NoC) QoS driver
+ *
+ * Copyright (c) 2021, Linaro Ltd.
+ *
+ */
+
+#include <dt-bindings/interconnect/qcom,qcm2290.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/interconnect-provider.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include "icc-rpm.h"
+#include "smd-rpm.h"
+
+enum {
+ QCM2290_MASTER_APPSS_PROC = 1,
+ QCM2290_MASTER_SNOC_BIMC_RT,
+ QCM2290_MASTER_SNOC_BIMC_NRT,
+ QCM2290_MASTER_SNOC_BIMC,
+ QCM2290_MASTER_TCU_0,
+ QCM2290_MASTER_GFX3D,
+ QCM2290_MASTER_SNOC_CNOC,
+ QCM2290_MASTER_QDSS_DAP,
+ QCM2290_MASTER_CRYPTO_CORE0,
+ QCM2290_MASTER_SNOC_CFG,
+ QCM2290_MASTER_TIC,
+ QCM2290_MASTER_ANOC_SNOC,
+ QCM2290_MASTER_BIMC_SNOC,
+ QCM2290_MASTER_PIMEM,
+ QCM2290_MASTER_QDSS_BAM,
+ QCM2290_MASTER_QUP_0,
+ QCM2290_MASTER_IPA,
+ QCM2290_MASTER_QDSS_ETR,
+ QCM2290_MASTER_SDCC_1,
+ QCM2290_MASTER_SDCC_2,
+ QCM2290_MASTER_QPIC,
+ QCM2290_MASTER_USB3_0,
+ QCM2290_MASTER_QUP_CORE_0,
+ QCM2290_MASTER_CAMNOC_SF,
+ QCM2290_MASTER_VIDEO_P0,
+ QCM2290_MASTER_VIDEO_PROC,
+ QCM2290_MASTER_CAMNOC_HF,
+ QCM2290_MASTER_MDP0,
+
+ QCM2290_SLAVE_EBI1,
+ QCM2290_SLAVE_BIMC_SNOC,
+ QCM2290_SLAVE_BIMC_CFG,
+ QCM2290_SLAVE_CAMERA_NRT_THROTTLE_CFG,
+ QCM2290_SLAVE_CAMERA_RT_THROTTLE_CFG,
+ QCM2290_SLAVE_CAMERA_CFG,
+ QCM2290_SLAVE_CLK_CTL,
+ QCM2290_SLAVE_CRYPTO_0_CFG,
+ QCM2290_SLAVE_DISPLAY_CFG,
+ QCM2290_SLAVE_DISPLAY_THROTTLE_CFG,
+ QCM2290_SLAVE_GPU_CFG,
+ QCM2290_SLAVE_HWKM,
+ QCM2290_SLAVE_IMEM_CFG,
+ QCM2290_SLAVE_IPA_CFG,
+ QCM2290_SLAVE_LPASS,
+ QCM2290_SLAVE_MESSAGE_RAM,
+ QCM2290_SLAVE_PDM,
+ QCM2290_SLAVE_PIMEM_CFG,
+ QCM2290_SLAVE_PKA_WRAPPER,
+ QCM2290_SLAVE_PMIC_ARB,
+ QCM2290_SLAVE_PRNG,
+ QCM2290_SLAVE_QDSS_CFG,
+ QCM2290_SLAVE_QM_CFG,
+ QCM2290_SLAVE_QM_MPU_CFG,
+ QCM2290_SLAVE_QPIC,
+ QCM2290_SLAVE_QUP_0,
+ QCM2290_SLAVE_SDCC_1,
+ QCM2290_SLAVE_SDCC_2,
+ QCM2290_SLAVE_SNOC_CFG,
+ QCM2290_SLAVE_TCSR,
+ QCM2290_SLAVE_USB3,
+ QCM2290_SLAVE_VENUS_CFG,
+ QCM2290_SLAVE_VENUS_THROTTLE_CFG,
+ QCM2290_SLAVE_VSENSE_CTRL_CFG,
+ QCM2290_SLAVE_SERVICE_CNOC,
+ QCM2290_SLAVE_APPSS,
+ QCM2290_SLAVE_SNOC_CNOC,
+ QCM2290_SLAVE_IMEM,
+ QCM2290_SLAVE_PIMEM,
+ QCM2290_SLAVE_SNOC_BIMC,
+ QCM2290_SLAVE_SERVICE_SNOC,
+ QCM2290_SLAVE_QDSS_STM,
+ QCM2290_SLAVE_TCU,
+ QCM2290_SLAVE_ANOC_SNOC,
+ QCM2290_SLAVE_QUP_CORE_0,
+ QCM2290_SLAVE_SNOC_BIMC_NRT,
+ QCM2290_SLAVE_SNOC_BIMC_RT,
+};
+
+/* Master nodes */
+static const u16 mas_appss_proc_links[] = {
+ QCM2290_SLAVE_EBI1,
+ QCM2290_SLAVE_BIMC_SNOC,
+};
+
+static struct qcom_icc_node mas_appss_proc = {
+ .id = QCM2290_MASTER_APPSS_PROC,
+ .name = "mas_apps_proc",
+ .buswidth = 16,
+ .qos.ap_owned = true,
+ .qos.qos_port = 0,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.prio_level = 0,
+ .qos.areq_prio = 0,
+ .mas_rpm_id = 0,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_appss_proc_links),
+ .links = mas_appss_proc_links,
+};
+
+static const u16 mas_snoc_bimc_rt_links[] = {
+ QCM2290_SLAVE_EBI1,
+};
+
+static struct qcom_icc_node mas_snoc_bimc_rt = {
+ .id = QCM2290_MASTER_SNOC_BIMC_RT,
+ .name = "mas_snoc_bimc_rt",
+ .buswidth = 16,
+ .qos.ap_owned = true,
+ .qos.qos_port = 2,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .mas_rpm_id = 163,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_snoc_bimc_rt_links),
+ .links = mas_snoc_bimc_rt_links,
+};
+
+static const u16 mas_snoc_bimc_nrt_links[] = {
+ QCM2290_SLAVE_EBI1,
+};
+
+static struct qcom_icc_node mas_snoc_bimc_nrt = {
+ .id = QCM2290_MASTER_SNOC_BIMC_NRT,
+ .name = "mas_snoc_bimc_nrt",
+ .buswidth = 16,
+ .qos.ap_owned = true,
+ .qos.qos_port = 2,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .mas_rpm_id = 163,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_snoc_bimc_nrt_links),
+ .links = mas_snoc_bimc_nrt_links,
+};
+
+static const u16 mas_snoc_bimc_links[] = {
+ QCM2290_SLAVE_EBI1,
+};
+
+static struct qcom_icc_node mas_snoc_bimc = {
+ .id = QCM2290_MASTER_SNOC_BIMC,
+ .name = "mas_snoc_bimc",
+ .buswidth = 16,
+ .qos.ap_owned = true,
+ .qos.qos_port = 2,
+ .qos.qos_mode = NOC_QOS_MODE_BYPASS,
+ .mas_rpm_id = 164,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_snoc_bimc_links),
+ .links = mas_snoc_bimc_links,
+};
+
+static const u16 mas_tcu_0_links[] = {
+ QCM2290_SLAVE_EBI1,
+ QCM2290_SLAVE_BIMC_SNOC,
+};
+
+static struct qcom_icc_node mas_tcu_0 = {
+ .id = QCM2290_MASTER_TCU_0,
+ .name = "mas_tcu_0",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_port = 4,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.prio_level = 6,
+ .qos.areq_prio = 6,
+ .mas_rpm_id = 102,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_tcu_0_links),
+ .links = mas_tcu_0_links,
+};
+
+static const u16 mas_snoc_cnoc_links[] = {
+ QCM2290_SLAVE_CAMERA_RT_THROTTLE_CFG,
+ QCM2290_SLAVE_SDCC_2,
+ QCM2290_SLAVE_SDCC_1,
+ QCM2290_SLAVE_QM_CFG,
+ QCM2290_SLAVE_BIMC_CFG,
+ QCM2290_SLAVE_USB3,
+ QCM2290_SLAVE_QM_MPU_CFG,
+ QCM2290_SLAVE_CAMERA_NRT_THROTTLE_CFG,
+ QCM2290_SLAVE_QDSS_CFG,
+ QCM2290_SLAVE_PDM,
+ QCM2290_SLAVE_IPA_CFG,
+ QCM2290_SLAVE_DISPLAY_THROTTLE_CFG,
+ QCM2290_SLAVE_TCSR,
+ QCM2290_SLAVE_MESSAGE_RAM,
+ QCM2290_SLAVE_PMIC_ARB,
+ QCM2290_SLAVE_LPASS,
+ QCM2290_SLAVE_DISPLAY_CFG,
+ QCM2290_SLAVE_VENUS_CFG,
+ QCM2290_SLAVE_GPU_CFG,
+ QCM2290_SLAVE_IMEM_CFG,
+ QCM2290_SLAVE_SNOC_CFG,
+ QCM2290_SLAVE_SERVICE_CNOC,
+ QCM2290_SLAVE_VENUS_THROTTLE_CFG,
+ QCM2290_SLAVE_PKA_WRAPPER,
+ QCM2290_SLAVE_HWKM,
+ QCM2290_SLAVE_PRNG,
+ QCM2290_SLAVE_VSENSE_CTRL_CFG,
+ QCM2290_SLAVE_CRYPTO_0_CFG,
+ QCM2290_SLAVE_PIMEM_CFG,
+ QCM2290_SLAVE_QUP_0,
+ QCM2290_SLAVE_CAMERA_CFG,
+ QCM2290_SLAVE_CLK_CTL,
+ QCM2290_SLAVE_QPIC,
+};
+
+static struct qcom_icc_node mas_snoc_cnoc = {
+ .id = QCM2290_MASTER_SNOC_CNOC,
+ .name = "mas_snoc_cnoc",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = 52,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_snoc_cnoc_links),
+ .links = mas_snoc_cnoc_links,
+};
+
+static const u16 mas_qdss_dap_links[] = {
+ QCM2290_SLAVE_CAMERA_RT_THROTTLE_CFG,
+ QCM2290_SLAVE_SDCC_2,
+ QCM2290_SLAVE_SDCC_1,
+ QCM2290_SLAVE_QM_CFG,
+ QCM2290_SLAVE_BIMC_CFG,
+ QCM2290_SLAVE_USB3,
+ QCM2290_SLAVE_QM_MPU_CFG,
+ QCM2290_SLAVE_CAMERA_NRT_THROTTLE_CFG,
+ QCM2290_SLAVE_QDSS_CFG,
+ QCM2290_SLAVE_PDM,
+ QCM2290_SLAVE_IPA_CFG,
+ QCM2290_SLAVE_DISPLAY_THROTTLE_CFG,
+ QCM2290_SLAVE_TCSR,
+ QCM2290_SLAVE_MESSAGE_RAM,
+ QCM2290_SLAVE_PMIC_ARB,
+ QCM2290_SLAVE_LPASS,
+ QCM2290_SLAVE_DISPLAY_CFG,
+ QCM2290_SLAVE_VENUS_CFG,
+ QCM2290_SLAVE_GPU_CFG,
+ QCM2290_SLAVE_IMEM_CFG,
+ QCM2290_SLAVE_SNOC_CFG,
+ QCM2290_SLAVE_SERVICE_CNOC,
+ QCM2290_SLAVE_VENUS_THROTTLE_CFG,
+ QCM2290_SLAVE_PKA_WRAPPER,
+ QCM2290_SLAVE_HWKM,
+ QCM2290_SLAVE_PRNG,
+ QCM2290_SLAVE_VSENSE_CTRL_CFG,
+ QCM2290_SLAVE_CRYPTO_0_CFG,
+ QCM2290_SLAVE_PIMEM_CFG,
+ QCM2290_SLAVE_QUP_0,
+ QCM2290_SLAVE_CAMERA_CFG,
+ QCM2290_SLAVE_CLK_CTL,
+ QCM2290_SLAVE_QPIC,
+};
+
+static struct qcom_icc_node mas_qdss_dap = {
+ .id = QCM2290_MASTER_QDSS_DAP,
+ .name = "mas_qdss_dap",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = 49,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_qdss_dap_links),
+ .links = mas_qdss_dap_links,
+};
+
+static const u16 mas_crypto_core0_links[] = {
+ QCM2290_SLAVE_ANOC_SNOC
+};
+
+static struct qcom_icc_node mas_crypto_core0 = {
+ .id = QCM2290_MASTER_CRYPTO_CORE0,
+ .name = "mas_crypto_core0",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_port = 22,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 23,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_crypto_core0_links),
+ .links = mas_crypto_core0_links,
+};
+
+static const u16 mas_qup_core_0_links[] = {
+ QCM2290_SLAVE_QUP_CORE_0,
+};
+
+static struct qcom_icc_node mas_qup_core_0 = {
+ .id = QCM2290_MASTER_QUP_CORE_0,
+ .name = "mas_qup_core_0",
+ .buswidth = 4,
+ .mas_rpm_id = 170,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_qup_core_0_links),
+ .links = mas_qup_core_0_links,
+};
+
+static const u16 mas_camnoc_sf_links[] = {
+ QCM2290_SLAVE_SNOC_BIMC_NRT,
+};
+
+static struct qcom_icc_node mas_camnoc_sf = {
+ .id = QCM2290_MASTER_CAMNOC_SF,
+ .name = "mas_camnoc_sf",
+ .buswidth = 32,
+ .qos.ap_owned = true,
+ .qos.qos_port = 4,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 3,
+ .mas_rpm_id = 172,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_camnoc_sf_links),
+ .links = mas_camnoc_sf_links,
+};
+
+static const u16 mas_camnoc_hf_links[] = {
+ QCM2290_SLAVE_SNOC_BIMC_RT,
+};
+
+static struct qcom_icc_node mas_camnoc_hf = {
+ .id = QCM2290_MASTER_CAMNOC_HF,
+ .name = "mas_camnoc_hf",
+ .buswidth = 32,
+ .qos.ap_owned = true,
+ .qos.qos_port = 10,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 3,
+ .qos.urg_fwd_en = true,
+ .mas_rpm_id = 173,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_camnoc_hf_links),
+ .links = mas_camnoc_hf_links,
+};
+
+static const u16 mas_mdp0_links[] = {
+ QCM2290_SLAVE_SNOC_BIMC_RT,
+};
+
+static struct qcom_icc_node mas_mdp0 = {
+ .id = QCM2290_MASTER_MDP0,
+ .name = "mas_mdp0",
+ .buswidth = 16,
+ .qos.ap_owned = true,
+ .qos.qos_port = 5,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 3,
+ .qos.urg_fwd_en = true,
+ .mas_rpm_id = 8,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_mdp0_links),
+ .links = mas_mdp0_links,
+};
+
+static const u16 mas_video_p0_links[] = {
+ QCM2290_SLAVE_SNOC_BIMC_NRT,
+};
+
+static struct qcom_icc_node mas_video_p0 = {
+ .id = QCM2290_MASTER_VIDEO_P0,
+ .name = "mas_video_p0",
+ .buswidth = 16,
+ .qos.ap_owned = true,
+ .qos.qos_port = 9,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 3,
+ .qos.urg_fwd_en = true,
+ .mas_rpm_id = 9,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_video_p0_links),
+ .links = mas_video_p0_links,
+};
+
+static const u16 mas_video_proc_links[] = {
+ QCM2290_SLAVE_SNOC_BIMC_NRT,
+};
+
+static struct qcom_icc_node mas_video_proc = {
+ .id = QCM2290_MASTER_VIDEO_PROC,
+ .name = "mas_video_proc",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_port = 13,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 4,
+ .mas_rpm_id = 168,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_video_proc_links),
+ .links = mas_video_proc_links,
+};
+
+static const u16 mas_snoc_cfg_links[] = {
+ QCM2290_SLAVE_SERVICE_SNOC,
+};
+
+static struct qcom_icc_node mas_snoc_cfg = {
+ .id = QCM2290_MASTER_SNOC_CFG,
+ .name = "mas_snoc_cfg",
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = 20,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_snoc_cfg_links),
+ .links = mas_snoc_cfg_links,
+};
+
+static const u16 mas_tic_links[] = {
+ QCM2290_SLAVE_PIMEM,
+ QCM2290_SLAVE_IMEM,
+ QCM2290_SLAVE_APPSS,
+ QCM2290_SLAVE_SNOC_BIMC,
+ QCM2290_SLAVE_SNOC_CNOC,
+ QCM2290_SLAVE_TCU,
+ QCM2290_SLAVE_QDSS_STM,
+};
+
+static struct qcom_icc_node mas_tic = {
+ .id = QCM2290_MASTER_TIC,
+ .name = "mas_tic",
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_port = 8,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 51,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_tic_links),
+ .links = mas_tic_links,
+};
+
+static const u16 mas_anoc_snoc_links[] = {
+ QCM2290_SLAVE_PIMEM,
+ QCM2290_SLAVE_IMEM,
+ QCM2290_SLAVE_APPSS,
+ QCM2290_SLAVE_SNOC_BIMC,
+ QCM2290_SLAVE_SNOC_CNOC,
+ QCM2290_SLAVE_TCU,
+ QCM2290_SLAVE_QDSS_STM,
+};
+
+static struct qcom_icc_node mas_anoc_snoc = {
+ .id = QCM2290_MASTER_ANOC_SNOC,
+ .name = "mas_anoc_snoc",
+ .buswidth = 16,
+ .mas_rpm_id = 110,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_anoc_snoc_links),
+ .links = mas_anoc_snoc_links,
+};
+
+static const u16 mas_bimc_snoc_links[] = {
+ QCM2290_SLAVE_PIMEM,
+ QCM2290_SLAVE_IMEM,
+ QCM2290_SLAVE_APPSS,
+ QCM2290_SLAVE_SNOC_CNOC,
+ QCM2290_SLAVE_TCU,
+ QCM2290_SLAVE_QDSS_STM,
+};
+
+static struct qcom_icc_node mas_bimc_snoc = {
+ .id = QCM2290_MASTER_BIMC_SNOC,
+ .name = "mas_bimc_snoc",
+ .buswidth = 8,
+ .mas_rpm_id = 21,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_bimc_snoc_links),
+ .links = mas_bimc_snoc_links,
+};
+
+static const u16 mas_pimem_links[] = {
+ QCM2290_SLAVE_IMEM,
+ QCM2290_SLAVE_SNOC_BIMC,
+};
+
+static struct qcom_icc_node mas_pimem = {
+ .id = QCM2290_MASTER_PIMEM,
+ .name = "mas_pimem",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_port = 20,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 113,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_pimem_links),
+ .links = mas_pimem_links,
+};
+
+static const u16 mas_qdss_bam_links[] = {
+ QCM2290_SLAVE_ANOC_SNOC,
+};
+
+static struct qcom_icc_node mas_qdss_bam = {
+ .id = QCM2290_MASTER_QDSS_BAM,
+ .name = "mas_qdss_bam",
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_port = 2,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 19,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_qdss_bam_links),
+ .links = mas_qdss_bam_links,
+};
+
+static const u16 mas_qup_0_links[] = {
+ QCM2290_SLAVE_ANOC_SNOC,
+};
+
+static struct qcom_icc_node mas_qup_0 = {
+ .id = QCM2290_MASTER_QUP_0,
+ .name = "mas_qup_0",
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_port = 0,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 166,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_qup_0_links),
+ .links = mas_qup_0_links,
+};
+
+static const u16 mas_ipa_links[] = {
+ QCM2290_SLAVE_ANOC_SNOC,
+};
+
+static struct qcom_icc_node mas_ipa = {
+ .id = QCM2290_MASTER_IPA,
+ .name = "mas_ipa",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_port = 3,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 59,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_ipa_links),
+ .links = mas_ipa_links,
+};
+
+static const u16 mas_qdss_etr_links[] = {
+ QCM2290_SLAVE_ANOC_SNOC,
+};
+
+static struct qcom_icc_node mas_qdss_etr = {
+ .id = QCM2290_MASTER_QDSS_ETR,
+ .name = "mas_qdss_etr",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_port = 12,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 31,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_qdss_etr_links),
+ .links = mas_qdss_etr_links,
+};
+
+static const u16 mas_sdcc_1_links[] = {
+ QCM2290_SLAVE_ANOC_SNOC,
+};
+
+static struct qcom_icc_node mas_sdcc_1 = {
+ .id = QCM2290_MASTER_SDCC_1,
+ .name = "mas_sdcc_1",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_port = 17,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 33,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_sdcc_1_links),
+ .links = mas_sdcc_1_links,
+};
+
+static const u16 mas_sdcc_2_links[] = {
+ QCM2290_SLAVE_ANOC_SNOC,
+};
+
+static struct qcom_icc_node mas_sdcc_2 = {
+ .id = QCM2290_MASTER_SDCC_2,
+ .name = "mas_sdcc_2",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_port = 23,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 35,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_sdcc_2_links),
+ .links = mas_sdcc_2_links,
+};
+
+static const u16 mas_qpic_links[] = {
+ QCM2290_SLAVE_ANOC_SNOC,
+};
+
+static struct qcom_icc_node mas_qpic = {
+ .id = QCM2290_MASTER_QPIC,
+ .name = "mas_qpic",
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_port = 1,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 58,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_qpic_links),
+ .links = mas_qpic_links,
+};
+
+static const u16 mas_usb3_0_links[] = {
+ QCM2290_SLAVE_ANOC_SNOC,
+};
+
+static struct qcom_icc_node mas_usb3_0 = {
+ .id = QCM2290_MASTER_USB3_0,
+ .name = "mas_usb3_0",
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_port = 24,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.areq_prio = 2,
+ .mas_rpm_id = 32,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_usb3_0_links),
+ .links = mas_usb3_0_links,
+};
+
+static const u16 mas_gfx3d_links[] = {
+ QCM2290_SLAVE_EBI1,
+};
+
+static struct qcom_icc_node mas_gfx3d = {
+ .id = QCM2290_MASTER_GFX3D,
+ .name = "mas_gfx3d",
+ .buswidth = 32,
+ .qos.ap_owned = true,
+ .qos.qos_port = 1,
+ .qos.qos_mode = NOC_QOS_MODE_FIXED,
+ .qos.prio_level = 0,
+ .qos.areq_prio = 0,
+ .mas_rpm_id = 6,
+ .slv_rpm_id = -1,
+ .num_links = ARRAY_SIZE(mas_gfx3d_links),
+ .links = mas_gfx3d_links,
+};
+
+/* Slave nodes */
+static struct qcom_icc_node slv_ebi1 = {
+ .name = "slv_ebi1",
+ .id = QCM2290_SLAVE_EBI1,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 0,
+};
+
+static const u16 slv_bimc_snoc_links[] = {
+ QCM2290_MASTER_BIMC_SNOC,
+};
+
+static struct qcom_icc_node slv_bimc_snoc = {
+ .name = "slv_bimc_snoc",
+ .id = QCM2290_SLAVE_BIMC_SNOC,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 2,
+ .num_links = ARRAY_SIZE(slv_bimc_snoc_links),
+ .links = slv_bimc_snoc_links,
+};
+
+static struct qcom_icc_node slv_bimc_cfg = {
+ .name = "slv_bimc_cfg",
+ .id = QCM2290_SLAVE_BIMC_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 56,
+};
+
+static struct qcom_icc_node slv_camera_nrt_throttle_cfg = {
+ .name = "slv_camera_nrt_throttle_cfg",
+ .id = QCM2290_SLAVE_CAMERA_NRT_THROTTLE_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 271,
+};
+
+static struct qcom_icc_node slv_camera_rt_throttle_cfg = {
+ .name = "slv_camera_rt_throttle_cfg",
+ .id = QCM2290_SLAVE_CAMERA_RT_THROTTLE_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 279,
+};
+
+static struct qcom_icc_node slv_camera_cfg = {
+ .name = "slv_camera_cfg",
+ .id = QCM2290_SLAVE_CAMERA_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 3,
+};
+
+static struct qcom_icc_node slv_clk_ctl = {
+ .name = "slv_clk_ctl",
+ .id = QCM2290_SLAVE_CLK_CTL,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 47,
+};
+
+static struct qcom_icc_node slv_crypto_0_cfg = {
+ .name = "slv_crypto_0_cfg",
+ .id = QCM2290_SLAVE_CRYPTO_0_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 52,
+};
+
+static struct qcom_icc_node slv_display_cfg = {
+ .name = "slv_display_cfg",
+ .id = QCM2290_SLAVE_DISPLAY_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 4,
+};
+
+static struct qcom_icc_node slv_display_throttle_cfg = {
+ .name = "slv_display_throttle_cfg",
+ .id = QCM2290_SLAVE_DISPLAY_THROTTLE_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 156,
+};
+
+static struct qcom_icc_node slv_gpu_cfg = {
+ .name = "slv_gpu_cfg",
+ .id = QCM2290_SLAVE_GPU_CFG,
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 275,
+};
+
+static struct qcom_icc_node slv_hwkm = {
+ .name = "slv_hwkm",
+ .id = QCM2290_SLAVE_HWKM,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 280,
+};
+
+static struct qcom_icc_node slv_imem_cfg = {
+ .name = "slv_imem_cfg",
+ .id = QCM2290_SLAVE_IMEM_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 54,
+};
+
+static struct qcom_icc_node slv_ipa_cfg = {
+ .name = "slv_ipa_cfg",
+ .id = QCM2290_SLAVE_IPA_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 183,
+};
+
+static struct qcom_icc_node slv_lpass = {
+ .name = "slv_lpass",
+ .id = QCM2290_SLAVE_LPASS,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 21,
+};
+
+static struct qcom_icc_node slv_message_ram = {
+ .name = "slv_message_ram",
+ .id = QCM2290_SLAVE_MESSAGE_RAM,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 55,
+};
+
+static struct qcom_icc_node slv_pdm = {
+ .name = "slv_pdm",
+ .id = QCM2290_SLAVE_PDM,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 41,
+};
+
+static struct qcom_icc_node slv_pimem_cfg = {
+ .name = "slv_pimem_cfg",
+ .id = QCM2290_SLAVE_PIMEM_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 167,
+};
+
+static struct qcom_icc_node slv_pka_wrapper = {
+ .name = "slv_pka_wrapper",
+ .id = QCM2290_SLAVE_PKA_WRAPPER,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 281,
+};
+
+static struct qcom_icc_node slv_pmic_arb = {
+ .name = "slv_pmic_arb",
+ .id = QCM2290_SLAVE_PMIC_ARB,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 59,
+};
+
+static struct qcom_icc_node slv_prng = {
+ .name = "slv_prng",
+ .id = QCM2290_SLAVE_PRNG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 44,
+};
+
+static struct qcom_icc_node slv_qdss_cfg = {
+ .name = "slv_qdss_cfg",
+ .id = QCM2290_SLAVE_QDSS_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 63,
+};
+
+static struct qcom_icc_node slv_qm_cfg = {
+ .name = "slv_qm_cfg",
+ .id = QCM2290_SLAVE_QM_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 212,
+};
+
+static struct qcom_icc_node slv_qm_mpu_cfg = {
+ .name = "slv_qm_mpu_cfg",
+ .id = QCM2290_SLAVE_QM_MPU_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 231,
+};
+
+static struct qcom_icc_node slv_qpic = {
+ .name = "slv_qpic",
+ .id = QCM2290_SLAVE_QPIC,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 80,
+};
+
+static struct qcom_icc_node slv_qup_0 = {
+ .name = "slv_qup_0",
+ .id = QCM2290_SLAVE_QUP_0,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 261,
+};
+
+static struct qcom_icc_node slv_sdcc_1 = {
+ .name = "slv_sdcc_1",
+ .id = QCM2290_SLAVE_SDCC_1,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 31,
+};
+
+static struct qcom_icc_node slv_sdcc_2 = {
+ .name = "slv_sdcc_2",
+ .id = QCM2290_SLAVE_SDCC_2,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 33,
+};
+
+static const u16 slv_snoc_cfg_links[] = {
+ QCM2290_MASTER_SNOC_CFG,
+};
+
+static struct qcom_icc_node slv_snoc_cfg = {
+ .name = "slv_snoc_cfg",
+ .id = QCM2290_SLAVE_SNOC_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 70,
+ .num_links = ARRAY_SIZE(slv_snoc_cfg_links),
+ .links = slv_snoc_cfg_links,
+};
+
+static struct qcom_icc_node slv_tcsr = {
+ .name = "slv_tcsr",
+ .id = QCM2290_SLAVE_TCSR,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 50,
+};
+
+static struct qcom_icc_node slv_usb3 = {
+ .name = "slv_usb3",
+ .id = QCM2290_SLAVE_USB3,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 22,
+};
+
+static struct qcom_icc_node slv_venus_cfg = {
+ .name = "slv_venus_cfg",
+ .id = QCM2290_SLAVE_VENUS_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 10,
+};
+
+static struct qcom_icc_node slv_venus_throttle_cfg = {
+ .name = "slv_venus_throttle_cfg",
+ .id = QCM2290_SLAVE_VENUS_THROTTLE_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 178,
+};
+
+static struct qcom_icc_node slv_vsense_ctrl_cfg = {
+ .name = "slv_vsense_ctrl_cfg",
+ .id = QCM2290_SLAVE_VSENSE_CTRL_CFG,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 263,
+};
+
+static struct qcom_icc_node slv_service_cnoc = {
+ .name = "slv_service_cnoc",
+ .id = QCM2290_SLAVE_SERVICE_CNOC,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 76,
+};
+
+static struct qcom_icc_node slv_qup_core_0 = {
+ .name = "slv_qup_core_0",
+ .id = QCM2290_SLAVE_QUP_CORE_0,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 264,
+};
+
+static const u16 slv_snoc_bimc_nrt_links[] = {
+ QCM2290_MASTER_SNOC_BIMC_NRT,
+};
+
+static struct qcom_icc_node slv_snoc_bimc_nrt = {
+ .name = "slv_snoc_bimc_nrt",
+ .id = QCM2290_SLAVE_SNOC_BIMC_NRT,
+ .buswidth = 16,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 259,
+ .num_links = ARRAY_SIZE(slv_snoc_bimc_nrt_links),
+ .links = slv_snoc_bimc_nrt_links,
+};
+
+static const u16 slv_snoc_bimc_rt_links[] = {
+ QCM2290_MASTER_SNOC_BIMC_RT,
+};
+
+static struct qcom_icc_node slv_snoc_bimc_rt = {
+ .name = "slv_snoc_bimc_rt",
+ .id = QCM2290_SLAVE_SNOC_BIMC_RT,
+ .buswidth = 16,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 260,
+ .num_links = ARRAY_SIZE(slv_snoc_bimc_rt_links),
+ .links = slv_snoc_bimc_rt_links,
+};
+
+static struct qcom_icc_node slv_appss = {
+ .name = "slv_appss",
+ .id = QCM2290_SLAVE_APPSS,
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 20,
+};
+
+static const u16 slv_snoc_cnoc_links[] = {
+ QCM2290_MASTER_SNOC_CNOC,
+};
+
+static struct qcom_icc_node slv_snoc_cnoc = {
+ .name = "slv_snoc_cnoc",
+ .id = QCM2290_SLAVE_SNOC_CNOC,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 25,
+ .num_links = ARRAY_SIZE(slv_snoc_cnoc_links),
+ .links = slv_snoc_cnoc_links,
+};
+
+static struct qcom_icc_node slv_imem = {
+ .name = "slv_imem",
+ .id = QCM2290_SLAVE_IMEM,
+ .buswidth = 8,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 26,
+};
+
+static struct qcom_icc_node slv_pimem = {
+ .name = "slv_pimem",
+ .id = QCM2290_SLAVE_PIMEM,
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 166,
+};
+
+static const u16 slv_snoc_bimc_links[] = {
+ QCM2290_MASTER_SNOC_BIMC,
+};
+
+static struct qcom_icc_node slv_snoc_bimc = {
+ .name = "slv_snoc_bimc",
+ .id = QCM2290_SLAVE_SNOC_BIMC,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 24,
+ .num_links = ARRAY_SIZE(slv_snoc_bimc_links),
+ .links = slv_snoc_bimc_links,
+};
+
+static struct qcom_icc_node slv_service_snoc = {
+ .name = "slv_service_snoc",
+ .id = QCM2290_SLAVE_SERVICE_SNOC,
+ .buswidth = 4,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 29,
+};
+
+static struct qcom_icc_node slv_qdss_stm = {
+ .name = "slv_qdss_stm",
+ .id = QCM2290_SLAVE_QDSS_STM,
+ .buswidth = 4,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 30,
+};
+
+static struct qcom_icc_node slv_tcu = {
+ .name = "slv_tcu",
+ .id = QCM2290_SLAVE_TCU,
+ .buswidth = 8,
+ .qos.ap_owned = true,
+ .qos.qos_mode = NOC_QOS_MODE_INVALID,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 133,
+};
+
+static const u16 slv_anoc_snoc_links[] = {
+ QCM2290_MASTER_ANOC_SNOC,
+};
+
+static struct qcom_icc_node slv_anoc_snoc = {
+ .name = "slv_anoc_snoc",
+ .id = QCM2290_SLAVE_ANOC_SNOC,
+ .buswidth = 16,
+ .mas_rpm_id = -1,
+ .slv_rpm_id = 141,
+ .num_links = ARRAY_SIZE(slv_anoc_snoc_links),
+ .links = slv_anoc_snoc_links,
+};
+
+/* NoC descriptors */
+static struct qcom_icc_node *qcm2290_bimc_nodes[] = {
+ [MASTER_APPSS_PROC] = &mas_appss_proc,
+ [MASTER_SNOC_BIMC_RT] = &mas_snoc_bimc_rt,
+ [MASTER_SNOC_BIMC_NRT] = &mas_snoc_bimc_nrt,
+ [MASTER_SNOC_BIMC] = &mas_snoc_bimc,
+ [MASTER_TCU_0] = &mas_tcu_0,
+ [MASTER_GFX3D] = &mas_gfx3d,
+ [SLAVE_EBI1] = &slv_ebi1,
+ [SLAVE_BIMC_SNOC] = &slv_bimc_snoc,
+};
+
+static const struct regmap_config qcm2290_bimc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x80000,
+ .fast_io = true,
+};
+
+static struct qcom_icc_desc qcm2290_bimc = {
+ .type = QCOM_ICC_BIMC,
+ .nodes = qcm2290_bimc_nodes,
+ .num_nodes = ARRAY_SIZE(qcm2290_bimc_nodes),
+ .regmap_cfg = &qcm2290_bimc_regmap_config,
+ /* M_REG_BASE() in vendor msm_bus_bimc_adhoc driver */
+ .qos_offset = 0x8000,
+};
+
+static struct qcom_icc_node *qcm2290_cnoc_nodes[] = {
+ [MASTER_SNOC_CNOC] = &mas_snoc_cnoc,
+ [MASTER_QDSS_DAP] = &mas_qdss_dap,
+ [SLAVE_BIMC_CFG] = &slv_bimc_cfg,
+ [SLAVE_CAMERA_NRT_THROTTLE_CFG] = &slv_camera_nrt_throttle_cfg,
+ [SLAVE_CAMERA_RT_THROTTLE_CFG] = &slv_camera_rt_throttle_cfg,
+ [SLAVE_CAMERA_CFG] = &slv_camera_cfg,
+ [SLAVE_CLK_CTL] = &slv_clk_ctl,
+ [SLAVE_CRYPTO_0_CFG] = &slv_crypto_0_cfg,
+ [SLAVE_DISPLAY_CFG] = &slv_display_cfg,
+ [SLAVE_DISPLAY_THROTTLE_CFG] = &slv_display_throttle_cfg,
+ [SLAVE_GPU_CFG] = &slv_gpu_cfg,
+ [SLAVE_HWKM] = &slv_hwkm,
+ [SLAVE_IMEM_CFG] = &slv_imem_cfg,
+ [SLAVE_IPA_CFG] = &slv_ipa_cfg,
+ [SLAVE_LPASS] = &slv_lpass,
+ [SLAVE_MESSAGE_RAM] = &slv_message_ram,
+ [SLAVE_PDM] = &slv_pdm,
+ [SLAVE_PIMEM_CFG] = &slv_pimem_cfg,
+ [SLAVE_PKA_WRAPPER] = &slv_pka_wrapper,
+ [SLAVE_PMIC_ARB] = &slv_pmic_arb,
+ [SLAVE_PRNG] = &slv_prng,
+ [SLAVE_QDSS_CFG] = &slv_qdss_cfg,
+ [SLAVE_QM_CFG] = &slv_qm_cfg,
+ [SLAVE_QM_MPU_CFG] = &slv_qm_mpu_cfg,
+ [SLAVE_QPIC] = &slv_qpic,
+ [SLAVE_QUP_0] = &slv_qup_0,
+ [SLAVE_SDCC_1] = &slv_sdcc_1,
+ [SLAVE_SDCC_2] = &slv_sdcc_2,
+ [SLAVE_SNOC_CFG] = &slv_snoc_cfg,
+ [SLAVE_TCSR] = &slv_tcsr,
+ [SLAVE_USB3] = &slv_usb3,
+ [SLAVE_VENUS_CFG] = &slv_venus_cfg,
+ [SLAVE_VENUS_THROTTLE_CFG] = &slv_venus_throttle_cfg,
+ [SLAVE_VSENSE_CTRL_CFG] = &slv_vsense_ctrl_cfg,
+ [SLAVE_SERVICE_CNOC] = &slv_service_cnoc,
+};
+
+static const struct regmap_config qcm2290_cnoc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x8200,
+ .fast_io = true,
+};
+
+static struct qcom_icc_desc qcm2290_cnoc = {
+ .type = QCOM_ICC_NOC,
+ .nodes = qcm2290_cnoc_nodes,
+ .num_nodes = ARRAY_SIZE(qcm2290_cnoc_nodes),
+ .regmap_cfg = &qcm2290_cnoc_regmap_config,
+};
+
+static struct qcom_icc_node *qcm2290_snoc_nodes[] = {
+ [MASTER_CRYPTO_CORE0] = &mas_crypto_core0,
+ [MASTER_SNOC_CFG] = &mas_snoc_cfg,
+ [MASTER_TIC] = &mas_tic,
+ [MASTER_ANOC_SNOC] = &mas_anoc_snoc,
+ [MASTER_BIMC_SNOC] = &mas_bimc_snoc,
+ [MASTER_PIMEM] = &mas_pimem,
+ [MASTER_QDSS_BAM] = &mas_qdss_bam,
+ [MASTER_QUP_0] = &mas_qup_0,
+ [MASTER_IPA] = &mas_ipa,
+ [MASTER_QDSS_ETR] = &mas_qdss_etr,
+ [MASTER_SDCC_1] = &mas_sdcc_1,
+ [MASTER_SDCC_2] = &mas_sdcc_2,
+ [MASTER_QPIC] = &mas_qpic,
+ [MASTER_USB3_0] = &mas_usb3_0,
+ [SLAVE_APPSS] = &slv_appss,
+ [SLAVE_SNOC_CNOC] = &slv_snoc_cnoc,
+ [SLAVE_IMEM] = &slv_imem,
+ [SLAVE_PIMEM] = &slv_pimem,
+ [SLAVE_SNOC_BIMC] = &slv_snoc_bimc,
+ [SLAVE_SERVICE_SNOC] = &slv_service_snoc,
+ [SLAVE_QDSS_STM] = &slv_qdss_stm,
+ [SLAVE_TCU] = &slv_tcu,
+ [SLAVE_ANOC_SNOC] = &slv_anoc_snoc,
+};
+
+static const struct regmap_config qcm2290_snoc_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .max_register = 0x60200,
+ .fast_io = true,
+};
+
+static struct qcom_icc_desc qcm2290_snoc = {
+ .type = QCOM_ICC_QNOC,
+ .nodes = qcm2290_snoc_nodes,
+ .num_nodes = ARRAY_SIZE(qcm2290_snoc_nodes),
+ .regmap_cfg = &qcm2290_snoc_regmap_config,
+ /* Vendor DT node fab-sys_noc property 'qcom,base-offset' */
+ .qos_offset = 0x15000,
+};
+
+static struct qcom_icc_node *qcm2290_qup_virt_nodes[] = {
+ [MASTER_QUP_CORE_0] = &mas_qup_core_0,
+ [SLAVE_QUP_CORE_0] = &slv_qup_core_0
+};
+
+static struct qcom_icc_desc qcm2290_qup_virt = {
+ .type = QCOM_ICC_QNOC,
+ .nodes = qcm2290_qup_virt_nodes,
+ .num_nodes = ARRAY_SIZE(qcm2290_qup_virt_nodes),
+};
+
+static struct qcom_icc_node *qcm2290_mmnrt_virt_nodes[] = {
+ [MASTER_CAMNOC_SF] = &mas_camnoc_sf,
+ [MASTER_VIDEO_P0] = &mas_video_p0,
+ [MASTER_VIDEO_PROC] = &mas_video_proc,
+ [SLAVE_SNOC_BIMC_NRT] = &slv_snoc_bimc_nrt,
+};
+
+static struct qcom_icc_desc qcm2290_mmnrt_virt = {
+ .type = QCOM_ICC_QNOC,
+ .nodes = qcm2290_mmnrt_virt_nodes,
+ .num_nodes = ARRAY_SIZE(qcm2290_mmnrt_virt_nodes),
+ .regmap_cfg = &qcm2290_snoc_regmap_config,
+ .qos_offset = 0x15000,
+};
+
+static struct qcom_icc_node *qcm2290_mmrt_virt_nodes[] = {
+ [MASTER_CAMNOC_HF] = &mas_camnoc_hf,
+ [MASTER_MDP0] = &mas_mdp0,
+ [SLAVE_SNOC_BIMC_RT] = &slv_snoc_bimc_rt,
+};
+
+static struct qcom_icc_desc qcm2290_mmrt_virt = {
+ .type = QCOM_ICC_QNOC,
+ .nodes = qcm2290_mmrt_virt_nodes,
+ .num_nodes = ARRAY_SIZE(qcm2290_mmrt_virt_nodes),
+ .regmap_cfg = &qcm2290_snoc_regmap_config,
+ .qos_offset = 0x15000,
+};
+
+static const struct of_device_id qcm2290_noc_of_match[] = {
+ { .compatible = "qcom,qcm2290-bimc", .data = &qcm2290_bimc },
+ { .compatible = "qcom,qcm2290-cnoc", .data = &qcm2290_cnoc },
+ { .compatible = "qcom,qcm2290-snoc", .data = &qcm2290_snoc },
+ { .compatible = "qcom,qcm2290-qup-virt", .data = &qcm2290_qup_virt },
+ { .compatible = "qcom,qcm2290-mmrt-virt", .data = &qcm2290_mmrt_virt },
+ { .compatible = "qcom,qcm2290-mmnrt-virt", .data = &qcm2290_mmnrt_virt },
+ { },
+};
+MODULE_DEVICE_TABLE(of, qcm2290_noc_of_match);
+
+static struct platform_driver qcm2290_noc_driver = {
+ .probe = qnoc_probe,
+ .remove = qnoc_remove,
+ .driver = {
+ .name = "qnoc-qcm2290",
+ .of_match_table = qcm2290_noc_of_match,
+ },
+};
+module_platform_driver(qcm2290_noc_driver);
+
+MODULE_DESCRIPTION("Qualcomm QCM2290 NoC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/interconnect/qcom/sc7280.h b/drivers/interconnect/qcom/sc7280.h
index 175e400305c5..1fb9839b2c14 100644
--- a/drivers/interconnect/qcom/sc7280.h
+++ b/drivers/interconnect/qcom/sc7280.h
@@ -150,5 +150,7 @@
#define SC7280_SLAVE_PCIE_1 139
#define SC7280_SLAVE_QDSS_STM 140
#define SC7280_SLAVE_TCU 141
+#define SC7280_MASTER_EPSS_L3_APPS 142
+#define SC7280_SLAVE_EPSS_L3 143
#endif
diff --git a/drivers/interconnect/qcom/sdm660.c b/drivers/interconnect/qcom/sdm660.c
index 471bb88f8828..274a7139fe1a 100644
--- a/drivers/interconnect/qcom/sdm660.c
+++ b/drivers/interconnect/qcom/sdm660.c
@@ -1513,6 +1513,7 @@ static const struct regmap_config sdm660_a2noc_regmap_config = {
};
static struct qcom_icc_desc sdm660_a2noc = {
+ .type = QCOM_ICC_NOC,
.nodes = sdm660_a2noc_nodes,
.num_nodes = ARRAY_SIZE(sdm660_a2noc_nodes),
.clocks = bus_a2noc_clocks,
@@ -1540,9 +1541,9 @@ static const struct regmap_config sdm660_bimc_regmap_config = {
};
static struct qcom_icc_desc sdm660_bimc = {
+ .type = QCOM_ICC_BIMC,
.nodes = sdm660_bimc_nodes,
.num_nodes = ARRAY_SIZE(sdm660_bimc_nodes),
- .is_bimc_node = true,
.regmap_cfg = &sdm660_bimc_regmap_config,
};
@@ -1594,6 +1595,7 @@ static const struct regmap_config sdm660_cnoc_regmap_config = {
};
static struct qcom_icc_desc sdm660_cnoc = {
+ .type = QCOM_ICC_NOC,
.nodes = sdm660_cnoc_nodes,
.num_nodes = ARRAY_SIZE(sdm660_cnoc_nodes),
.regmap_cfg = &sdm660_cnoc_regmap_config,
@@ -1614,6 +1616,7 @@ static const struct regmap_config sdm660_gnoc_regmap_config = {
};
static struct qcom_icc_desc sdm660_gnoc = {
+ .type = QCOM_ICC_NOC,
.nodes = sdm660_gnoc_nodes,
.num_nodes = ARRAY_SIZE(sdm660_gnoc_nodes),
.regmap_cfg = &sdm660_gnoc_regmap_config,
@@ -1653,6 +1656,7 @@ static const struct regmap_config sdm660_mnoc_regmap_config = {
};
static struct qcom_icc_desc sdm660_mnoc = {
+ .type = QCOM_ICC_NOC,
.nodes = sdm660_mnoc_nodes,
.num_nodes = ARRAY_SIZE(sdm660_mnoc_nodes),
.clocks = bus_mm_clocks,
@@ -1689,6 +1693,7 @@ static const struct regmap_config sdm660_snoc_regmap_config = {
};
static struct qcom_icc_desc sdm660_snoc = {
+ .type = QCOM_ICC_NOC,
.nodes = sdm660_snoc_nodes,
.num_nodes = ARRAY_SIZE(sdm660_snoc_nodes),
.regmap_cfg = &sdm660_snoc_regmap_config,
diff --git a/drivers/interconnect/qcom/sm8150.c b/drivers/interconnect/qcom/sm8150.c
index 2a85f53802b5..745e3c36a61a 100644
--- a/drivers/interconnect/qcom/sm8150.c
+++ b/drivers/interconnect/qcom/sm8150.c
@@ -535,7 +535,6 @@ static struct platform_driver qnoc_driver = {
.driver = {
.name = "qnoc-sm8150",
.of_match_table = qnoc_of_match,
- .sync_state = icc_sync_state,
},
};
module_platform_driver(qnoc_driver);
diff --git a/drivers/interconnect/qcom/sm8250.c b/drivers/interconnect/qcom/sm8250.c
index 8dfb5dea562a..aa707582ea01 100644
--- a/drivers/interconnect/qcom/sm8250.c
+++ b/drivers/interconnect/qcom/sm8250.c
@@ -551,7 +551,6 @@ static struct platform_driver qnoc_driver = {
.driver = {
.name = "qnoc-sm8250",
.of_match_table = qnoc_of_match,
- .sync_state = icc_sync_state,
},
};
module_platform_driver(qnoc_driver);
diff --git a/drivers/interconnect/qcom/sm8350.c b/drivers/interconnect/qcom/sm8350.c
index 3e26a2175b28..c79f93a1ac73 100644
--- a/drivers/interconnect/qcom/sm8350.c
+++ b/drivers/interconnect/qcom/sm8350.c
@@ -531,7 +531,6 @@ static struct platform_driver qnoc_driver = {
.driver = {
.name = "qnoc-sm8350",
.of_match_table = qnoc_of_match,
- .sync_state = icc_sync_state,
},
};
module_platform_driver(qnoc_driver);
diff --git a/drivers/interconnect/qcom/sm8450.c b/drivers/interconnect/qcom/sm8450.c
new file mode 100644
index 000000000000..8d99ee6421df
--- /dev/null
+++ b/drivers/interconnect/qcom/sm8450.c
@@ -0,0 +1,1987 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Linaro Limited
+ */
+
+#include <linux/device.h>
+#include <linux/interconnect.h>
+#include <linux/interconnect-provider.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <dt-bindings/interconnect/qcom,sm8450.h>
+
+#include "bcm-voter.h"
+#include "icc-rpmh.h"
+#include "sm8450.h"
+
+static struct qcom_icc_node qhm_qspi = {
+ .name = "qhm_qspi",
+ .id = SM8450_MASTER_QSPI_0,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A1NOC_SNOC },
+};
+
+static struct qcom_icc_node qhm_qup1 = {
+ .name = "qhm_qup1",
+ .id = SM8450_MASTER_QUP_1,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A1NOC_SNOC },
+};
+
+static struct qcom_icc_node qnm_a1noc_cfg = {
+ .name = "qnm_a1noc_cfg",
+ .id = SM8450_MASTER_A1NOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SERVICE_A1NOC },
+};
+
+static struct qcom_icc_node xm_sdc4 = {
+ .name = "xm_sdc4",
+ .id = SM8450_MASTER_SDCC_4,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A1NOC_SNOC },
+};
+
+static struct qcom_icc_node xm_ufs_mem = {
+ .name = "xm_ufs_mem",
+ .id = SM8450_MASTER_UFS_MEM,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A1NOC_SNOC },
+};
+
+static struct qcom_icc_node xm_usb3_0 = {
+ .name = "xm_usb3_0",
+ .id = SM8450_MASTER_USB3_0,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A1NOC_SNOC },
+};
+
+static struct qcom_icc_node qhm_qdss_bam = {
+ .name = "qhm_qdss_bam",
+ .id = SM8450_MASTER_QDSS_BAM,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node qhm_qup0 = {
+ .name = "qhm_qup0",
+ .id = SM8450_MASTER_QUP_0,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node qhm_qup2 = {
+ .name = "qhm_qup2",
+ .id = SM8450_MASTER_QUP_2,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node qnm_a2noc_cfg = {
+ .name = "qnm_a2noc_cfg",
+ .id = SM8450_MASTER_A2NOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SERVICE_A2NOC },
+};
+
+static struct qcom_icc_node qxm_crypto = {
+ .name = "qxm_crypto",
+ .id = SM8450_MASTER_CRYPTO,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node qxm_ipa = {
+ .name = "qxm_ipa",
+ .id = SM8450_MASTER_IPA,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node qxm_sensorss_q6 = {
+ .name = "qxm_sensorss_q6",
+ .id = SM8450_MASTER_SENSORS_PROC,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node qxm_sp = {
+ .name = "qxm_sp",
+ .id = SM8450_MASTER_SP,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node xm_qdss_etr_0 = {
+ .name = "xm_qdss_etr_0",
+ .id = SM8450_MASTER_QDSS_ETR,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node xm_qdss_etr_1 = {
+ .name = "xm_qdss_etr_1",
+ .id = SM8450_MASTER_QDSS_ETR_1,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node xm_sdc2 = {
+ .name = "xm_sdc2",
+ .id = SM8450_MASTER_SDCC_2,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node qup0_core_master = {
+ .name = "qup0_core_master",
+ .id = SM8450_MASTER_QUP_CORE_0,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_QUP_CORE_0 },
+};
+
+static struct qcom_icc_node qup1_core_master = {
+ .name = "qup1_core_master",
+ .id = SM8450_MASTER_QUP_CORE_1,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_QUP_CORE_1 },
+};
+
+static struct qcom_icc_node qup2_core_master = {
+ .name = "qup2_core_master",
+ .id = SM8450_MASTER_QUP_CORE_2,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_QUP_CORE_2 },
+};
+
+static struct qcom_icc_node qnm_gemnoc_cnoc = {
+ .name = "qnm_gemnoc_cnoc",
+ .id = SM8450_MASTER_GEM_NOC_CNOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 51,
+ .links = { SM8450_SLAVE_AHB2PHY_SOUTH, SM8450_SLAVE_AHB2PHY_NORTH,
+ SM8450_SLAVE_AOSS, SM8450_SLAVE_CAMERA_CFG,
+ SM8450_SLAVE_CLK_CTL, SM8450_SLAVE_CDSP_CFG,
+ SM8450_SLAVE_RBCPR_CX_CFG, SM8450_SLAVE_RBCPR_MMCX_CFG,
+ SM8450_SLAVE_RBCPR_MXA_CFG, SM8450_SLAVE_RBCPR_MXC_CFG,
+ SM8450_SLAVE_CRYPTO_0_CFG, SM8450_SLAVE_CX_RDPM,
+ SM8450_SLAVE_DISPLAY_CFG, SM8450_SLAVE_GFX3D_CFG,
+ SM8450_SLAVE_IMEM_CFG, SM8450_SLAVE_IPA_CFG,
+ SM8450_SLAVE_IPC_ROUTER_CFG, SM8450_SLAVE_LPASS,
+ SM8450_SLAVE_CNOC_MSS, SM8450_SLAVE_MX_RDPM,
+ SM8450_SLAVE_PCIE_0_CFG, SM8450_SLAVE_PCIE_1_CFG,
+ SM8450_SLAVE_PDM, SM8450_SLAVE_PIMEM_CFG,
+ SM8450_SLAVE_PRNG, SM8450_SLAVE_QDSS_CFG,
+ SM8450_SLAVE_QSPI_0, SM8450_SLAVE_QUP_0,
+ SM8450_SLAVE_QUP_1, SM8450_SLAVE_QUP_2,
+ SM8450_SLAVE_SDCC_2, SM8450_SLAVE_SDCC_4,
+ SM8450_SLAVE_SPSS_CFG, SM8450_SLAVE_TCSR,
+ SM8450_SLAVE_TLMM, SM8450_SLAVE_TME_CFG,
+ SM8450_SLAVE_UFS_MEM_CFG, SM8450_SLAVE_USB3_0,
+ SM8450_SLAVE_VENUS_CFG, SM8450_SLAVE_VSENSE_CTRL_CFG,
+ SM8450_SLAVE_A1NOC_CFG, SM8450_SLAVE_A2NOC_CFG,
+ SM8450_SLAVE_DDRSS_CFG, SM8450_SLAVE_CNOC_MNOC_CFG,
+ SM8450_SLAVE_PCIE_ANOC_CFG, SM8450_SLAVE_SNOC_CFG,
+ SM8450_SLAVE_IMEM, SM8450_SLAVE_PIMEM,
+ SM8450_SLAVE_SERVICE_CNOC, SM8450_SLAVE_QDSS_STM,
+ SM8450_SLAVE_TCU },
+};
+
+static struct qcom_icc_node qnm_gemnoc_pcie = {
+ .name = "qnm_gemnoc_pcie",
+ .id = SM8450_MASTER_GEM_NOC_PCIE_SNOC,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 2,
+ .links = { SM8450_SLAVE_PCIE_0, SM8450_SLAVE_PCIE_1 },
+};
+
+static struct qcom_icc_node alm_gpu_tcu = {
+ .name = "alm_gpu_tcu",
+ .id = SM8450_MASTER_GPU_TCU,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 2,
+ .links = { SM8450_SLAVE_GEM_NOC_CNOC, SM8450_SLAVE_LLCC },
+};
+
+static struct qcom_icc_node alm_sys_tcu = {
+ .name = "alm_sys_tcu",
+ .id = SM8450_MASTER_SYS_TCU,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 2,
+ .links = { SM8450_SLAVE_GEM_NOC_CNOC, SM8450_SLAVE_LLCC },
+};
+
+static struct qcom_icc_node chm_apps = {
+ .name = "chm_apps",
+ .id = SM8450_MASTER_APPSS_PROC,
+ .channels = 3,
+ .buswidth = 32,
+ .num_links = 3,
+ .links = { SM8450_SLAVE_GEM_NOC_CNOC, SM8450_SLAVE_LLCC,
+ SM8450_SLAVE_MEM_NOC_PCIE_SNOC },
+};
+
+static struct qcom_icc_node qnm_gpu = {
+ .name = "qnm_gpu",
+ .id = SM8450_MASTER_GFX3D,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 2,
+ .links = { SM8450_SLAVE_GEM_NOC_CNOC, SM8450_SLAVE_LLCC },
+};
+
+static struct qcom_icc_node qnm_mdsp = {
+ .name = "qnm_mdsp",
+ .id = SM8450_MASTER_MSS_PROC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 3,
+ .links = { SM8450_SLAVE_GEM_NOC_CNOC, SM8450_SLAVE_LLCC,
+ SM8450_SLAVE_MEM_NOC_PCIE_SNOC },
+};
+
+static struct qcom_icc_node qnm_mnoc_hf = {
+ .name = "qnm_mnoc_hf",
+ .id = SM8450_MASTER_MNOC_HF_MEM_NOC,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_LLCC },
+};
+
+static struct qcom_icc_node qnm_mnoc_sf = {
+ .name = "qnm_mnoc_sf",
+ .id = SM8450_MASTER_MNOC_SF_MEM_NOC,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 2,
+ .links = { SM8450_SLAVE_GEM_NOC_CNOC, SM8450_SLAVE_LLCC },
+};
+
+static struct qcom_icc_node qnm_nsp_gemnoc = {
+ .name = "qnm_nsp_gemnoc",
+ .id = SM8450_MASTER_COMPUTE_NOC,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 2,
+ .links = { SM8450_SLAVE_GEM_NOC_CNOC, SM8450_SLAVE_LLCC },
+};
+
+static struct qcom_icc_node qnm_pcie = {
+ .name = "qnm_pcie",
+ .id = SM8450_MASTER_ANOC_PCIE_GEM_NOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 2,
+ .links = { SM8450_SLAVE_GEM_NOC_CNOC, SM8450_SLAVE_LLCC },
+};
+
+static struct qcom_icc_node qnm_snoc_gc = {
+ .name = "qnm_snoc_gc",
+ .id = SM8450_MASTER_SNOC_GC_MEM_NOC,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_LLCC },
+};
+
+static struct qcom_icc_node qnm_snoc_sf = {
+ .name = "qnm_snoc_sf",
+ .id = SM8450_MASTER_SNOC_SF_MEM_NOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 3,
+ .links = { SM8450_SLAVE_GEM_NOC_CNOC, SM8450_SLAVE_LLCC,
+ SM8450_SLAVE_MEM_NOC_PCIE_SNOC },
+};
+
+static struct qcom_icc_node qhm_config_noc = {
+ .name = "qhm_config_noc",
+ .id = SM8450_MASTER_CNOC_LPASS_AG_NOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 6,
+ .links = { SM8450_SLAVE_LPASS_CORE_CFG, SM8450_SLAVE_LPASS_LPI_CFG,
+ SM8450_SLAVE_LPASS_MPU_CFG, SM8450_SLAVE_LPASS_TOP_CFG,
+ SM8450_SLAVE_SERVICES_LPASS_AML_NOC, SM8450_SLAVE_SERVICE_LPASS_AG_NOC },
+};
+
+static struct qcom_icc_node qxm_lpass_dsp = {
+ .name = "qxm_lpass_dsp",
+ .id = SM8450_MASTER_LPASS_PROC,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 4,
+ .links = { SM8450_SLAVE_LPASS_TOP_CFG, SM8450_SLAVE_LPASS_SNOC,
+ SM8450_SLAVE_SERVICES_LPASS_AML_NOC, SM8450_SLAVE_SERVICE_LPASS_AG_NOC },
+};
+
+static struct qcom_icc_node llcc_mc = {
+ .name = "llcc_mc",
+ .id = SM8450_MASTER_LLCC,
+ .channels = 4,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_EBI1 },
+};
+
+static struct qcom_icc_node qnm_camnoc_hf = {
+ .name = "qnm_camnoc_hf",
+ .id = SM8450_MASTER_CAMNOC_HF,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_HF_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_camnoc_icp = {
+ .name = "qnm_camnoc_icp",
+ .id = SM8450_MASTER_CAMNOC_ICP,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_camnoc_sf = {
+ .name = "qnm_camnoc_sf",
+ .id = SM8450_MASTER_CAMNOC_SF,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_mdp = {
+ .name = "qnm_mdp",
+ .id = SM8450_MASTER_MDP,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_HF_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_mnoc_cfg = {
+ .name = "qnm_mnoc_cfg",
+ .id = SM8450_MASTER_CNOC_MNOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SERVICE_MNOC },
+};
+
+static struct qcom_icc_node qnm_rot = {
+ .name = "qnm_rot",
+ .id = SM8450_MASTER_ROTATOR,
+ .channels = 1,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_vapss_hcp = {
+ .name = "qnm_vapss_hcp",
+ .id = SM8450_MASTER_CDSP_HCP,
+ .channels = 1,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_video = {
+ .name = "qnm_video",
+ .id = SM8450_MASTER_VIDEO,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_video_cv_cpu = {
+ .name = "qnm_video_cv_cpu",
+ .id = SM8450_MASTER_VIDEO_CV_PROC,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_video_cvp = {
+ .name = "qnm_video_cvp",
+ .id = SM8450_MASTER_VIDEO_PROC,
+ .channels = 1,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_video_v_cpu = {
+ .name = "qnm_video_v_cpu",
+ .id = SM8450_MASTER_VIDEO_V_PROC,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node qhm_nsp_noc_config = {
+ .name = "qhm_nsp_noc_config",
+ .id = SM8450_MASTER_CDSP_NOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SERVICE_NSP_NOC },
+};
+
+static struct qcom_icc_node qxm_nsp = {
+ .name = "qxm_nsp",
+ .id = SM8450_MASTER_CDSP_PROC,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_CDSP_MEM_NOC },
+};
+
+static struct qcom_icc_node qnm_pcie_anoc_cfg = {
+ .name = "qnm_pcie_anoc_cfg",
+ .id = SM8450_MASTER_PCIE_ANOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SERVICE_PCIE_ANOC },
+};
+
+static struct qcom_icc_node xm_pcie3_0 = {
+ .name = "xm_pcie3_0",
+ .id = SM8450_MASTER_PCIE_0,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_ANOC_PCIE_GEM_NOC },
+};
+
+static struct qcom_icc_node xm_pcie3_1 = {
+ .name = "xm_pcie3_1",
+ .id = SM8450_MASTER_PCIE_1,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_ANOC_PCIE_GEM_NOC },
+};
+
+static struct qcom_icc_node qhm_gic = {
+ .name = "qhm_gic",
+ .id = SM8450_MASTER_GIC_AHB,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SNOC_GEM_NOC_SF },
+};
+
+static struct qcom_icc_node qnm_aggre1_noc = {
+ .name = "qnm_aggre1_noc",
+ .id = SM8450_MASTER_A1NOC_SNOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SNOC_GEM_NOC_SF },
+};
+
+static struct qcom_icc_node qnm_aggre2_noc = {
+ .name = "qnm_aggre2_noc",
+ .id = SM8450_MASTER_A2NOC_SNOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SNOC_GEM_NOC_SF },
+};
+
+static struct qcom_icc_node qnm_lpass_noc = {
+ .name = "qnm_lpass_noc",
+ .id = SM8450_MASTER_LPASS_ANOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SNOC_GEM_NOC_SF },
+};
+
+static struct qcom_icc_node qnm_snoc_cfg = {
+ .name = "qnm_snoc_cfg",
+ .id = SM8450_MASTER_SNOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SERVICE_SNOC },
+};
+
+static struct qcom_icc_node qxm_pimem = {
+ .name = "qxm_pimem",
+ .id = SM8450_MASTER_PIMEM,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SNOC_GEM_NOC_GC },
+};
+
+static struct qcom_icc_node xm_gic = {
+ .name = "xm_gic",
+ .id = SM8450_MASTER_GIC,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_SNOC_GEM_NOC_GC },
+};
+
+static struct qcom_icc_node qnm_mnoc_hf_disp = {
+ .name = "qnm_mnoc_hf_disp",
+ .id = SM8450_MASTER_MNOC_HF_MEM_NOC_DISP,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_LLCC_DISP },
+};
+
+static struct qcom_icc_node qnm_mnoc_sf_disp = {
+ .name = "qnm_mnoc_sf_disp",
+ .id = SM8450_MASTER_MNOC_SF_MEM_NOC_DISP,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_LLCC_DISP },
+};
+
+static struct qcom_icc_node qnm_pcie_disp = {
+ .name = "qnm_pcie_disp",
+ .id = SM8450_MASTER_ANOC_PCIE_GEM_NOC_DISP,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_LLCC_DISP },
+};
+
+static struct qcom_icc_node llcc_mc_disp = {
+ .name = "llcc_mc_disp",
+ .id = SM8450_MASTER_LLCC_DISP,
+ .channels = 4,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_EBI1_DISP },
+};
+
+static struct qcom_icc_node qnm_mdp_disp = {
+ .name = "qnm_mdp_disp",
+ .id = SM8450_MASTER_MDP_DISP,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_HF_MEM_NOC_DISP },
+};
+
+static struct qcom_icc_node qnm_rot_disp = {
+ .name = "qnm_rot_disp",
+ .id = SM8450_MASTER_ROTATOR_DISP,
+ .channels = 1,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_SLAVE_MNOC_SF_MEM_NOC_DISP },
+};
+
+static struct qcom_icc_node qns_a1noc_snoc = {
+ .name = "qns_a1noc_snoc",
+ .id = SM8450_SLAVE_A1NOC_SNOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_MASTER_A1NOC_SNOC },
+};
+
+static struct qcom_icc_node srvc_aggre1_noc = {
+ .name = "srvc_aggre1_noc",
+ .id = SM8450_SLAVE_SERVICE_A1NOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_a2noc_snoc = {
+ .name = "qns_a2noc_snoc",
+ .id = SM8450_SLAVE_A2NOC_SNOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_MASTER_A2NOC_SNOC },
+};
+
+static struct qcom_icc_node srvc_aggre2_noc = {
+ .name = "srvc_aggre2_noc",
+ .id = SM8450_SLAVE_SERVICE_A2NOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qup0_core_slave = {
+ .name = "qup0_core_slave",
+ .id = SM8450_SLAVE_QUP_CORE_0,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qup1_core_slave = {
+ .name = "qup1_core_slave",
+ .id = SM8450_SLAVE_QUP_CORE_1,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qup2_core_slave = {
+ .name = "qup2_core_slave",
+ .id = SM8450_SLAVE_QUP_CORE_2,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_ahb2phy0 = {
+ .name = "qhs_ahb2phy0",
+ .id = SM8450_SLAVE_AHB2PHY_SOUTH,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_ahb2phy1 = {
+ .name = "qhs_ahb2phy1",
+ .id = SM8450_SLAVE_AHB2PHY_NORTH,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_aoss = {
+ .name = "qhs_aoss",
+ .id = SM8450_SLAVE_AOSS,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_camera_cfg = {
+ .name = "qhs_camera_cfg",
+ .id = SM8450_SLAVE_CAMERA_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_clk_ctl = {
+ .name = "qhs_clk_ctl",
+ .id = SM8450_SLAVE_CLK_CTL,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_compute_cfg = {
+ .name = "qhs_compute_cfg",
+ .id = SM8450_SLAVE_CDSP_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { MASTER_CDSP_NOC_CFG },
+};
+
+static struct qcom_icc_node qhs_cpr_cx = {
+ .name = "qhs_cpr_cx",
+ .id = SM8450_SLAVE_RBCPR_CX_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_cpr_mmcx = {
+ .name = "qhs_cpr_mmcx",
+ .id = SM8450_SLAVE_RBCPR_MMCX_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_cpr_mxa = {
+ .name = "qhs_cpr_mxa",
+ .id = SM8450_SLAVE_RBCPR_MXA_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_cpr_mxc = {
+ .name = "qhs_cpr_mxc",
+ .id = SM8450_SLAVE_RBCPR_MXC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_crypto0_cfg = {
+ .name = "qhs_crypto0_cfg",
+ .id = SM8450_SLAVE_CRYPTO_0_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_cx_rdpm = {
+ .name = "qhs_cx_rdpm",
+ .id = SM8450_SLAVE_CX_RDPM,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_display_cfg = {
+ .name = "qhs_display_cfg",
+ .id = SM8450_SLAVE_DISPLAY_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_gpuss_cfg = {
+ .name = "qhs_gpuss_cfg",
+ .id = SM8450_SLAVE_GFX3D_CFG,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_imem_cfg = {
+ .name = "qhs_imem_cfg",
+ .id = SM8450_SLAVE_IMEM_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_ipa = {
+ .name = "qhs_ipa",
+ .id = SM8450_SLAVE_IPA_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_ipc_router = {
+ .name = "qhs_ipc_router",
+ .id = SM8450_SLAVE_IPC_ROUTER_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_lpass_cfg = {
+ .name = "qhs_lpass_cfg",
+ .id = SM8450_SLAVE_LPASS,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { MASTER_CNOC_LPASS_AG_NOC },
+};
+
+static struct qcom_icc_node qhs_mss_cfg = {
+ .name = "qhs_mss_cfg",
+ .id = SM8450_SLAVE_CNOC_MSS,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_mx_rdpm = {
+ .name = "qhs_mx_rdpm",
+ .id = SM8450_SLAVE_MX_RDPM,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_pcie0_cfg = {
+ .name = "qhs_pcie0_cfg",
+ .id = SM8450_SLAVE_PCIE_0_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_pcie1_cfg = {
+ .name = "qhs_pcie1_cfg",
+ .id = SM8450_SLAVE_PCIE_1_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_pdm = {
+ .name = "qhs_pdm",
+ .id = SM8450_SLAVE_PDM,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_pimem_cfg = {
+ .name = "qhs_pimem_cfg",
+ .id = SM8450_SLAVE_PIMEM_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_prng = {
+ .name = "qhs_prng",
+ .id = SM8450_SLAVE_PRNG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_qdss_cfg = {
+ .name = "qhs_qdss_cfg",
+ .id = SM8450_SLAVE_QDSS_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_qspi = {
+ .name = "qhs_qspi",
+ .id = SM8450_SLAVE_QSPI_0,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_qup0 = {
+ .name = "qhs_qup0",
+ .id = SM8450_SLAVE_QUP_0,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_qup1 = {
+ .name = "qhs_qup1",
+ .id = SM8450_SLAVE_QUP_1,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_qup2 = {
+ .name = "qhs_qup2",
+ .id = SM8450_SLAVE_QUP_2,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_sdc2 = {
+ .name = "qhs_sdc2",
+ .id = SM8450_SLAVE_SDCC_2,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_sdc4 = {
+ .name = "qhs_sdc4",
+ .id = SM8450_SLAVE_SDCC_4,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_spss_cfg = {
+ .name = "qhs_spss_cfg",
+ .id = SM8450_SLAVE_SPSS_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_tcsr = {
+ .name = "qhs_tcsr",
+ .id = SM8450_SLAVE_TCSR,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_tlmm = {
+ .name = "qhs_tlmm",
+ .id = SM8450_SLAVE_TLMM,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_tme_cfg = {
+ .name = "qhs_tme_cfg",
+ .id = SM8450_SLAVE_TME_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_ufs_mem_cfg = {
+ .name = "qhs_ufs_mem_cfg",
+ .id = SM8450_SLAVE_UFS_MEM_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_usb3_0 = {
+ .name = "qhs_usb3_0",
+ .id = SM8450_SLAVE_USB3_0,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_venus_cfg = {
+ .name = "qhs_venus_cfg",
+ .id = SM8450_SLAVE_VENUS_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_vsense_ctrl_cfg = {
+ .name = "qhs_vsense_ctrl_cfg",
+ .id = SM8450_SLAVE_VSENSE_CTRL_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_a1_noc_cfg = {
+ .name = "qns_a1_noc_cfg",
+ .id = SM8450_SLAVE_A1NOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_MASTER_A1NOC_CFG },
+};
+
+static struct qcom_icc_node qns_a2_noc_cfg = {
+ .name = "qns_a2_noc_cfg",
+ .id = SM8450_SLAVE_A2NOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_MASTER_A2NOC_CFG },
+};
+
+static struct qcom_icc_node qns_ddrss_cfg = {
+ .name = "qns_ddrss_cfg",
+ .id = SM8450_SLAVE_DDRSS_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ //FIXME where is link
+};
+
+static struct qcom_icc_node qns_mnoc_cfg = {
+ .name = "qns_mnoc_cfg",
+ .id = SM8450_SLAVE_CNOC_MNOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_MASTER_CNOC_MNOC_CFG },
+};
+
+static struct qcom_icc_node qns_pcie_anoc_cfg = {
+ .name = "qns_pcie_anoc_cfg",
+ .id = SM8450_SLAVE_PCIE_ANOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_MASTER_PCIE_ANOC_CFG },
+};
+
+static struct qcom_icc_node qns_snoc_cfg = {
+ .name = "qns_snoc_cfg",
+ .id = SM8450_SLAVE_SNOC_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 1,
+ .links = { SM8450_MASTER_SNOC_CFG },
+};
+
+static struct qcom_icc_node qxs_imem = {
+ .name = "qxs_imem",
+ .id = SM8450_SLAVE_IMEM,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qxs_pimem = {
+ .name = "qxs_pimem",
+ .id = SM8450_SLAVE_PIMEM,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node srvc_cnoc = {
+ .name = "srvc_cnoc",
+ .id = SM8450_SLAVE_SERVICE_CNOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node xs_pcie_0 = {
+ .name = "xs_pcie_0",
+ .id = SM8450_SLAVE_PCIE_0,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node xs_pcie_1 = {
+ .name = "xs_pcie_1",
+ .id = SM8450_SLAVE_PCIE_1,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node xs_qdss_stm = {
+ .name = "xs_qdss_stm",
+ .id = SM8450_SLAVE_QDSS_STM,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node xs_sys_tcu_cfg = {
+ .name = "xs_sys_tcu_cfg",
+ .id = SM8450_SLAVE_TCU,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_gem_noc_cnoc = {
+ .name = "qns_gem_noc_cnoc",
+ .id = SM8450_SLAVE_GEM_NOC_CNOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_MASTER_GEM_NOC_CNOC },
+};
+
+static struct qcom_icc_node qns_llcc = {
+ .name = "qns_llcc",
+ .id = SM8450_SLAVE_LLCC,
+ .channels = 4,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_MASTER_LLCC },
+};
+
+static struct qcom_icc_node qns_pcie = {
+ .name = "qns_pcie",
+ .id = SM8450_SLAVE_MEM_NOC_PCIE_SNOC,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_MASTER_GEM_NOC_PCIE_SNOC },
+};
+
+static struct qcom_icc_node qhs_lpass_core = {
+ .name = "qhs_lpass_core",
+ .id = SM8450_SLAVE_LPASS_CORE_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_lpass_lpi = {
+ .name = "qhs_lpass_lpi",
+ .id = SM8450_SLAVE_LPASS_LPI_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_lpass_mpu = {
+ .name = "qhs_lpass_mpu",
+ .id = SM8450_SLAVE_LPASS_MPU_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qhs_lpass_top = {
+ .name = "qhs_lpass_top",
+ .id = SM8450_SLAVE_LPASS_TOP_CFG,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_sysnoc = {
+ .name = "qns_sysnoc",
+ .id = SM8450_SLAVE_LPASS_SNOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_MASTER_LPASS_ANOC },
+};
+
+static struct qcom_icc_node srvc_niu_aml_noc = {
+ .name = "srvc_niu_aml_noc",
+ .id = SM8450_SLAVE_SERVICES_LPASS_AML_NOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node srvc_niu_lpass_agnoc = {
+ .name = "srvc_niu_lpass_agnoc",
+ .id = SM8450_SLAVE_SERVICE_LPASS_AG_NOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node ebi = {
+ .name = "ebi",
+ .id = SM8450_SLAVE_EBI1,
+ .channels = 4,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_mem_noc_hf = {
+ .name = "qns_mem_noc_hf",
+ .id = SM8450_SLAVE_MNOC_HF_MEM_NOC,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_MASTER_MNOC_HF_MEM_NOC },
+};
+
+static struct qcom_icc_node qns_mem_noc_sf = {
+ .name = "qns_mem_noc_sf",
+ .id = SM8450_SLAVE_MNOC_SF_MEM_NOC,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_MASTER_MNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node srvc_mnoc = {
+ .name = "srvc_mnoc",
+ .id = SM8450_SLAVE_SERVICE_MNOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_nsp_gemnoc = {
+ .name = "qns_nsp_gemnoc",
+ .id = SM8450_SLAVE_CDSP_MEM_NOC,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_MASTER_COMPUTE_NOC },
+};
+
+static struct qcom_icc_node service_nsp_noc = {
+ .name = "service_nsp_noc",
+ .id = SM8450_SLAVE_SERVICE_NSP_NOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_pcie_mem_noc = {
+ .name = "qns_pcie_mem_noc",
+ .id = SM8450_SLAVE_ANOC_PCIE_GEM_NOC,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_MASTER_ANOC_PCIE_GEM_NOC },
+};
+
+static struct qcom_icc_node srvc_pcie_aggre_noc = {
+ .name = "srvc_pcie_aggre_noc",
+ .id = SM8450_SLAVE_SERVICE_PCIE_ANOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_gemnoc_gc = {
+ .name = "qns_gemnoc_gc",
+ .id = SM8450_SLAVE_SNOC_GEM_NOC_GC,
+ .channels = 1,
+ .buswidth = 8,
+ .num_links = 1,
+ .links = { SM8450_MASTER_SNOC_GC_MEM_NOC },
+};
+
+static struct qcom_icc_node qns_gemnoc_sf = {
+ .name = "qns_gemnoc_sf",
+ .id = SM8450_SLAVE_SNOC_GEM_NOC_SF,
+ .channels = 1,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_MASTER_SNOC_SF_MEM_NOC },
+};
+
+static struct qcom_icc_node srvc_snoc = {
+ .name = "srvc_snoc",
+ .id = SM8450_SLAVE_SERVICE_SNOC,
+ .channels = 1,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_llcc_disp = {
+ .name = "qns_llcc_disp",
+ .id = SM8450_SLAVE_LLCC_DISP,
+ .channels = 4,
+ .buswidth = 16,
+ .num_links = 1,
+ .links = { SM8450_MASTER_LLCC_DISP },
+};
+
+static struct qcom_icc_node ebi_disp = {
+ .name = "ebi_disp",
+ .id = SM8450_SLAVE_EBI1_DISP,
+ .channels = 4,
+ .buswidth = 4,
+ .num_links = 0,
+};
+
+static struct qcom_icc_node qns_mem_noc_hf_disp = {
+ .name = "qns_mem_noc_hf_disp",
+ .id = SM8450_SLAVE_MNOC_HF_MEM_NOC_DISP,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_MASTER_MNOC_HF_MEM_NOC_DISP },
+};
+
+static struct qcom_icc_node qns_mem_noc_sf_disp = {
+ .name = "qns_mem_noc_sf_disp",
+ .id = SM8450_SLAVE_MNOC_SF_MEM_NOC_DISP,
+ .channels = 2,
+ .buswidth = 32,
+ .num_links = 1,
+ .links = { SM8450_MASTER_MNOC_SF_MEM_NOC_DISP },
+};
+
+static struct qcom_icc_bcm bcm_acv = {
+ .name = "ACV",
+ .num_nodes = 1,
+ .nodes = { &ebi },
+};
+
+static struct qcom_icc_bcm bcm_ce0 = {
+ .name = "CE0",
+ .num_nodes = 1,
+ .nodes = { &qxm_crypto },
+};
+
+static struct qcom_icc_bcm bcm_cn0 = {
+ .name = "CN0",
+ .keepalive = true,
+ .num_nodes = 55,
+ .nodes = { &qnm_gemnoc_cnoc, &qnm_gemnoc_pcie,
+ &qhs_ahb2phy0, &qhs_ahb2phy1,
+ &qhs_aoss, &qhs_camera_cfg,
+ &qhs_clk_ctl, &qhs_compute_cfg,
+ &qhs_cpr_cx, &qhs_cpr_mmcx,
+ &qhs_cpr_mxa, &qhs_cpr_mxc,
+ &qhs_crypto0_cfg, &qhs_cx_rdpm,
+ &qhs_display_cfg, &qhs_gpuss_cfg,
+ &qhs_imem_cfg, &qhs_ipa,
+ &qhs_ipc_router, &qhs_lpass_cfg,
+ &qhs_mss_cfg, &qhs_mx_rdpm,
+ &qhs_pcie0_cfg, &qhs_pcie1_cfg,
+ &qhs_pdm, &qhs_pimem_cfg,
+ &qhs_prng, &qhs_qdss_cfg,
+ &qhs_qspi, &qhs_qup0,
+ &qhs_qup1, &qhs_qup2,
+ &qhs_sdc2, &qhs_sdc4,
+ &qhs_spss_cfg, &qhs_tcsr,
+ &qhs_tlmm, &qhs_tme_cfg,
+ &qhs_ufs_mem_cfg, &qhs_usb3_0,
+ &qhs_venus_cfg, &qhs_vsense_ctrl_cfg,
+ &qns_a1_noc_cfg, &qns_a2_noc_cfg,
+ &qns_ddrss_cfg, &qns_mnoc_cfg,
+ &qns_pcie_anoc_cfg, &qns_snoc_cfg,
+ &qxs_imem, &qxs_pimem,
+ &srvc_cnoc, &xs_pcie_0,
+ &xs_pcie_1, &xs_qdss_stm,
+ &xs_sys_tcu_cfg },
+};
+
+static struct qcom_icc_bcm bcm_co0 = {
+ .name = "CO0",
+ .num_nodes = 2,
+ .nodes = { &qxm_nsp, &qns_nsp_gemnoc },
+};
+
+static struct qcom_icc_bcm bcm_mc0 = {
+ .name = "MC0",
+ .keepalive = true,
+ .num_nodes = 1,
+ .nodes = { &ebi },
+};
+
+static struct qcom_icc_bcm bcm_mm0 = {
+ .name = "MM0",
+ .keepalive = true,
+ .num_nodes = 1,
+ .nodes = { &qns_mem_noc_hf },
+};
+
+static struct qcom_icc_bcm bcm_mm1 = {
+ .name = "MM1",
+ .num_nodes = 12,
+ .nodes = { &qnm_camnoc_hf, &qnm_camnoc_icp,
+ &qnm_camnoc_sf, &qnm_mdp,
+ &qnm_mnoc_cfg, &qnm_rot,
+ &qnm_vapss_hcp, &qnm_video,
+ &qnm_video_cv_cpu, &qnm_video_cvp,
+ &qnm_video_v_cpu, &qns_mem_noc_sf },
+};
+
+static struct qcom_icc_bcm bcm_qup0 = {
+ .name = "QUP0",
+ .keepalive = true,
+ .vote_scale = 1,
+ .num_nodes = 1,
+ .nodes = { &qup0_core_slave },
+};
+
+static struct qcom_icc_bcm bcm_qup1 = {
+ .name = "QUP1",
+ .keepalive = true,
+ .vote_scale = 1,
+ .num_nodes = 1,
+ .nodes = { &qup1_core_slave },
+};
+
+static struct qcom_icc_bcm bcm_qup2 = {
+ .name = "QUP2",
+ .keepalive = true,
+ .vote_scale = 1,
+ .num_nodes = 1,
+ .nodes = { &qup2_core_slave },
+};
+
+static struct qcom_icc_bcm bcm_sh0 = {
+ .name = "SH0",
+ .keepalive = true,
+ .num_nodes = 1,
+ .nodes = { &qns_llcc },
+};
+
+static struct qcom_icc_bcm bcm_sh1 = {
+ .name = "SH1",
+ .num_nodes = 7,
+ .nodes = { &alm_gpu_tcu, &alm_sys_tcu,
+ &qnm_nsp_gemnoc, &qnm_pcie,
+ &qnm_snoc_gc, &qns_gem_noc_cnoc,
+ &qns_pcie },
+};
+
+static struct qcom_icc_bcm bcm_sn0 = {
+ .name = "SN0",
+ .keepalive = true,
+ .num_nodes = 1,
+ .nodes = { &qns_gemnoc_sf },
+};
+
+static struct qcom_icc_bcm bcm_sn1 = {
+ .name = "SN1",
+ .num_nodes = 4,
+ .nodes = { &qhm_gic, &qxm_pimem,
+ &xm_gic, &qns_gemnoc_gc },
+};
+
+static struct qcom_icc_bcm bcm_sn2 = {
+ .name = "SN2",
+ .num_nodes = 1,
+ .nodes = { &qnm_aggre1_noc },
+};
+
+static struct qcom_icc_bcm bcm_sn3 = {
+ .name = "SN3",
+ .num_nodes = 1,
+ .nodes = { &qnm_aggre2_noc },
+};
+
+static struct qcom_icc_bcm bcm_sn4 = {
+ .name = "SN4",
+ .num_nodes = 1,
+ .nodes = { &qnm_lpass_noc },
+};
+
+static struct qcom_icc_bcm bcm_sn7 = {
+ .name = "SN7",
+ .num_nodes = 1,
+ .nodes = { &qns_pcie_mem_noc },
+};
+
+static struct qcom_icc_bcm bcm_acv_disp = {
+ .name = "ACV",
+ .num_nodes = 1,
+ .nodes = { &ebi_disp },
+};
+
+static struct qcom_icc_bcm bcm_mc0_disp = {
+ .name = "MC0",
+ .num_nodes = 1,
+ .nodes = { &ebi_disp },
+};
+
+static struct qcom_icc_bcm bcm_mm0_disp = {
+ .name = "MM0",
+ .num_nodes = 1,
+ .nodes = { &qns_mem_noc_hf_disp },
+};
+
+static struct qcom_icc_bcm bcm_mm1_disp = {
+ .name = "MM1",
+ .num_nodes = 3,
+ .nodes = { &qnm_mdp_disp, &qnm_rot_disp,
+ &qns_mem_noc_sf_disp },
+};
+
+static struct qcom_icc_bcm bcm_sh0_disp = {
+ .name = "SH0",
+ .num_nodes = 1,
+ .nodes = { &qns_llcc_disp },
+};
+
+static struct qcom_icc_bcm bcm_sh1_disp = {
+ .name = "SH1",
+ .num_nodes = 1,
+ .nodes = { &qnm_pcie_disp },
+};
+
+static struct qcom_icc_bcm *aggre1_noc_bcms[] = {
+};
+
+static struct qcom_icc_node *aggre1_noc_nodes[] = {
+ [MASTER_QSPI_0] = &qhm_qspi,
+ [MASTER_QUP_1] = &qhm_qup1,
+ [MASTER_A1NOC_CFG] = &qnm_a1noc_cfg,
+ [MASTER_SDCC_4] = &xm_sdc4,
+ [MASTER_UFS_MEM] = &xm_ufs_mem,
+ [MASTER_USB3_0] = &xm_usb3_0,
+ [SLAVE_A1NOC_SNOC] = &qns_a1noc_snoc,
+ [SLAVE_SERVICE_A1NOC] = &srvc_aggre1_noc,
+};
+
+static struct qcom_icc_desc sm8450_aggre1_noc = {
+ .nodes = aggre1_noc_nodes,
+ .num_nodes = ARRAY_SIZE(aggre1_noc_nodes),
+ .bcms = aggre1_noc_bcms,
+ .num_bcms = ARRAY_SIZE(aggre1_noc_bcms),
+};
+
+static struct qcom_icc_bcm *aggre2_noc_bcms[] = {
+ &bcm_ce0,
+};
+
+static struct qcom_icc_node *aggre2_noc_nodes[] = {
+ [MASTER_QDSS_BAM] = &qhm_qdss_bam,
+ [MASTER_QUP_0] = &qhm_qup0,
+ [MASTER_QUP_2] = &qhm_qup2,
+ [MASTER_A2NOC_CFG] = &qnm_a2noc_cfg,
+ [MASTER_CRYPTO] = &qxm_crypto,
+ [MASTER_IPA] = &qxm_ipa,
+ [MASTER_SENSORS_PROC] = &qxm_sensorss_q6,
+ [MASTER_SP] = &qxm_sp,
+ [MASTER_QDSS_ETR] = &xm_qdss_etr_0,
+ [MASTER_QDSS_ETR_1] = &xm_qdss_etr_1,
+ [MASTER_SDCC_2] = &xm_sdc2,
+ [SLAVE_A2NOC_SNOC] = &qns_a2noc_snoc,
+ [SLAVE_SERVICE_A2NOC] = &srvc_aggre2_noc,
+};
+
+static struct qcom_icc_desc sm8450_aggre2_noc = {
+ .nodes = aggre2_noc_nodes,
+ .num_nodes = ARRAY_SIZE(aggre2_noc_nodes),
+ .bcms = aggre2_noc_bcms,
+ .num_bcms = ARRAY_SIZE(aggre2_noc_bcms),
+};
+
+static struct qcom_icc_bcm *clk_virt_bcms[] = {
+ &bcm_qup0,
+ &bcm_qup1,
+ &bcm_qup2,
+};
+
+static struct qcom_icc_node *clk_virt_nodes[] = {
+ [MASTER_QUP_CORE_0] = &qup0_core_master,
+ [MASTER_QUP_CORE_1] = &qup1_core_master,
+ [MASTER_QUP_CORE_2] = &qup2_core_master,
+ [SLAVE_QUP_CORE_0] = &qup0_core_slave,
+ [SLAVE_QUP_CORE_1] = &qup1_core_slave,
+ [SLAVE_QUP_CORE_2] = &qup2_core_slave,
+};
+
+static struct qcom_icc_desc sm8450_clk_virt = {
+ .nodes = clk_virt_nodes,
+ .num_nodes = ARRAY_SIZE(clk_virt_nodes),
+ .bcms = clk_virt_bcms,
+ .num_bcms = ARRAY_SIZE(clk_virt_bcms),
+};
+
+static struct qcom_icc_bcm *config_noc_bcms[] = {
+ &bcm_cn0,
+};
+
+static struct qcom_icc_node *config_noc_nodes[] = {
+ [MASTER_GEM_NOC_CNOC] = &qnm_gemnoc_cnoc,
+ [MASTER_GEM_NOC_PCIE_SNOC] = &qnm_gemnoc_pcie,
+ [SLAVE_AHB2PHY_SOUTH] = &qhs_ahb2phy0,
+ [SLAVE_AHB2PHY_NORTH] = &qhs_ahb2phy1,
+ [SLAVE_AOSS] = &qhs_aoss,
+ [SLAVE_CAMERA_CFG] = &qhs_camera_cfg,
+ [SLAVE_CLK_CTL] = &qhs_clk_ctl,
+ [SLAVE_CDSP_CFG] = &qhs_compute_cfg,
+ [SLAVE_RBCPR_CX_CFG] = &qhs_cpr_cx,
+ [SLAVE_RBCPR_MMCX_CFG] = &qhs_cpr_mmcx,
+ [SLAVE_RBCPR_MXA_CFG] = &qhs_cpr_mxa,
+ [SLAVE_RBCPR_MXC_CFG] = &qhs_cpr_mxc,
+ [SLAVE_CRYPTO_0_CFG] = &qhs_crypto0_cfg,
+ [SLAVE_CX_RDPM] = &qhs_cx_rdpm,
+ [SLAVE_DISPLAY_CFG] = &qhs_display_cfg,
+ [SLAVE_GFX3D_CFG] = &qhs_gpuss_cfg,
+ [SLAVE_IMEM_CFG] = &qhs_imem_cfg,
+ [SLAVE_IPA_CFG] = &qhs_ipa,
+ [SLAVE_IPC_ROUTER_CFG] = &qhs_ipc_router,
+ [SLAVE_LPASS] = &qhs_lpass_cfg,
+ [SLAVE_CNOC_MSS] = &qhs_mss_cfg,
+ [SLAVE_MX_RDPM] = &qhs_mx_rdpm,
+ [SLAVE_PCIE_0_CFG] = &qhs_pcie0_cfg,
+ [SLAVE_PCIE_1_CFG] = &qhs_pcie1_cfg,
+ [SLAVE_PDM] = &qhs_pdm,
+ [SLAVE_PIMEM_CFG] = &qhs_pimem_cfg,
+ [SLAVE_PRNG] = &qhs_prng,
+ [SLAVE_QDSS_CFG] = &qhs_qdss_cfg,
+ [SLAVE_QSPI_0] = &qhs_qspi,
+ [SLAVE_QUP_0] = &qhs_qup0,
+ [SLAVE_QUP_1] = &qhs_qup1,
+ [SLAVE_QUP_2] = &qhs_qup2,
+ [SLAVE_SDCC_2] = &qhs_sdc2,
+ [SLAVE_SDCC_4] = &qhs_sdc4,
+ [SLAVE_SPSS_CFG] = &qhs_spss_cfg,
+ [SLAVE_TCSR] = &qhs_tcsr,
+ [SLAVE_TLMM] = &qhs_tlmm,
+ [SLAVE_TME_CFG] = &qhs_tme_cfg,
+ [SLAVE_UFS_MEM_CFG] = &qhs_ufs_mem_cfg,
+ [SLAVE_USB3_0] = &qhs_usb3_0,
+ [SLAVE_VENUS_CFG] = &qhs_venus_cfg,
+ [SLAVE_VSENSE_CTRL_CFG] = &qhs_vsense_ctrl_cfg,
+ [SLAVE_A1NOC_CFG] = &qns_a1_noc_cfg,
+ [SLAVE_A2NOC_CFG] = &qns_a2_noc_cfg,
+ [SLAVE_DDRSS_CFG] = &qns_ddrss_cfg,
+ [SLAVE_CNOC_MNOC_CFG] = &qns_mnoc_cfg,
+ [SLAVE_PCIE_ANOC_CFG] = &qns_pcie_anoc_cfg,
+ [SLAVE_SNOC_CFG] = &qns_snoc_cfg,
+ [SLAVE_IMEM] = &qxs_imem,
+ [SLAVE_PIMEM] = &qxs_pimem,
+ [SLAVE_SERVICE_CNOC] = &srvc_cnoc,
+ [SLAVE_PCIE_0] = &xs_pcie_0,
+ [SLAVE_PCIE_1] = &xs_pcie_1,
+ [SLAVE_QDSS_STM] = &xs_qdss_stm,
+ [SLAVE_TCU] = &xs_sys_tcu_cfg,
+};
+
+static struct qcom_icc_desc sm8450_config_noc = {
+ .nodes = config_noc_nodes,
+ .num_nodes = ARRAY_SIZE(config_noc_nodes),
+ .bcms = config_noc_bcms,
+ .num_bcms = ARRAY_SIZE(config_noc_bcms),
+};
+
+static struct qcom_icc_bcm *gem_noc_bcms[] = {
+ &bcm_sh0,
+ &bcm_sh1,
+ &bcm_sh0_disp,
+ &bcm_sh1_disp,
+};
+
+static struct qcom_icc_node *gem_noc_nodes[] = {
+ [MASTER_GPU_TCU] = &alm_gpu_tcu,
+ [MASTER_SYS_TCU] = &alm_sys_tcu,
+ [MASTER_APPSS_PROC] = &chm_apps,
+ [MASTER_GFX3D] = &qnm_gpu,
+ [MASTER_MSS_PROC] = &qnm_mdsp,
+ [MASTER_MNOC_HF_MEM_NOC] = &qnm_mnoc_hf,
+ [MASTER_MNOC_SF_MEM_NOC] = &qnm_mnoc_sf,
+ [MASTER_COMPUTE_NOC] = &qnm_nsp_gemnoc,
+ [MASTER_ANOC_PCIE_GEM_NOC] = &qnm_pcie,
+ [MASTER_SNOC_GC_MEM_NOC] = &qnm_snoc_gc,
+ [MASTER_SNOC_SF_MEM_NOC] = &qnm_snoc_sf,
+ [SLAVE_GEM_NOC_CNOC] = &qns_gem_noc_cnoc,
+ [SLAVE_LLCC] = &qns_llcc,
+ [SLAVE_MEM_NOC_PCIE_SNOC] = &qns_pcie,
+ [MASTER_MNOC_HF_MEM_NOC_DISP] = &qnm_mnoc_hf_disp,
+ [MASTER_MNOC_SF_MEM_NOC_DISP] = &qnm_mnoc_sf_disp,
+ [MASTER_ANOC_PCIE_GEM_NOC_DISP] = &qnm_pcie_disp,
+ [SLAVE_LLCC_DISP] = &qns_llcc_disp,
+};
+
+static struct qcom_icc_desc sm8450_gem_noc = {
+ .nodes = gem_noc_nodes,
+ .num_nodes = ARRAY_SIZE(gem_noc_nodes),
+ .bcms = gem_noc_bcms,
+ .num_bcms = ARRAY_SIZE(gem_noc_bcms),
+};
+
+static struct qcom_icc_bcm *lpass_ag_noc_bcms[] = {
+};
+
+static struct qcom_icc_node *lpass_ag_noc_nodes[] = {
+ [MASTER_CNOC_LPASS_AG_NOC] = &qhm_config_noc,
+ [MASTER_LPASS_PROC] = &qxm_lpass_dsp,
+ [SLAVE_LPASS_CORE_CFG] = &qhs_lpass_core,
+ [SLAVE_LPASS_LPI_CFG] = &qhs_lpass_lpi,
+ [SLAVE_LPASS_MPU_CFG] = &qhs_lpass_mpu,
+ [SLAVE_LPASS_TOP_CFG] = &qhs_lpass_top,
+ [SLAVE_LPASS_SNOC] = &qns_sysnoc,
+ [SLAVE_SERVICES_LPASS_AML_NOC] = &srvc_niu_aml_noc,
+ [SLAVE_SERVICE_LPASS_AG_NOC] = &srvc_niu_lpass_agnoc,
+};
+
+static struct qcom_icc_desc sm8450_lpass_ag_noc = {
+ .nodes = lpass_ag_noc_nodes,
+ .num_nodes = ARRAY_SIZE(lpass_ag_noc_nodes),
+ .bcms = lpass_ag_noc_bcms,
+ .num_bcms = ARRAY_SIZE(lpass_ag_noc_bcms),
+};
+
+static struct qcom_icc_bcm *mc_virt_bcms[] = {
+ &bcm_acv,
+ &bcm_mc0,
+ &bcm_acv_disp,
+ &bcm_mc0_disp,
+};
+
+static struct qcom_icc_node *mc_virt_nodes[] = {
+ [MASTER_LLCC] = &llcc_mc,
+ [SLAVE_EBI1] = &ebi,
+ [MASTER_LLCC_DISP] = &llcc_mc_disp,
+ [SLAVE_EBI1_DISP] = &ebi_disp,
+};
+
+static struct qcom_icc_desc sm8450_mc_virt = {
+ .nodes = mc_virt_nodes,
+ .num_nodes = ARRAY_SIZE(mc_virt_nodes),
+ .bcms = mc_virt_bcms,
+ .num_bcms = ARRAY_SIZE(mc_virt_bcms),
+};
+
+static struct qcom_icc_bcm *mmss_noc_bcms[] = {
+ &bcm_mm0,
+ &bcm_mm1,
+ &bcm_mm0_disp,
+ &bcm_mm1_disp,
+};
+
+static struct qcom_icc_node *mmss_noc_nodes[] = {
+ [MASTER_CAMNOC_HF] = &qnm_camnoc_hf,
+ [MASTER_CAMNOC_ICP] = &qnm_camnoc_icp,
+ [MASTER_CAMNOC_SF] = &qnm_camnoc_sf,
+ [MASTER_MDP] = &qnm_mdp,
+ [MASTER_CNOC_MNOC_CFG] = &qnm_mnoc_cfg,
+ [MASTER_ROTATOR] = &qnm_rot,
+ [MASTER_CDSP_HCP] = &qnm_vapss_hcp,
+ [MASTER_VIDEO] = &qnm_video,
+ [MASTER_VIDEO_CV_PROC] = &qnm_video_cv_cpu,
+ [MASTER_VIDEO_PROC] = &qnm_video_cvp,
+ [MASTER_VIDEO_V_PROC] = &qnm_video_v_cpu,
+ [SLAVE_MNOC_HF_MEM_NOC] = &qns_mem_noc_hf,
+ [SLAVE_MNOC_SF_MEM_NOC] = &qns_mem_noc_sf,
+ [SLAVE_SERVICE_MNOC] = &srvc_mnoc,
+ [MASTER_MDP_DISP] = &qnm_mdp_disp,
+ [MASTER_ROTATOR_DISP] = &qnm_rot_disp,
+ [SLAVE_MNOC_HF_MEM_NOC_DISP] = &qns_mem_noc_hf_disp,
+ [SLAVE_MNOC_SF_MEM_NOC_DISP] = &qns_mem_noc_sf_disp,
+};
+
+static struct qcom_icc_desc sm8450_mmss_noc = {
+ .nodes = mmss_noc_nodes,
+ .num_nodes = ARRAY_SIZE(mmss_noc_nodes),
+ .bcms = mmss_noc_bcms,
+ .num_bcms = ARRAY_SIZE(mmss_noc_bcms),
+};
+
+static struct qcom_icc_bcm *nsp_noc_bcms[] = {
+ &bcm_co0,
+};
+
+static struct qcom_icc_node *nsp_noc_nodes[] = {
+ [MASTER_CDSP_NOC_CFG] = &qhm_nsp_noc_config,
+ [MASTER_CDSP_PROC] = &qxm_nsp,
+ [SLAVE_CDSP_MEM_NOC] = &qns_nsp_gemnoc,
+ [SLAVE_SERVICE_NSP_NOC] = &service_nsp_noc,
+};
+
+static struct qcom_icc_desc sm8450_nsp_noc = {
+ .nodes = nsp_noc_nodes,
+ .num_nodes = ARRAY_SIZE(nsp_noc_nodes),
+ .bcms = nsp_noc_bcms,
+ .num_bcms = ARRAY_SIZE(nsp_noc_bcms),
+};
+
+static struct qcom_icc_bcm *pcie_anoc_bcms[] = {
+ &bcm_sn7,
+};
+
+static struct qcom_icc_node *pcie_anoc_nodes[] = {
+ [MASTER_PCIE_ANOC_CFG] = &qnm_pcie_anoc_cfg,
+ [MASTER_PCIE_0] = &xm_pcie3_0,
+ [MASTER_PCIE_1] = &xm_pcie3_1,
+ [SLAVE_ANOC_PCIE_GEM_NOC] = &qns_pcie_mem_noc,
+ [SLAVE_SERVICE_PCIE_ANOC] = &srvc_pcie_aggre_noc,
+};
+
+static struct qcom_icc_desc sm8450_pcie_anoc = {
+ .nodes = pcie_anoc_nodes,
+ .num_nodes = ARRAY_SIZE(pcie_anoc_nodes),
+ .bcms = pcie_anoc_bcms,
+ .num_bcms = ARRAY_SIZE(pcie_anoc_bcms),
+};
+
+static struct qcom_icc_bcm *system_noc_bcms[] = {
+ &bcm_sn0,
+ &bcm_sn1,
+ &bcm_sn2,
+ &bcm_sn3,
+ &bcm_sn4,
+};
+
+static struct qcom_icc_node *system_noc_nodes[] = {
+ [MASTER_GIC_AHB] = &qhm_gic,
+ [MASTER_A1NOC_SNOC] = &qnm_aggre1_noc,
+ [MASTER_A2NOC_SNOC] = &qnm_aggre2_noc,
+ [MASTER_LPASS_ANOC] = &qnm_lpass_noc,
+ [MASTER_SNOC_CFG] = &qnm_snoc_cfg,
+ [MASTER_PIMEM] = &qxm_pimem,
+ [MASTER_GIC] = &xm_gic,
+ [SLAVE_SNOC_GEM_NOC_GC] = &qns_gemnoc_gc,
+ [SLAVE_SNOC_GEM_NOC_SF] = &qns_gemnoc_sf,
+ [SLAVE_SERVICE_SNOC] = &srvc_snoc,
+};
+
+static struct qcom_icc_desc sm8450_system_noc = {
+ .nodes = system_noc_nodes,
+ .num_nodes = ARRAY_SIZE(system_noc_nodes),
+ .bcms = system_noc_bcms,
+ .num_bcms = ARRAY_SIZE(system_noc_bcms),
+};
+
+static int qnoc_probe(struct platform_device *pdev)
+{
+ const struct qcom_icc_desc *desc;
+ struct icc_onecell_data *data;
+ struct icc_provider *provider;
+ struct qcom_icc_node **qnodes;
+ struct qcom_icc_provider *qp;
+ struct icc_node *node;
+ size_t num_nodes, i;
+ int ret;
+
+ desc = device_get_match_data(&pdev->dev);
+ if (!desc)
+ return -EINVAL;
+
+ qnodes = desc->nodes;
+ num_nodes = desc->num_nodes;
+
+ qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return -ENOMEM;
+
+ data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ provider = &qp->provider;
+ provider->dev = &pdev->dev;
+ provider->set = qcom_icc_set;
+ provider->pre_aggregate = qcom_icc_pre_aggregate;
+ provider->aggregate = qcom_icc_aggregate;
+ provider->xlate_extended = qcom_icc_xlate_extended;
+ INIT_LIST_HEAD(&provider->nodes);
+ provider->data = data;
+
+ qp->dev = &pdev->dev;
+ qp->bcms = desc->bcms;
+ qp->num_bcms = desc->num_bcms;
+
+ qp->voter = of_bcm_voter_get(qp->dev, NULL);
+ if (IS_ERR(qp->voter))
+ return PTR_ERR(qp->voter);
+
+ ret = icc_provider_add(provider);
+ if (ret) {
+ dev_err(&pdev->dev, "error adding interconnect provider\n");
+ return ret;
+ }
+
+ for (i = 0; i < qp->num_bcms; i++)
+ qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
+
+ for (i = 0; i < num_nodes; i++) {
+ size_t j;
+
+ if (!qnodes[i])
+ continue;
+
+ node = icc_node_create(qnodes[i]->id);
+ if (IS_ERR(node)) {
+ ret = PTR_ERR(node);
+ goto err;
+ }
+
+ node->name = qnodes[i]->name;
+ node->data = qnodes[i];
+ icc_node_add(node, provider);
+
+ for (j = 0; j < qnodes[i]->num_links; j++)
+ icc_link_create(node, qnodes[i]->links[j]);
+
+ data->nodes[i] = node;
+ }
+ data->num_nodes = num_nodes;
+
+ platform_set_drvdata(pdev, qp);
+
+ return 0;
+err:
+ icc_nodes_remove(provider);
+ icc_provider_del(provider);
+ return ret;
+}
+
+static int qnoc_remove(struct platform_device *pdev)
+{
+ struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
+
+ icc_nodes_remove(&qp->provider);
+ return icc_provider_del(&qp->provider);
+}
+
+static const struct of_device_id qnoc_of_match[] = {
+ { .compatible = "qcom,sm8450-aggre1-noc",
+ .data = &sm8450_aggre1_noc},
+ { .compatible = "qcom,sm8450-aggre2-noc",
+ .data = &sm8450_aggre2_noc},
+ { .compatible = "qcom,sm8450-clk-virt",
+ .data = &sm8450_clk_virt},
+ { .compatible = "qcom,sm8450-config-noc",
+ .data = &sm8450_config_noc},
+ { .compatible = "qcom,sm8450-gem-noc",
+ .data = &sm8450_gem_noc},
+ { .compatible = "qcom,sm8450-lpass-ag-noc",
+ .data = &sm8450_lpass_ag_noc},
+ { .compatible = "qcom,sm8450-mc-virt",
+ .data = &sm8450_mc_virt},
+ { .compatible = "qcom,sm8450-mmss-noc",
+ .data = &sm8450_mmss_noc},
+ { .compatible = "qcom,sm8450-nsp-noc",
+ .data = &sm8450_nsp_noc},
+ { .compatible = "qcom,sm8450-pcie-anoc",
+ .data = &sm8450_pcie_anoc},
+ { .compatible = "qcom,sm8450-system-noc",
+ .data = &sm8450_system_noc},
+ { }
+};
+MODULE_DEVICE_TABLE(of, qnoc_of_match);
+
+static struct platform_driver qnoc_driver = {
+ .probe = qnoc_probe,
+ .remove = qnoc_remove,
+ .driver = {
+ .name = "qnoc-sm8450",
+ .of_match_table = qnoc_of_match,
+ },
+};
+
+static int __init qnoc_driver_init(void)
+{
+ return platform_driver_register(&qnoc_driver);
+}
+core_initcall(qnoc_driver_init);
+
+static void __exit qnoc_driver_exit(void)
+{
+ platform_driver_unregister(&qnoc_driver);
+}
+module_exit(qnoc_driver_exit);
+
+MODULE_DESCRIPTION("sm8450 NoC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/interconnect/qcom/sm8450.h b/drivers/interconnect/qcom/sm8450.h
new file mode 100644
index 000000000000..a5790ec6767b
--- /dev/null
+++ b/drivers/interconnect/qcom/sm8450.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * SM8450 interconnect IDs
+ *
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Linaro Limited
+ */
+
+#ifndef __DRIVERS_INTERCONNECT_QCOM_SM8450_H
+#define __DRIVERS_INTERCONNECT_QCOM_SM8450_H
+
+#define SM8450_MASTER_GPU_TCU 0
+#define SM8450_MASTER_SYS_TCU 1
+#define SM8450_MASTER_APPSS_PROC 2
+#define SM8450_MASTER_LLCC 3
+#define SM8450_MASTER_CNOC_LPASS_AG_NOC 4
+#define SM8450_MASTER_GIC_AHB 5
+#define SM8450_MASTER_CDSP_NOC_CFG 6
+#define SM8450_MASTER_QDSS_BAM 7
+#define SM8450_MASTER_QSPI_0 8
+#define SM8450_MASTER_QUP_0 9
+#define SM8450_MASTER_QUP_1 10
+#define SM8450_MASTER_QUP_2 11
+#define SM8450_MASTER_A1NOC_CFG 12
+#define SM8450_MASTER_A2NOC_CFG 13
+#define SM8450_MASTER_A1NOC_SNOC 14
+#define SM8450_MASTER_A2NOC_SNOC 15
+#define SM8450_MASTER_CAMNOC_HF 16
+#define SM8450_MASTER_CAMNOC_ICP 17
+#define SM8450_MASTER_CAMNOC_SF 18
+#define SM8450_MASTER_GEM_NOC_CNOC 19
+#define SM8450_MASTER_GEM_NOC_PCIE_SNOC 20
+#define SM8450_MASTER_GFX3D 21
+#define SM8450_MASTER_LPASS_ANOC 22
+#define SM8450_MASTER_MDP 23
+#define SM8450_MASTER_MDP0 SM8450_MASTER_MDP
+#define SM8450_MASTER_MDP1 SM8450_MASTER_MDP
+#define SM8450_MASTER_MSS_PROC 24
+#define SM8450_MASTER_CNOC_MNOC_CFG 25
+#define SM8450_MASTER_MNOC_HF_MEM_NOC 26
+#define SM8450_MASTER_MNOC_SF_MEM_NOC 27
+#define SM8450_MASTER_COMPUTE_NOC 28
+#define SM8450_MASTER_ANOC_PCIE_GEM_NOC 29
+#define SM8450_MASTER_PCIE_ANOC_CFG 30
+#define SM8450_MASTER_ROTATOR 31
+#define SM8450_MASTER_SNOC_CFG 32
+#define SM8450_MASTER_SNOC_GC_MEM_NOC 33
+#define SM8450_MASTER_SNOC_SF_MEM_NOC 34
+#define SM8450_MASTER_CDSP_HCP 35
+#define SM8450_MASTER_VIDEO 36
+#define SM8450_MASTER_VIDEO_P0 SM8450_MASTER_VIDEO
+#define SM8450_MASTER_VIDEO_P1 SM8450_MASTER_VIDEO
+#define SM8450_MASTER_VIDEO_CV_PROC 37
+#define SM8450_MASTER_VIDEO_PROC 38
+#define SM8450_MASTER_VIDEO_V_PROC 39
+#define SM8450_MASTER_QUP_CORE_0 40
+#define SM8450_MASTER_QUP_CORE_1 41
+#define SM8450_MASTER_QUP_CORE_2 42
+#define SM8450_MASTER_CRYPTO 43
+#define SM8450_MASTER_IPA 44
+#define SM8450_MASTER_LPASS_PROC 45
+#define SM8450_MASTER_CDSP_PROC 46
+#define SM8450_MASTER_PIMEM 47
+#define SM8450_MASTER_SENSORS_PROC 48
+#define SM8450_MASTER_SP 49
+#define SM8450_MASTER_GIC 50
+#define SM8450_MASTER_PCIE_0 51
+#define SM8450_MASTER_PCIE_1 52
+#define SM8450_MASTER_QDSS_ETR 53
+#define SM8450_MASTER_QDSS_ETR_1 54
+#define SM8450_MASTER_SDCC_2 55
+#define SM8450_MASTER_SDCC_4 56
+#define SM8450_MASTER_UFS_MEM 57
+#define SM8450_MASTER_USB3_0 58
+#define SM8450_SLAVE_EBI1 512
+#define SM8450_SLAVE_AHB2PHY_SOUTH 513
+#define SM8450_SLAVE_AHB2PHY_NORTH 514
+#define SM8450_SLAVE_AOSS 515
+#define SM8450_SLAVE_CAMERA_CFG 516
+#define SM8450_SLAVE_CLK_CTL 517
+#define SM8450_SLAVE_CDSP_CFG 518
+#define SM8450_SLAVE_RBCPR_CX_CFG 519
+#define SM8450_SLAVE_RBCPR_MMCX_CFG 520
+#define SM8450_SLAVE_RBCPR_MXA_CFG 521
+#define SM8450_SLAVE_RBCPR_MXC_CFG 522
+#define SM8450_SLAVE_CRYPTO_0_CFG 523
+#define SM8450_SLAVE_CX_RDPM 524
+#define SM8450_SLAVE_DISPLAY_CFG 525
+#define SM8450_SLAVE_GFX3D_CFG 526
+#define SM8450_SLAVE_IMEM_CFG 527
+#define SM8450_SLAVE_IPA_CFG 528
+#define SM8450_SLAVE_IPC_ROUTER_CFG 529
+#define SM8450_SLAVE_LPASS 530
+#define SM8450_SLAVE_LPASS_CORE_CFG 531
+#define SM8450_SLAVE_LPASS_LPI_CFG 532
+#define SM8450_SLAVE_LPASS_MPU_CFG 533
+#define SM8450_SLAVE_LPASS_TOP_CFG 534
+#define SM8450_SLAVE_CNOC_MSS 535
+#define SM8450_SLAVE_MX_RDPM 536
+#define SM8450_SLAVE_PCIE_0_CFG 537
+#define SM8450_SLAVE_PCIE_1_CFG 538
+#define SM8450_SLAVE_PDM 539
+#define SM8450_SLAVE_PIMEM_CFG 540
+#define SM8450_SLAVE_PRNG 541
+#define SM8450_SLAVE_QDSS_CFG 542
+#define SM8450_SLAVE_QSPI_0 543
+#define SM8450_SLAVE_QUP_0 544
+#define SM8450_SLAVE_QUP_1 545
+#define SM8450_SLAVE_QUP_2 546
+#define SM8450_SLAVE_SDCC_2 547
+#define SM8450_SLAVE_SDCC_4 548
+#define SM8450_SLAVE_SPSS_CFG 549
+#define SM8450_SLAVE_TCSR 550
+#define SM8450_SLAVE_TLMM 551
+#define SM8450_SLAVE_TME_CFG 552
+#define SM8450_SLAVE_UFS_MEM_CFG 553
+#define SM8450_SLAVE_USB3_0 554
+#define SM8450_SLAVE_VENUS_CFG 555
+#define SM8450_SLAVE_VSENSE_CTRL_CFG 556
+#define SM8450_SLAVE_A1NOC_CFG 557
+#define SM8450_SLAVE_A1NOC_SNOC 558
+#define SM8450_SLAVE_A2NOC_CFG 559
+#define SM8450_SLAVE_A2NOC_SNOC 560
+#define SM8450_SLAVE_DDRSS_CFG 561
+#define SM8450_SLAVE_GEM_NOC_CNOC 562
+#define SM8450_SLAVE_SNOC_GEM_NOC_GC 563
+#define SM8450_SLAVE_SNOC_GEM_NOC_SF 564
+#define SM8450_SLAVE_LLCC 565
+#define SM8450_SLAVE_MNOC_HF_MEM_NOC 566
+#define SM8450_SLAVE_MNOC_SF_MEM_NOC 567
+#define SM8450_SLAVE_CNOC_MNOC_CFG 568
+#define SM8450_SLAVE_CDSP_MEM_NOC 569
+#define SM8450_SLAVE_MEM_NOC_PCIE_SNOC 570
+#define SM8450_SLAVE_PCIE_ANOC_CFG 571
+#define SM8450_SLAVE_ANOC_PCIE_GEM_NOC 572
+#define SM8450_SLAVE_SNOC_CFG 573
+#define SM8450_SLAVE_LPASS_SNOC 574
+#define SM8450_SLAVE_QUP_CORE_0 575
+#define SM8450_SLAVE_QUP_CORE_1 576
+#define SM8450_SLAVE_QUP_CORE_2 577
+#define SM8450_SLAVE_IMEM 578
+#define SM8450_SLAVE_PIMEM 579
+#define SM8450_SLAVE_SERVICE_NSP_NOC 580
+#define SM8450_SLAVE_SERVICE_A1NOC 581
+#define SM8450_SLAVE_SERVICE_A2NOC 582
+#define SM8450_SLAVE_SERVICE_CNOC 583
+#define SM8450_SLAVE_SERVICE_MNOC 584
+#define SM8450_SLAVE_SERVICES_LPASS_AML_NOC 585
+#define SM8450_SLAVE_SERVICE_LPASS_AG_NOC 586
+#define SM8450_SLAVE_SERVICE_PCIE_ANOC 587
+#define SM8450_SLAVE_SERVICE_SNOC 588
+#define SM8450_SLAVE_PCIE_0 589
+#define SM8450_SLAVE_PCIE_1 590
+#define SM8450_SLAVE_QDSS_STM 591
+#define SM8450_SLAVE_TCU 592
+#define SM8450_MASTER_LLCC_DISP 1000
+#define SM8450_MASTER_MDP_DISP 1001
+#define SM8450_MASTER_MDP0_DISP SM8450_MASTER_MDP_DISP
+#define SM8450_MASTER_MDP1_DISP SM8450_MASTER_MDP_DISP
+#define SM8450_MASTER_MNOC_HF_MEM_NOC_DISP 1002
+#define SM8450_MASTER_MNOC_SF_MEM_NOC_DISP 1003
+#define SM8450_MASTER_ANOC_PCIE_GEM_NOC_DISP 1004
+#define SM8450_MASTER_ROTATOR_DISP 1005
+#define SM8450_SLAVE_EBI1_DISP 1512
+#define SM8450_SLAVE_LLCC_DISP 1513
+#define SM8450_SLAVE_MNOC_HF_MEM_NOC_DISP 1514
+#define SM8450_SLAVE_MNOC_SF_MEM_NOC_DISP 1515
+
+#endif
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 1f793219cac6..f2aa34f57454 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -1189,7 +1189,7 @@ static void viommu_remove(struct virtio_device *vdev)
iommu_device_unregister(&viommu->iommu);
/* Stop all virtqueues */
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
dev_info(&vdev->dev, "device removed\n");
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c
index 28b8581b44dd..d8c4d5664145 100644
--- a/drivers/macintosh/mac_hid.c
+++ b/drivers/macintosh/mac_hid.c
@@ -239,33 +239,11 @@ static struct ctl_table mac_hid_files[] = {
{ }
};
-/* dir in /proc/sys/dev */
-static struct ctl_table mac_hid_dir[] = {
- {
- .procname = "mac_hid",
- .maxlen = 0,
- .mode = 0555,
- .child = mac_hid_files,
- },
- { }
-};
-
-/* /proc/sys/dev itself, in case that is not there yet */
-static struct ctl_table mac_hid_root_dir[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = mac_hid_dir,
- },
- { }
-};
-
static struct ctl_table_header *mac_hid_sysctl_header;
static int __init mac_hid_init(void)
{
- mac_hid_sysctl_header = register_sysctl_table(mac_hid_root_dir);
+ mac_hid_sysctl_header = register_sysctl("dev/mac_hid", mac_hid_files);
if (!mac_hid_sysctl_header)
return -ENOMEM;
diff --git a/drivers/media/cec/core/cec-core.c b/drivers/media/cec/core/cec-core.c
index ec67065d5202..a3ab6a43fb14 100644
--- a/drivers/media/cec/core/cec-core.c
+++ b/drivers/media/cec/core/cec-core.c
@@ -106,7 +106,7 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode,
/* Part 1: Find a free minor number */
mutex_lock(&cec_devnode_lock);
- minor = find_next_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES, 0);
+ minor = find_first_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES);
if (minor == CEC_NUM_DEVICES) {
mutex_unlock(&cec_devnode_lock);
pr_err("could not get a free minor\n");
diff --git a/drivers/media/mc/mc-devnode.c b/drivers/media/mc/mc-devnode.c
index f11382afe23b..680fbb3a9340 100644
--- a/drivers/media/mc/mc-devnode.c
+++ b/drivers/media/mc/mc-devnode.c
@@ -217,7 +217,7 @@ int __must_check media_devnode_register(struct media_device *mdev,
/* Part 1: Find a free minor number */
mutex_lock(&media_devnode_lock);
- minor = find_next_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES, 0);
+ minor = find_first_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES);
if (minor == MEDIA_NUM_DEVICES) {
mutex_unlock(&media_devnode_lock);
pr_err("could not get a free minor\n");
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 24a4532053e4..e90adfa57950 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -300,8 +300,8 @@ mpt_is_discovery_complete(MPT_ADAPTER *ioc)
if (!hdr.ExtPageLength)
goto out;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
goto out;
@@ -316,8 +316,8 @@ mpt_is_discovery_complete(MPT_ADAPTER *ioc)
rc = 1;
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return rc;
}
@@ -1661,16 +1661,14 @@ mpt_mapresources(MPT_ADAPTER *ioc)
const uint64_t required_mask = dma_get_required_mask
(&pdev->dev);
if (required_mask > DMA_BIT_MASK(32)
- && !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))
- && !pci_set_consistent_dma_mask(pdev,
- DMA_BIT_MASK(64))) {
+ && !dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))
+ && !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
ioc->dma_mask = DMA_BIT_MASK(64);
dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
": 64 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
ioc->name));
- } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
- && !pci_set_consistent_dma_mask(pdev,
- DMA_BIT_MASK(32))) {
+ } else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))
+ && !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) {
ioc->dma_mask = DMA_BIT_MASK(32);
dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
@@ -1681,9 +1679,8 @@ mpt_mapresources(MPT_ADAPTER *ioc)
goto out_pci_release_region;
}
} else {
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
- && !pci_set_consistent_dma_mask(pdev,
- DMA_BIT_MASK(32))) {
+ if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))
+ && !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) {
ioc->dma_mask = DMA_BIT_MASK(32);
dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
@@ -2769,9 +2766,9 @@ mpt_adapter_disable(MPT_ADAPTER *ioc)
if (ioc->spi_data.pIocPg4 != NULL) {
sz = ioc->spi_data.IocPg4Sz;
- pci_free_consistent(ioc->pcidev, sz,
- ioc->spi_data.pIocPg4,
- ioc->spi_data.IocPg4_dma);
+ dma_free_coherent(&ioc->pcidev->dev, sz,
+ ioc->spi_data.pIocPg4,
+ ioc->spi_data.IocPg4_dma);
ioc->spi_data.pIocPg4 = NULL;
ioc->alloc_total -= sz;
}
@@ -3515,7 +3512,8 @@ mpt_alloc_fw_memory(MPT_ADAPTER *ioc, int size)
rc = 0;
goto out;
}
- ioc->cached_fw = pci_alloc_consistent(ioc->pcidev, size, &ioc->cached_fw_dma);
+ ioc->cached_fw = dma_alloc_coherent(&ioc->pcidev->dev, size,
+ &ioc->cached_fw_dma, GFP_ATOMIC);
if (!ioc->cached_fw) {
printk(MYIOC_s_ERR_FMT "Unable to allocate memory for the cached firmware image!\n",
ioc->name);
@@ -3548,7 +3546,8 @@ mpt_free_fw_memory(MPT_ADAPTER *ioc)
sz = ioc->facts.FWImageSize;
dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "free_fw_memory: FW Image @ %p[%p], sz=%d[%x] bytes\n",
ioc->name, ioc->cached_fw, (void *)(ulong)ioc->cached_fw_dma, sz, sz));
- pci_free_consistent(ioc->pcidev, sz, ioc->cached_fw, ioc->cached_fw_dma);
+ dma_free_coherent(&ioc->pcidev->dev, sz, ioc->cached_fw,
+ ioc->cached_fw_dma);
ioc->alloc_total -= sz;
ioc->cached_fw = NULL;
}
@@ -4447,9 +4446,8 @@ PrimeIocFifos(MPT_ADAPTER *ioc)
*/
if (ioc->pcidev->device == MPI_MANUFACTPAGE_DEVID_SAS1078 &&
ioc->dma_mask > DMA_BIT_MASK(35)) {
- if (!pci_set_dma_mask(ioc->pcidev, DMA_BIT_MASK(32))
- && !pci_set_consistent_dma_mask(ioc->pcidev,
- DMA_BIT_MASK(32))) {
+ if (!dma_set_mask(&ioc->pcidev->dev, DMA_BIT_MASK(32))
+ && !dma_set_coherent_mask(&ioc->pcidev->dev, DMA_BIT_MASK(32))) {
dma_mask = DMA_BIT_MASK(35);
d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
"setting 35 bit addressing for "
@@ -4457,10 +4455,10 @@ PrimeIocFifos(MPT_ADAPTER *ioc)
ioc->name));
} else {
/*Reseting DMA mask to 64 bit*/
- pci_set_dma_mask(ioc->pcidev,
- DMA_BIT_MASK(64));
- pci_set_consistent_dma_mask(ioc->pcidev,
- DMA_BIT_MASK(64));
+ dma_set_mask(&ioc->pcidev->dev,
+ DMA_BIT_MASK(64));
+ dma_set_coherent_mask(&ioc->pcidev->dev,
+ DMA_BIT_MASK(64));
printk(MYIOC_s_ERR_FMT
"failed setting 35 bit addressing for "
@@ -4595,8 +4593,8 @@ PrimeIocFifos(MPT_ADAPTER *ioc)
alloc_dma += ioc->reply_sz;
}
- if (dma_mask == DMA_BIT_MASK(35) && !pci_set_dma_mask(ioc->pcidev,
- ioc->dma_mask) && !pci_set_consistent_dma_mask(ioc->pcidev,
+ if (dma_mask == DMA_BIT_MASK(35) && !dma_set_mask(&ioc->pcidev->dev,
+ ioc->dma_mask) && !dma_set_coherent_mask(&ioc->pcidev->dev,
ioc->dma_mask))
d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
"restoring 64 bit addressing\n", ioc->name));
@@ -4620,8 +4618,8 @@ out_fail:
ioc->sense_buf_pool = NULL;
}
- if (dma_mask == DMA_BIT_MASK(35) && !pci_set_dma_mask(ioc->pcidev,
- DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(ioc->pcidev,
+ if (dma_mask == DMA_BIT_MASK(35) && !dma_set_mask(&ioc->pcidev->dev,
+ DMA_BIT_MASK(64)) && !dma_set_coherent_mask(&ioc->pcidev->dev,
DMA_BIT_MASK(64)))
d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
"restoring 64 bit addressing\n", ioc->name));
@@ -4968,7 +4966,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc)
if (hdr.PageLength > 0) {
data_sz = hdr.PageLength * 4;
- ppage0_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page0_dma);
+ ppage0_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz,
+ &page0_dma, GFP_KERNEL);
rc = -ENOMEM;
if (ppage0_alloc) {
memset((u8 *)ppage0_alloc, 0, data_sz);
@@ -4982,7 +4981,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc)
}
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) ppage0_alloc, page0_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz,
+ (u8 *)ppage0_alloc, page0_dma);
/* FIXME!
* Normalize endianness of structure data,
@@ -5014,7 +5014,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc)
data_sz = hdr.PageLength * 4;
rc = -ENOMEM;
- ppage1_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page1_dma);
+ ppage1_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz,
+ &page1_dma, GFP_KERNEL);
if (ppage1_alloc) {
memset((u8 *)ppage1_alloc, 0, data_sz);
cfg.physAddr = page1_dma;
@@ -5026,7 +5027,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc)
memcpy(&ioc->lan_cnfg_page1, ppage1_alloc, copy_sz);
}
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) ppage1_alloc, page1_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz,
+ (u8 *)ppage1_alloc, page1_dma);
/* FIXME!
* Normalize endianness of structure data,
@@ -5315,7 +5317,8 @@ GetIoUnitPage2(MPT_ADAPTER *ioc)
/* Read the config page */
data_sz = hdr.PageLength * 4;
rc = -ENOMEM;
- ppage_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page_dma);
+ ppage_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz,
+ &page_dma, GFP_KERNEL);
if (ppage_alloc) {
memset((u8 *)ppage_alloc, 0, data_sz);
cfg.physAddr = page_dma;
@@ -5325,7 +5328,8 @@ GetIoUnitPage2(MPT_ADAPTER *ioc)
if ((rc = mpt_config(ioc, &cfg)) == 0)
ioc->biosVersion = le32_to_cpu(ppage_alloc->BiosVersion);
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) ppage_alloc, page_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz,
+ (u8 *)ppage_alloc, page_dma);
}
return rc;
@@ -5400,7 +5404,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum)
return -EFAULT;
if (header.PageLength > 0) {
- pbuf = pci_alloc_consistent(ioc->pcidev, header.PageLength * 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev,
+ header.PageLength * 4, &buf_dma,
+ GFP_KERNEL);
if (pbuf) {
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
cfg.physAddr = buf_dma;
@@ -5456,7 +5462,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum)
}
}
if (pbuf) {
- pci_free_consistent(ioc->pcidev, header.PageLength * 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev,
+ header.PageLength * 4, pbuf,
+ buf_dma);
}
}
}
@@ -5478,7 +5486,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum)
if (header.PageLength > 0) {
/* Allocate memory and read SCSI Port Page 2
*/
- pbuf = pci_alloc_consistent(ioc->pcidev, header.PageLength * 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev,
+ header.PageLength * 4, &buf_dma,
+ GFP_KERNEL);
if (pbuf) {
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_NVRAM;
cfg.physAddr = buf_dma;
@@ -5543,7 +5553,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum)
}
}
- pci_free_consistent(ioc->pcidev, header.PageLength * 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev,
+ header.PageLength * 4, pbuf,
+ buf_dma);
}
}
@@ -5659,8 +5671,8 @@ mpt_inactive_raid_volumes(MPT_ADAPTER *ioc, u8 channel, u8 id)
if (!hdr.PageLength)
goto out;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
goto out;
@@ -5707,8 +5719,8 @@ mpt_inactive_raid_volumes(MPT_ADAPTER *ioc, u8 channel, u8 id)
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
}
/**
@@ -5752,8 +5764,8 @@ mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
rc = -ENOMEM;
@@ -5776,8 +5788,8 @@ mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num,
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
return rc;
}
@@ -5819,8 +5831,8 @@ mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc, u8 phys_disk_num)
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
rc = 0;
@@ -5840,8 +5852,8 @@ mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc, u8 phys_disk_num)
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
return rc;
}
@@ -5891,8 +5903,8 @@ mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
rc = -ENOMEM;
@@ -5929,8 +5941,8 @@ mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num,
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
return rc;
}
@@ -5986,7 +5998,8 @@ mpt_findImVolumes(MPT_ADAPTER *ioc)
return -EFAULT;
iocpage2sz = header.PageLength * 4;
- pIoc2 = pci_alloc_consistent(ioc->pcidev, iocpage2sz, &ioc2_dma);
+ pIoc2 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage2sz, &ioc2_dma,
+ GFP_KERNEL);
if (!pIoc2)
return -ENOMEM;
@@ -6011,7 +6024,7 @@ mpt_findImVolumes(MPT_ADAPTER *ioc)
pIoc2->RaidVolume[i].VolumeID);
out:
- pci_free_consistent(ioc->pcidev, iocpage2sz, pIoc2, ioc2_dma);
+ dma_free_coherent(&ioc->pcidev->dev, iocpage2sz, pIoc2, ioc2_dma);
return rc;
}
@@ -6053,7 +6066,8 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc)
/* Read Header good, alloc memory
*/
iocpage3sz = header.PageLength * 4;
- pIoc3 = pci_alloc_consistent(ioc->pcidev, iocpage3sz, &ioc3_dma);
+ pIoc3 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage3sz, &ioc3_dma,
+ GFP_KERNEL);
if (!pIoc3)
return 0;
@@ -6070,7 +6084,7 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc)
}
}
- pci_free_consistent(ioc->pcidev, iocpage3sz, pIoc3, ioc3_dma);
+ dma_free_coherent(&ioc->pcidev->dev, iocpage3sz, pIoc3, ioc3_dma);
return 0;
}
@@ -6104,7 +6118,8 @@ mpt_read_ioc_pg_4(MPT_ADAPTER *ioc)
if ( (pIoc4 = ioc->spi_data.pIocPg4) == NULL ) {
iocpage4sz = (header.PageLength + 4) * 4; /* Allow 4 additional SEP's */
- pIoc4 = pci_alloc_consistent(ioc->pcidev, iocpage4sz, &ioc4_dma);
+ pIoc4 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage4sz,
+ &ioc4_dma, GFP_KERNEL);
if (!pIoc4)
return;
ioc->alloc_total += iocpage4sz;
@@ -6122,7 +6137,8 @@ mpt_read_ioc_pg_4(MPT_ADAPTER *ioc)
ioc->spi_data.IocPg4_dma = ioc4_dma;
ioc->spi_data.IocPg4Sz = iocpage4sz;
} else {
- pci_free_consistent(ioc->pcidev, iocpage4sz, pIoc4, ioc4_dma);
+ dma_free_coherent(&ioc->pcidev->dev, iocpage4sz, pIoc4,
+ ioc4_dma);
ioc->spi_data.pIocPg4 = NULL;
ioc->alloc_total -= iocpage4sz;
}
@@ -6159,7 +6175,8 @@ mpt_read_ioc_pg_1(MPT_ADAPTER *ioc)
/* Read Header good, alloc memory
*/
iocpage1sz = header.PageLength * 4;
- pIoc1 = pci_alloc_consistent(ioc->pcidev, iocpage1sz, &ioc1_dma);
+ pIoc1 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage1sz, &ioc1_dma,
+ GFP_KERNEL);
if (!pIoc1)
return;
@@ -6210,7 +6227,7 @@ mpt_read_ioc_pg_1(MPT_ADAPTER *ioc)
}
}
- pci_free_consistent(ioc->pcidev, iocpage1sz, pIoc1, ioc1_dma);
+ dma_free_coherent(&ioc->pcidev->dev, iocpage1sz, pIoc1, ioc1_dma);
return;
}
@@ -6239,7 +6256,8 @@ mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc)
goto out;
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
- pbuf = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &buf_dma, GFP_KERNEL);
if (!pbuf)
goto out;
@@ -6255,7 +6273,8 @@ mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc)
out:
if (pbuf)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, pbuf,
+ buf_dma);
}
/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index ae433c150b37..03c8fb1795c2 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -1041,14 +1041,15 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags,
* copying the data in this array into the correct place in the
* request and chain buffers.
*/
- sglbuf = pci_alloc_consistent(ioc->pcidev, MAX_SGL_BYTES, sglbuf_dma);
+ sglbuf = dma_alloc_coherent(&ioc->pcidev->dev, MAX_SGL_BYTES,
+ sglbuf_dma, GFP_KERNEL);
if (sglbuf == NULL)
goto free_and_fail;
if (sgdir & 0x04000000)
- dir = PCI_DMA_TODEVICE;
+ dir = DMA_TO_DEVICE;
else
- dir = PCI_DMA_FROMDEVICE;
+ dir = DMA_FROM_DEVICE;
/* At start:
* sgl = sglbuf = point to beginning of sg buffer
@@ -1062,9 +1063,9 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags,
while (bytes_allocd < bytes) {
this_alloc = min(alloc_sz, bytes-bytes_allocd);
buflist[buflist_ent].len = this_alloc;
- buflist[buflist_ent].kptr = pci_alloc_consistent(ioc->pcidev,
- this_alloc,
- &pa);
+ buflist[buflist_ent].kptr = dma_alloc_coherent(&ioc->pcidev->dev,
+ this_alloc,
+ &pa, GFP_KERNEL);
if (buflist[buflist_ent].kptr == NULL) {
alloc_sz = alloc_sz / 2;
if (alloc_sz == 0) {
@@ -1080,8 +1081,9 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags,
bytes_allocd += this_alloc;
sgl->FlagsLength = (0x10000000|sgdir|this_alloc);
- dma_addr = pci_map_single(ioc->pcidev,
- buflist[buflist_ent].kptr, this_alloc, dir);
+ dma_addr = dma_map_single(&ioc->pcidev->dev,
+ buflist[buflist_ent].kptr,
+ this_alloc, dir);
sgl->Address = dma_addr;
fragcnt++;
@@ -1140,9 +1142,11 @@ free_and_fail:
kptr = buflist[i].kptr;
len = buflist[i].len;
- pci_free_consistent(ioc->pcidev, len, kptr, dma_addr);
+ dma_free_coherent(&ioc->pcidev->dev, len, kptr,
+ dma_addr);
}
- pci_free_consistent(ioc->pcidev, MAX_SGL_BYTES, sglbuf, *sglbuf_dma);
+ dma_free_coherent(&ioc->pcidev->dev, MAX_SGL_BYTES, sglbuf,
+ *sglbuf_dma);
}
kfree(buflist);
return NULL;
@@ -1162,9 +1166,9 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE
int n = 0;
if (sg->FlagsLength & 0x04000000)
- dir = PCI_DMA_TODEVICE;
+ dir = DMA_TO_DEVICE;
else
- dir = PCI_DMA_FROMDEVICE;
+ dir = DMA_FROM_DEVICE;
nib = (sg->FlagsLength & 0xF0000000) >> 28;
while (! (nib & 0x4)) { /* eob */
@@ -1179,8 +1183,10 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE
dma_addr = sg->Address;
kptr = bl->kptr;
len = bl->len;
- pci_unmap_single(ioc->pcidev, dma_addr, len, dir);
- pci_free_consistent(ioc->pcidev, len, kptr, dma_addr);
+ dma_unmap_single(&ioc->pcidev->dev, dma_addr, len,
+ dir);
+ dma_free_coherent(&ioc->pcidev->dev, len, kptr,
+ dma_addr);
n++;
}
sg++;
@@ -1197,12 +1203,12 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE
dma_addr = sg->Address;
kptr = bl->kptr;
len = bl->len;
- pci_unmap_single(ioc->pcidev, dma_addr, len, dir);
- pci_free_consistent(ioc->pcidev, len, kptr, dma_addr);
+ dma_unmap_single(&ioc->pcidev->dev, dma_addr, len, dir);
+ dma_free_coherent(&ioc->pcidev->dev, len, kptr, dma_addr);
n++;
}
- pci_free_consistent(ioc->pcidev, MAX_SGL_BYTES, sgl, sgl_dma);
+ dma_free_coherent(&ioc->pcidev->dev, MAX_SGL_BYTES, sgl, sgl_dma);
kfree(buflist);
dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "-SG: Free'd 1 SGL buf + %d kbufs!\n",
ioc->name, n));
@@ -2100,8 +2106,9 @@ mptctl_do_mpt_command (MPT_ADAPTER *ioc, struct mpt_ioctl_command karg, void __u
}
flagsLength |= karg.dataOutSize;
bufOut.len = karg.dataOutSize;
- bufOut.kptr = pci_alloc_consistent(
- ioc->pcidev, bufOut.len, &dma_addr_out);
+ bufOut.kptr = dma_alloc_coherent(&ioc->pcidev->dev,
+ bufOut.len,
+ &dma_addr_out, GFP_KERNEL);
if (bufOut.kptr == NULL) {
rc = -ENOMEM;
@@ -2134,8 +2141,9 @@ mptctl_do_mpt_command (MPT_ADAPTER *ioc, struct mpt_ioctl_command karg, void __u
flagsLength |= karg.dataInSize;
bufIn.len = karg.dataInSize;
- bufIn.kptr = pci_alloc_consistent(ioc->pcidev,
- bufIn.len, &dma_addr_in);
+ bufIn.kptr = dma_alloc_coherent(&ioc->pcidev->dev,
+ bufIn.len,
+ &dma_addr_in, GFP_KERNEL);
if (bufIn.kptr == NULL) {
rc = -ENOMEM;
@@ -2283,13 +2291,13 @@ done_free_mem:
/* Free the allocated memory.
*/
if (bufOut.kptr != NULL) {
- pci_free_consistent(ioc->pcidev,
- bufOut.len, (void *) bufOut.kptr, dma_addr_out);
+ dma_free_coherent(&ioc->pcidev->dev, bufOut.len,
+ (void *)bufOut.kptr, dma_addr_out);
}
if (bufIn.kptr != NULL) {
- pci_free_consistent(ioc->pcidev,
- bufIn.len, (void *) bufIn.kptr, dma_addr_in);
+ dma_free_coherent(&ioc->pcidev->dev, bufIn.len,
+ (void *)bufIn.kptr, dma_addr_in);
}
/* mf is null if command issued successfully
@@ -2395,7 +2403,9 @@ mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size)
/* Issue the second config page request */
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
- pbuf = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev,
+ hdr.PageLength * 4,
+ &buf_dma, GFP_KERNEL);
if (pbuf) {
cfg.physAddr = buf_dma;
if (mpt_config(ioc, &cfg) == 0) {
@@ -2405,7 +2415,9 @@ mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size)
pdata->BoardTracerNumber, 24);
}
}
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev,
+ hdr.PageLength * 4, pbuf,
+ buf_dma);
pbuf = NULL;
}
}
@@ -2470,7 +2482,7 @@ mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size)
else
IstwiRWRequest->DeviceAddr = 0xB0;
- pbuf = pci_alloc_consistent(ioc->pcidev, 4, &buf_dma);
+ pbuf = dma_alloc_coherent(&ioc->pcidev->dev, 4, &buf_dma, GFP_KERNEL);
if (!pbuf)
goto out;
ioc->add_sge((char *)&IstwiRWRequest->SGL,
@@ -2519,7 +2531,7 @@ retry_wait:
SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context, 0);
if (pbuf)
- pci_free_consistent(ioc->pcidev, 4, pbuf, buf_dma);
+ dma_free_coherent(&ioc->pcidev->dev, 4, pbuf, buf_dma);
/* Copy the data from kernel memory to user memory
*/
@@ -2585,7 +2597,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg)
/* Get the data transfer speeds
*/
data_sz = ioc->spi_data.sdp0length * 4;
- pg0_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page_dma);
+ pg0_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz, &page_dma,
+ GFP_KERNEL);
if (pg0_alloc) {
hdr.PageVersion = ioc->spi_data.sdp0version;
hdr.PageLength = data_sz;
@@ -2623,7 +2636,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg)
karg.negotiated_speed = HP_DEV_SPEED_ASYNC;
}
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) pg0_alloc, page_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz, (u8 *)pg0_alloc,
+ page_dma);
}
/* Set defaults
@@ -2649,7 +2663,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg)
/* Issue the second config page request */
cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
data_sz = (int) cfg.cfghdr.hdr->PageLength * 4;
- pg3_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page_dma);
+ pg3_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz,
+ &page_dma, GFP_KERNEL);
if (pg3_alloc) {
cfg.physAddr = page_dma;
cfg.pageAddr = (karg.hdr.channel << 8) | karg.hdr.id;
@@ -2658,7 +2673,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg)
karg.phase_errors = (u32) le16_to_cpu(pg3_alloc->PhaseErrorCount);
karg.parity_errors = (u32) le16_to_cpu(pg3_alloc->ParityErrorCount);
}
- pci_free_consistent(ioc->pcidev, data_sz, (u8 *) pg3_alloc, page_dma);
+ dma_free_coherent(&ioc->pcidev->dev, data_sz,
+ (u8 *)pg3_alloc, page_dma);
}
}
hd = shost_priv(ioc->sh);
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index 117fa4ebf6d7..142eb5d5d9df 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -516,9 +516,9 @@ mpt_lan_close(struct net_device *dev)
if (priv->RcvCtl[i].skb != NULL) {
/**/ dlprintk((KERN_INFO MYNAM "/lan_close: bucket %05x "
/**/ "is still out\n", i));
- pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[i].dma,
- priv->RcvCtl[i].len,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[i].dma,
+ priv->RcvCtl[i].len, DMA_FROM_DEVICE);
dev_kfree_skb(priv->RcvCtl[i].skb);
}
}
@@ -528,9 +528,9 @@ mpt_lan_close(struct net_device *dev)
for (i = 0; i < priv->tx_max_out; i++) {
if (priv->SendCtl[i].skb != NULL) {
- pci_unmap_single(mpt_dev->pcidev, priv->SendCtl[i].dma,
- priv->SendCtl[i].len,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev,
+ priv->SendCtl[i].dma,
+ priv->SendCtl[i].len, DMA_TO_DEVICE);
dev_kfree_skb(priv->SendCtl[i].skb);
}
}
@@ -582,8 +582,8 @@ mpt_lan_send_turbo(struct net_device *dev, u32 tmsg)
__func__, sent));
priv->SendCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->SendCtl[ctx].dma,
- priv->SendCtl[ctx].len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev, priv->SendCtl[ctx].dma,
+ priv->SendCtl[ctx].len, DMA_TO_DEVICE);
dev_kfree_skb_irq(sent);
spin_lock_irqsave(&priv->txfidx_lock, flags);
@@ -648,8 +648,9 @@ mpt_lan_send_reply(struct net_device *dev, LANSendReply_t *pSendRep)
__func__, sent));
priv->SendCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->SendCtl[ctx].dma,
- priv->SendCtl[ctx].len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev,
+ priv->SendCtl[ctx].dma,
+ priv->SendCtl[ctx].len, DMA_TO_DEVICE);
dev_kfree_skb_irq(sent);
priv->mpt_txfidx[++priv->mpt_txfidx_tail] = ctx;
@@ -720,8 +721,8 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, 12);
- dma = pci_map_single(mpt_dev->pcidev, skb->data, skb->len,
- PCI_DMA_TODEVICE);
+ dma = dma_map_single(&mpt_dev->pcidev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
priv->SendCtl[ctx].skb = skb;
priv->SendCtl[ctx].dma = dma;
@@ -868,13 +869,17 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg)
return -ENOMEM;
}
- pci_dma_sync_single_for_cpu(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
- pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
goto out;
}
@@ -882,8 +887,8 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg)
priv->RcvCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len, DMA_FROM_DEVICE);
out:
spin_lock_irqsave(&priv->rxfidx_lock, flags);
@@ -927,8 +932,8 @@ mpt_lan_receive_post_free(struct net_device *dev,
// dlprintk((KERN_INFO MYNAM "@rpr[2] TC + 3\n"));
priv->RcvCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
@@ -1028,16 +1033,16 @@ mpt_lan_receive_post_reply(struct net_device *dev,
// IOC_AND_NETDEV_NAMES_s_s(dev),
// i, l));
- pci_dma_sync_single_for_cpu(mpt_dev->pcidev,
- priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
skb_copy_from_linear_data(old_skb, skb_put(skb, l), l);
- pci_dma_sync_single_for_device(mpt_dev->pcidev,
- priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
szrem -= l;
@@ -1056,17 +1061,17 @@ mpt_lan_receive_post_reply(struct net_device *dev,
return -ENOMEM;
}
- pci_dma_sync_single_for_cpu(mpt_dev->pcidev,
- priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
- pci_dma_sync_single_for_device(mpt_dev->pcidev,
- priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&mpt_dev->pcidev->dev,
+ priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len,
+ DMA_FROM_DEVICE);
spin_lock_irqsave(&priv->rxfidx_lock, flags);
priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
@@ -1077,8 +1082,8 @@ mpt_lan_receive_post_reply(struct net_device *dev,
priv->RcvCtl[ctx].skb = NULL;
- pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
- priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma,
+ priv->RcvCtl[ctx].len, DMA_FROM_DEVICE);
priv->RcvCtl[ctx].dma = 0;
priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
@@ -1199,10 +1204,10 @@ mpt_lan_post_receive_buckets(struct mpt_lan_priv *priv)
skb = priv->RcvCtl[ctx].skb;
if (skb && (priv->RcvCtl[ctx].len != len)) {
- pci_unmap_single(mpt_dev->pcidev,
+ dma_unmap_single(&mpt_dev->pcidev->dev,
priv->RcvCtl[ctx].dma,
priv->RcvCtl[ctx].len,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
dev_kfree_skb(priv->RcvCtl[ctx].skb);
skb = priv->RcvCtl[ctx].skb = NULL;
}
@@ -1218,8 +1223,9 @@ mpt_lan_post_receive_buckets(struct mpt_lan_priv *priv)
break;
}
- dma = pci_map_single(mpt_dev->pcidev, skb->data,
- len, PCI_DMA_FROMDEVICE);
+ dma = dma_map_single(&mpt_dev->pcidev->dev,
+ skb->data, len,
+ DMA_FROM_DEVICE);
priv->RcvCtl[ctx].skb = skb;
priv->RcvCtl[ctx].dma = dma;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 091b45024d34..4acd8f9a48e1 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -702,8 +702,8 @@ mptsas_add_device_component_starget_ir(MPT_ADAPTER *ioc,
if (!hdr.PageLength)
goto out;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
goto out;
@@ -769,8 +769,8 @@ mptsas_add_device_component_starget_ir(MPT_ADAPTER *ioc,
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
}
/**
@@ -1399,8 +1399,8 @@ mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -1426,8 +1426,8 @@ mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure,
enclosure->sep_channel = buffer->SEPBus;
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2058,8 +2058,8 @@ static int mptsas_get_linkerrors(struct sas_phy *phy)
if (!hdr.ExtPageLength)
return -ENXIO;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
@@ -2081,8 +2081,8 @@ static int mptsas_get_linkerrors(struct sas_phy *phy)
le32_to_cpu(buffer->PhyResetProblemCount);
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
return error;
}
@@ -2301,7 +2301,7 @@ static void mptsas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
<< MPI_SGE_FLAGS_SHIFT;
if (!dma_map_sg(&ioc->pcidev->dev, job->request_payload.sg_list,
- 1, PCI_DMA_BIDIRECTIONAL))
+ 1, DMA_BIDIRECTIONAL))
goto put_mf;
flagsLength |= (sg_dma_len(job->request_payload.sg_list) - 4);
@@ -2318,7 +2318,7 @@ static void mptsas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
flagsLength = flagsLength << MPI_SGE_FLAGS_SHIFT;
if (!dma_map_sg(&ioc->pcidev->dev, job->reply_payload.sg_list,
- 1, PCI_DMA_BIDIRECTIONAL))
+ 1, DMA_BIDIRECTIONAL))
goto unmap_out;
flagsLength |= sg_dma_len(job->reply_payload.sg_list) + 4;
ioc->add_sge(psge, flagsLength,
@@ -2356,10 +2356,10 @@ static void mptsas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
unmap_in:
dma_unmap_sg(&ioc->pcidev->dev, job->reply_payload.sg_list, 1,
- PCI_DMA_BIDIRECTIONAL);
+ DMA_BIDIRECTIONAL);
unmap_out:
dma_unmap_sg(&ioc->pcidev->dev, job->request_payload.sg_list, 1,
- PCI_DMA_BIDIRECTIONAL);
+ DMA_BIDIRECTIONAL);
put_mf:
if (mf)
mpt_free_msg_frame(ioc, mf);
@@ -2412,8 +2412,8 @@ mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2452,8 +2452,8 @@ mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
}
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2487,8 +2487,8 @@ mptsas_sas_io_unit_pg1(MPT_ADAPTER *ioc)
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2509,8 +2509,8 @@ mptsas_sas_io_unit_pg1(MPT_ADAPTER *ioc)
device_missing_delay & MPI_SAS_IOUNIT1_REPORT_MISSING_TIMEOUT_MASK;
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2551,8 +2551,8 @@ mptsas_sas_phy_pg0(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2573,8 +2573,8 @@ mptsas_sas_phy_pg0(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle);
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2614,8 +2614,8 @@ mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2654,8 +2654,8 @@ mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info,
device_info->flags = le16_to_cpu(buffer->Flags);
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2697,8 +2697,8 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2737,8 +2737,8 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info,
}
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2777,8 +2777,8 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
goto out;
}
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer) {
error = -ENOMEM;
goto out;
@@ -2810,8 +2810,8 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle);
out_free_consistent:
- pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
- buffer, dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer,
+ dma_handle);
out:
return error;
}
@@ -2896,7 +2896,8 @@ mptsas_exp_repmanufacture_info(MPT_ADAPTER *ioc,
sz = sizeof(struct rep_manu_request) + sizeof(struct rep_manu_reply);
- data_out = pci_alloc_consistent(ioc->pcidev, sz, &data_out_dma);
+ data_out = dma_alloc_coherent(&ioc->pcidev->dev, sz, &data_out_dma,
+ GFP_KERNEL);
if (!data_out) {
printk(KERN_ERR "Memory allocation failure at %s:%d/%s()!\n",
__FILE__, __LINE__, __func__);
@@ -2987,7 +2988,8 @@ mptsas_exp_repmanufacture_info(MPT_ADAPTER *ioc,
}
out_free:
if (data_out_dma)
- pci_free_consistent(ioc->pcidev, sz, data_out, data_out_dma);
+ dma_free_coherent(&ioc->pcidev->dev, sz, data_out,
+ data_out_dma);
put_mf:
if (mf)
mpt_free_msg_frame(ioc, mf);
@@ -4271,8 +4273,8 @@ mptsas_adding_inactive_raid_components(MPT_ADAPTER *ioc, u8 channel, u8 id)
if (!hdr.PageLength)
goto out;
- buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
- &dma_handle);
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ &dma_handle, GFP_KERNEL);
if (!buffer)
goto out;
@@ -4318,8 +4320,8 @@ mptsas_adding_inactive_raid_components(MPT_ADAPTER *ioc, u8 channel, u8 id)
out:
if (buffer)
- pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
- dma_handle);
+ dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4,
+ buffer, dma_handle);
}
/*
* Work queue thread to handle SAS hotplug events
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index c173a5e88c91..315c43f17dd3 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -570,6 +570,7 @@ static struct attribute *afu_cr_attrs[] = {
&class_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(afu_cr);
static void release_afu_config_record(struct kobject *kobj)
{
@@ -581,7 +582,7 @@ static void release_afu_config_record(struct kobject *kobj)
static struct kobj_type afu_config_record_type = {
.sysfs_ops = &kobj_sysfs_ops,
.release = release_afu_config_record,
- .default_attrs = afu_cr_attrs,
+ .default_groups = afu_cr_groups,
};
static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int cr_idx)
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 49ab656e8a96..633e1cf08d6e 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -68,11 +68,6 @@
* which won't work on pure SMBus systems.
*/
-struct at24_client {
- struct i2c_client *client;
- struct regmap *regmap;
-};
-
struct at24_data {
/*
* Lock protects against activities from other Linux tasks,
@@ -94,9 +89,10 @@ struct at24_data {
/*
* Some chips tie up multiple I2C addresses; dummy devices reserve
- * them for us, and we'll use them with SMBus calls.
+ * them for us.
*/
- struct at24_client client[];
+ u8 bank_addr_shift;
+ struct regmap *client_regmaps[];
};
/*
@@ -123,6 +119,7 @@ MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)");
struct at24_chip_data {
u32 byte_len;
u8 flags;
+ u8 bank_addr_shift;
void (*read_post)(unsigned int off, char *buf, size_t count);
};
@@ -137,6 +134,12 @@ struct at24_chip_data {
.read_post = _read_post, \
}
+#define AT24_CHIP_DATA_BS(_name, _len, _flags, _bank_addr_shift) \
+ static const struct at24_chip_data _name = { \
+ .byte_len = _len, .flags = _flags, \
+ .bank_addr_shift = _bank_addr_shift \
+ }
+
static void at24_read_post_vaio(unsigned int off, char *buf, size_t count)
{
int i;
@@ -197,6 +200,7 @@ AT24_CHIP_DATA(at24_data_24c128, 131072 / 8, AT24_FLAG_ADDR16);
AT24_CHIP_DATA(at24_data_24c256, 262144 / 8, AT24_FLAG_ADDR16);
AT24_CHIP_DATA(at24_data_24c512, 524288 / 8, AT24_FLAG_ADDR16);
AT24_CHIP_DATA(at24_data_24c1024, 1048576 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA_BS(at24_data_24c1025, 1048576 / 8, AT24_FLAG_ADDR16, 2);
AT24_CHIP_DATA(at24_data_24c2048, 2097152 / 8, AT24_FLAG_ADDR16);
/* identical to 24c08 ? */
AT24_CHIP_DATA(at24_data_INT3499, 8192 / 8, 0);
@@ -225,6 +229,7 @@ static const struct i2c_device_id at24_ids[] = {
{ "24c256", (kernel_ulong_t)&at24_data_24c256 },
{ "24c512", (kernel_ulong_t)&at24_data_24c512 },
{ "24c1024", (kernel_ulong_t)&at24_data_24c1024 },
+ { "24c1025", (kernel_ulong_t)&at24_data_24c1025 },
{ "24c2048", (kernel_ulong_t)&at24_data_24c2048 },
{ "at24", 0 },
{ /* END OF LIST */ }
@@ -254,6 +259,7 @@ static const struct of_device_id at24_of_match[] = {
{ .compatible = "atmel,24c256", .data = &at24_data_24c256 },
{ .compatible = "atmel,24c512", .data = &at24_data_24c512 },
{ .compatible = "atmel,24c1024", .data = &at24_data_24c1024 },
+ { .compatible = "atmel,24c1025", .data = &at24_data_24c1025 },
{ .compatible = "atmel,24c2048", .data = &at24_data_24c2048 },
{ /* END OF LIST */ },
};
@@ -275,8 +281,8 @@ MODULE_DEVICE_TABLE(acpi, at24_acpi_ids);
* set the byte address; on a multi-master board, another master
* may have changed the chip's "current" address pointer.
*/
-static struct at24_client *at24_translate_offset(struct at24_data *at24,
- unsigned int *offset)
+static struct regmap *at24_translate_offset(struct at24_data *at24,
+ unsigned int *offset)
{
unsigned int i;
@@ -288,12 +294,12 @@ static struct at24_client *at24_translate_offset(struct at24_data *at24,
*offset &= 0xff;
}
- return &at24->client[i];
+ return at24->client_regmaps[i];
}
static struct device *at24_base_client_dev(struct at24_data *at24)
{
- return &at24->client[0].client->dev;
+ return regmap_get_device(at24->client_regmaps[0]);
}
static size_t at24_adjust_read_count(struct at24_data *at24,
@@ -324,14 +330,10 @@ static ssize_t at24_regmap_read(struct at24_data *at24, char *buf,
unsigned int offset, size_t count)
{
unsigned long timeout, read_time;
- struct at24_client *at24_client;
- struct i2c_client *client;
struct regmap *regmap;
int ret;
- at24_client = at24_translate_offset(at24, &offset);
- regmap = at24_client->regmap;
- client = at24_client->client;
+ regmap = at24_translate_offset(at24, &offset);
count = at24_adjust_read_count(at24, offset, count);
/* adjust offset for mac and serial read ops */
@@ -346,7 +348,7 @@ static ssize_t at24_regmap_read(struct at24_data *at24, char *buf,
read_time = jiffies;
ret = regmap_bulk_read(regmap, offset, buf, count);
- dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
+ dev_dbg(regmap_get_device(regmap), "read %zu@%d --> %d (%ld)\n",
count, offset, ret, jiffies);
if (!ret)
return count;
@@ -387,14 +389,10 @@ static ssize_t at24_regmap_write(struct at24_data *at24, const char *buf,
unsigned int offset, size_t count)
{
unsigned long timeout, write_time;
- struct at24_client *at24_client;
- struct i2c_client *client;
struct regmap *regmap;
int ret;
- at24_client = at24_translate_offset(at24, &offset);
- regmap = at24_client->regmap;
- client = at24_client->client;
+ regmap = at24_translate_offset(at24, &offset);
count = at24_adjust_write_count(at24, offset, count);
timeout = jiffies + msecs_to_jiffies(at24_write_timeout);
@@ -406,7 +404,7 @@ static ssize_t at24_regmap_write(struct at24_data *at24, const char *buf,
write_time = jiffies;
ret = regmap_bulk_write(regmap, offset, buf, count);
- dev_dbg(&client->dev, "write %zu@%d --> %d (%ld)\n",
+ dev_dbg(regmap_get_device(regmap), "write %zu@%d --> %d (%ld)\n",
count, offset, ret, jiffies);
if (!ret)
return count;
@@ -538,17 +536,16 @@ static const struct at24_chip_data *at24_get_chip_data(struct device *dev)
}
static int at24_make_dummy_client(struct at24_data *at24, unsigned int index,
+ struct i2c_client *base_client,
struct regmap_config *regmap_config)
{
- struct i2c_client *base_client, *dummy_client;
+ struct i2c_client *dummy_client;
struct regmap *regmap;
- struct device *dev;
-
- base_client = at24->client[0].client;
- dev = &base_client->dev;
- dummy_client = devm_i2c_new_dummy_device(dev, base_client->adapter,
- base_client->addr + index);
+ dummy_client = devm_i2c_new_dummy_device(&base_client->dev,
+ base_client->adapter,
+ base_client->addr +
+ (index << at24->bank_addr_shift));
if (IS_ERR(dummy_client))
return PTR_ERR(dummy_client);
@@ -556,8 +553,7 @@ static int at24_make_dummy_client(struct at24_data *at24, unsigned int index,
if (IS_ERR(regmap))
return PTR_ERR(regmap);
- at24->client[index].client = dummy_client;
- at24->client[index].regmap = regmap;
+ at24->client_regmaps[index] = regmap;
return 0;
}
@@ -680,7 +676,7 @@ static int at24_probe(struct i2c_client *client)
if (IS_ERR(regmap))
return PTR_ERR(regmap);
- at24 = devm_kzalloc(dev, struct_size(at24, client, num_addresses),
+ at24 = devm_kzalloc(dev, struct_size(at24, client_regmaps, num_addresses),
GFP_KERNEL);
if (!at24)
return -ENOMEM;
@@ -690,10 +686,10 @@ static int at24_probe(struct i2c_client *client)
at24->page_size = page_size;
at24->flags = flags;
at24->read_post = cdata->read_post;
+ at24->bank_addr_shift = cdata->bank_addr_shift;
at24->num_addresses = num_addresses;
at24->offset_adj = at24_get_offset_adj(flags, byte_len);
- at24->client[0].client = client;
- at24->client[0].regmap = regmap;
+ at24->client_regmaps[0] = regmap;
at24->vcc_reg = devm_regulator_get(dev, "vcc");
if (IS_ERR(at24->vcc_reg))
@@ -709,7 +705,7 @@ static int at24_probe(struct i2c_client *client)
/* use dummy devices for multiple-address chips */
for (i = 1; i < num_addresses; i++) {
- err = at24_make_dummy_client(at24, i, &regmap_config);
+ err = at24_make_dummy_client(at24, i, client, &regmap_config);
if (err)
return err;
}
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index b38978a3b3ff..c3305bdda69c 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -1,28 +1,27 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * at25.c -- support most SPI EEPROMs, such as Atmel AT25 models
- * and Cypress FRAMs FM25 models
+ * Driver for most of the SPI EEPROMs, such as Atmel AT25 models
+ * and Cypress FRAMs FM25 models.
*
* Copyright (C) 2006 David Brownell
*/
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
+#include <linux/bits.h>
#include <linux/delay.h>
#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/property.h>
#include <linux/sched.h>
+#include <linux/slab.h>
-#include <linux/nvmem-provider.h>
-#include <linux/spi/spi.h>
#include <linux/spi/eeprom.h>
-#include <linux/property.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/math.h>
+#include <linux/spi/spi.h>
+
+#include <linux/nvmem-provider.h>
/*
- * NOTE: this is an *EEPROM* driver. The vagaries of product naming
+ * NOTE: this is an *EEPROM* driver. The vagaries of product naming
* mean that some AT25 products are EEPROMs, and others are FLASH.
* Handle FLASH chips with the drivers/mtd/devices/m25p80.c driver,
* not this one!
@@ -33,9 +32,9 @@
#define FM25_SN_LEN 8 /* serial number length */
struct at25_data {
+ struct spi_eeprom chip;
struct spi_device *spi;
struct mutex lock;
- struct spi_eeprom chip;
unsigned addrlen;
struct nvmem_config nvmem_config;
struct nvmem_device *nvmem;
@@ -58,13 +57,14 @@ struct at25_data {
#define AT25_SR_BP1 0x08
#define AT25_SR_WPEN 0x80 /* writeprotect enable */
-#define AT25_INSTR_BIT3 0x08 /* Additional address bit in instr */
+#define AT25_INSTR_BIT3 0x08 /* additional address bit in instr */
#define FM25_ID_LEN 9 /* ID length */
#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */
-/* Specs often allow 5 msec for a page write, sometimes 20 msec;
+/*
+ * Specs often allow 5ms for a page write, sometimes 20ms;
* it's important to recover from write timeouts.
*/
#define EE_TIMEOUT 25
@@ -96,7 +96,7 @@ static int at25_ee_read(void *priv, unsigned int offset,
instr = AT25_READ;
if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
- if (offset >= (1U << (at25->addrlen * 8)))
+ if (offset >= BIT(at25->addrlen * 8))
instr |= AT25_INSTR_BIT3;
*cp++ = instr;
@@ -109,7 +109,7 @@ static int at25_ee_read(void *priv, unsigned int offset,
*cp++ = offset >> 8;
fallthrough;
case 1:
- case 0: /* can't happen: for better codegen */
+ case 0: /* can't happen: for better code generation */
*cp++ = offset >> 0;
}
@@ -126,11 +126,12 @@ static int at25_ee_read(void *priv, unsigned int offset,
mutex_lock(&at25->lock);
- /* Read it all at once.
+ /*
+ * Read it all at once.
*
* REVISIT that's potentially a problem with large chips, if
* other devices on the bus need to be accessed regularly or
- * this chip is clocked very slowly
+ * this chip is clocked very slowly.
*/
status = spi_sync(at25->spi, &m);
dev_dbg(&at25->spi->dev, "read %zu bytes at %d --> %zd\n",
@@ -140,9 +141,7 @@ static int at25_ee_read(void *priv, unsigned int offset,
return status;
}
-/*
- * read extra registers as ID or serial number
- */
+/* Read extra registers as ID or serial number */
static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
int len)
{
@@ -208,7 +207,8 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
if (!bounce)
return -ENOMEM;
- /* For write, rollover is within the page ... so we write at
+ /*
+ * For write, rollover is within the page ... so we write at
* most one page, then manually roll over to the next page.
*/
mutex_lock(&at25->lock);
@@ -229,7 +229,7 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
instr = AT25_WRITE;
if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
- if (offset >= (1U << (at25->addrlen * 8)))
+ if (offset >= BIT(at25->addrlen * 8))
instr |= AT25_INSTR_BIT3;
*cp++ = instr;
@@ -242,7 +242,7 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
*cp++ = offset >> 8;
fallthrough;
case 1:
- case 0: /* can't happen: for better codegen */
+ case 0: /* can't happen: for better code generation */
*cp++ = offset >> 0;
}
@@ -258,8 +258,9 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
if (status < 0)
break;
- /* REVISIT this should detect (or prevent) failed writes
- * to readonly sections of the EEPROM...
+ /*
+ * REVISIT this should detect (or prevent) failed writes
+ * to read-only sections of the EEPROM...
*/
/* Wait for non-busy status */
@@ -306,34 +307,37 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip)
{
u32 val;
+ int err;
- memset(chip, 0, sizeof(*chip));
strncpy(chip->name, "at25", sizeof(chip->name));
- if (device_property_read_u32(dev, "size", &val) == 0 ||
- device_property_read_u32(dev, "at25,byte-len", &val) == 0) {
- chip->byte_len = val;
- } else {
+ err = device_property_read_u32(dev, "size", &val);
+ if (err)
+ err = device_property_read_u32(dev, "at25,byte-len", &val);
+ if (err) {
dev_err(dev, "Error: missing \"size\" property\n");
- return -ENODEV;
+ return err;
}
+ chip->byte_len = val;
- if (device_property_read_u32(dev, "pagesize", &val) == 0 ||
- device_property_read_u32(dev, "at25,page-size", &val) == 0) {
- chip->page_size = val;
- } else {
+ err = device_property_read_u32(dev, "pagesize", &val);
+ if (err)
+ err = device_property_read_u32(dev, "at25,page-size", &val);
+ if (err) {
dev_err(dev, "Error: missing \"pagesize\" property\n");
- return -ENODEV;
+ return err;
}
+ chip->page_size = val;
- if (device_property_read_u32(dev, "at25,addr-mode", &val) == 0) {
+ err = device_property_read_u32(dev, "address-width", &val);
+ if (err) {
+ err = device_property_read_u32(dev, "at25,addr-mode", &val);
+ if (err) {
+ dev_err(dev, "Error: missing \"address-width\" property\n");
+ return err;
+ }
chip->flags = (u16)val;
} else {
- if (device_property_read_u32(dev, "address-width", &val)) {
- dev_err(dev,
- "Error: missing \"address-width\" property\n");
- return -ENODEV;
- }
switch (val) {
case 9:
chip->flags |= EE_INSTR_BIT3_IS_ADDR;
@@ -359,16 +363,54 @@ static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip)
return 0;
}
+static int at25_fram_to_chip(struct device *dev, struct spi_eeprom *chip)
+{
+ struct at25_data *at25 = container_of(chip, struct at25_data, chip);
+ u8 sernum[FM25_SN_LEN];
+ u8 id[FM25_ID_LEN];
+ int i;
+
+ strncpy(chip->name, "fm25", sizeof(chip->name));
+
+ /* Get ID of chip */
+ fm25_aux_read(at25, id, FM25_RDID, FM25_ID_LEN);
+ if (id[6] != 0xc2) {
+ dev_err(dev, "Error: no Cypress FRAM (id %02x)\n", id[6]);
+ return -ENODEV;
+ }
+ /* Set size found in ID */
+ if (id[7] < 0x21 || id[7] > 0x26) {
+ dev_err(dev, "Error: unsupported size (id %02x)\n", id[7]);
+ return -ENODEV;
+ }
+
+ chip->byte_len = BIT(id[7] - 0x21 + 4) * 1024;
+ if (chip->byte_len > 64 * 1024)
+ chip->flags |= EE_ADDR3;
+ else
+ chip->flags |= EE_ADDR2;
+
+ if (id[8]) {
+ fm25_aux_read(at25, sernum, FM25_RDSN, FM25_SN_LEN);
+ /* Swap byte order */
+ for (i = 0; i < FM25_SN_LEN; i++)
+ at25->sernum[i] = sernum[FM25_SN_LEN - 1 - i];
+ }
+
+ chip->page_size = PAGE_SIZE;
+ return 0;
+}
+
static const struct of_device_id at25_of_match[] = {
- { .compatible = "atmel,at25",},
- { .compatible = "cypress,fm25",},
+ { .compatible = "atmel,at25" },
+ { .compatible = "cypress,fm25" },
{ }
};
MODULE_DEVICE_TABLE(of, at25_of_match);
static const struct spi_device_id at25_spi_ids[] = {
- { .name = "at25",},
- { .name = "fm25",},
+ { .name = "at25" },
+ { .name = "fm25" },
{ }
};
MODULE_DEVICE_TABLE(spi, at25_spi_ids);
@@ -378,31 +420,18 @@ static int at25_probe(struct spi_device *spi)
struct at25_data *at25 = NULL;
int err;
int sr;
- u8 id[FM25_ID_LEN];
- u8 sernum[FM25_SN_LEN];
- int i;
- const struct of_device_id *match;
- bool is_fram = 0;
-
- match = of_match_device(of_match_ptr(at25_of_match), &spi->dev);
- if (match && !strcmp(match->compatible, "cypress,fm25"))
- is_fram = 1;
-
- at25 = devm_kzalloc(&spi->dev, sizeof(struct at25_data), GFP_KERNEL);
- if (!at25)
- return -ENOMEM;
-
- /* Chip description */
- if (spi->dev.platform_data) {
- memcpy(&at25->chip, spi->dev.platform_data, sizeof(at25->chip));
- } else if (!is_fram) {
- err = at25_fw_to_chip(&spi->dev, &at25->chip);
- if (err)
- return err;
- }
-
- /* Ping the chip ... the status register is pretty portable,
- * unlike probing manufacturer IDs. We do expect that system
+ struct spi_eeprom *pdata;
+ bool is_fram;
+
+ err = device_property_match_string(&spi->dev, "compatible", "cypress,fm25");
+ if (err >= 0)
+ is_fram = true;
+ else
+ is_fram = false;
+
+ /*
+ * Ping the chip ... the status register is pretty portable,
+ * unlike probing manufacturer IDs. We do expect that system
* firmware didn't write it in the past few milliseconds!
*/
sr = spi_w8r8(spi, AT25_RDSR);
@@ -415,35 +444,17 @@ static int at25_probe(struct spi_device *spi)
at25->spi = spi;
spi_set_drvdata(spi, at25);
- if (is_fram) {
- /* Get ID of chip */
- fm25_aux_read(at25, id, FM25_RDID, FM25_ID_LEN);
- if (id[6] != 0xc2) {
- dev_err(&spi->dev,
- "Error: no Cypress FRAM (id %02x)\n", id[6]);
- return -ENODEV;
- }
- /* set size found in ID */
- if (id[7] < 0x21 || id[7] > 0x26) {
- dev_err(&spi->dev, "Error: unsupported size (id %02x)\n", id[7]);
- return -ENODEV;
- }
- at25->chip.byte_len = int_pow(2, id[7] - 0x21 + 4) * 1024;
-
- if (at25->chip.byte_len > 64 * 1024)
- at25->chip.flags |= EE_ADDR3;
+ /* Chip description */
+ pdata = dev_get_platdata(&spi->dev);
+ if (pdata) {
+ at25->chip = *pdata;
+ } else {
+ if (is_fram)
+ err = at25_fram_to_chip(&spi->dev, &at25->chip);
else
- at25->chip.flags |= EE_ADDR2;
-
- if (id[8]) {
- fm25_aux_read(at25, sernum, FM25_RDSN, FM25_SN_LEN);
- /* swap byte order */
- for (i = 0; i < FM25_SN_LEN; i++)
- at25->sernum[i] = sernum[FM25_SN_LEN - 1 - i];
- }
-
- at25->chip.page_size = PAGE_SIZE;
- strncpy(at25->chip.name, "fm25", sizeof(at25->chip.name));
+ err = at25_fw_to_chip(&spi->dev, &at25->chip);
+ if (err)
+ return err;
}
/* For now we only support 8/16/24 bit addressing */
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 8132a84698d5..3c0ae07a2d80 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -57,7 +57,7 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
}
va_block->start = virt_addr;
- va_block->end = virt_addr + page_size;
+ va_block->end = virt_addr + page_size - 1;
va_block->size = page_size;
list_add_tail(&va_block->node, &cb->va_block_list);
}
@@ -80,13 +80,13 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
offset += va_block->size;
}
- hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
+ rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
mutex_unlock(&ctx->mmu_lock);
cb->is_mmu_mapped = true;
- return 0;
+ return rc;
err_va_umap:
list_for_each_entry(va_block, &cb->va_block_list, node) {
@@ -97,7 +97,7 @@ err_va_umap:
offset -= va_block->size;
}
- hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+ rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
mutex_unlock(&ctx->mmu_lock);
@@ -126,7 +126,7 @@ static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
"Failed to unmap CB's va 0x%llx\n",
va_block->start);
- hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+ hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
mutex_unlock(&ctx->mmu_lock);
@@ -250,8 +250,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
* Can't use generic function to check this because of special case
* where we create a CB as part of the reset process
*/
- if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) &&
- (ctx_id != HL_KERNEL_ASID_ID))) {
+ if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) {
dev_warn_ratelimited(hdev->dev,
"Device is disabled or in reset. Can't create new CBs\n");
rc = -EBUSY;
@@ -380,8 +379,9 @@ int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
}
static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u64 cb_handle, u32 *usage_cnt)
+ u64 cb_handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
+ struct hl_vm_va_block *va_block;
struct hl_cb *cb;
u32 handle;
int rc = 0;
@@ -402,7 +402,18 @@ static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr,
goto out;
}
- *usage_cnt = atomic_read(&cb->cs_cnt);
+ if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
+ va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node);
+ if (va_block) {
+ *device_va = va_block->start;
+ } else {
+ dev_err(hdev->dev, "CB is not mapped to the device's MMU\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ } else {
+ *usage_cnt = atomic_read(&cb->cs_cnt);
+ }
out:
spin_unlock(&mgr->cb_lock);
@@ -414,7 +425,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
union hl_cb_args *args = data;
struct hl_device *hdev = hpriv->hdev;
enum hl_device_status status;
- u64 handle = 0;
+ u64 handle = 0, device_va;
u32 usage_cnt = 0;
int rc;
@@ -450,13 +461,20 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
case HL_CB_OP_INFO:
rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle,
- &usage_cnt);
- memset(args, 0, sizeof(*args));
- args->out.usage_cnt = usage_cnt;
+ args->in.flags,
+ &usage_cnt,
+ &device_va);
+
+ memset(&args->out, 0, sizeof(args->out));
+
+ if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA)
+ args->out.device_va = device_va;
+ else
+ args->out.usage_cnt = usage_cnt;
break;
default:
- rc = -ENOTTY;
+ rc = -EINVAL;
break;
}
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 4c8000fd246c..0a4ef13d9ac4 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -533,8 +533,8 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
mcs_compl->stream_master_qid_map)) {
/* extract the timestamp only of first completed CS */
if (!mcs_compl->timestamp)
- mcs_compl->timestamp =
- ktime_to_ns(fence->timestamp);
+ mcs_compl->timestamp = ktime_to_ns(fence->timestamp);
+
complete_all(&mcs_compl->completion);
/*
@@ -733,6 +733,14 @@ static void cs_timedout(struct work_struct *work)
hdev = cs->ctx->hdev;
+ /* Save only the first CS timeout parameters */
+ rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1);
+ if (!rc) {
+ hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
+ hdev->last_error.cs_timeout_timestamp = ktime_get();
+ hdev->last_error.cs_timeout_seq = cs->sequence;
+ }
+
switch (cs->type) {
case CS_TYPE_SIGNAL:
dev_err(hdev->dev,
@@ -767,9 +775,9 @@ static void cs_timedout(struct work_struct *work)
if (likely(!skip_reset_on_timeout)) {
if (hdev->reset_on_lockup)
- hl_device_reset(hdev, HL_RESET_TDR);
+ hl_device_reset(hdev, HL_DRV_RESET_TDR);
else
- hdev->needs_reset = true;
+ hdev->reset_info.needs_reset = true;
}
}
@@ -806,7 +814,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
cs->timeout_jiffies = timeout;
cs->skip_reset_on_timeout =
- hdev->skip_reset_on_timeout ||
+ hdev->reset_info.skip_reset_on_timeout ||
!!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
cs->submission_time_jiffies = jiffies;
INIT_LIST_HEAD(&cs->job_list);
@@ -1131,9 +1139,6 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
enum hl_cs_type cs_type;
if (!hl_device_operational(hdev, &status)) {
- dev_warn_ratelimited(hdev->dev,
- "Device is %s. Can't submit new CS\n",
- hdev->status[status]);
return -EBUSY;
}
@@ -1262,7 +1267,8 @@ static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
u32 num_chunks, u64 *cs_seq, u32 flags,
- u32 encaps_signals_handle, u32 timeout)
+ u32 encaps_signals_handle, u32 timeout,
+ u16 *signal_initial_sob_count)
{
bool staged_mid, int_queues_only = true;
struct hl_device *hdev = hpriv->hdev;
@@ -1429,6 +1435,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
goto free_cs_object;
}
+ *signal_initial_sob_count = cs->initial_sob_count;
+
rc = HL_CS_STATUS_SUCCESS;
goto put_cs;
@@ -1457,6 +1465,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
int rc = 0, do_ctx_switch;
void __user *chunks;
u32 num_chunks, tmp;
+ u16 sob_count;
int ret;
do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
@@ -1497,7 +1506,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
rc = 0;
} else {
rc = cs_ioctl_default(hpriv, chunks, num_chunks,
- cs_seq, 0, 0, hdev->timeout_jiffies);
+ cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
}
mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1813,6 +1822,9 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
}
handle->count = count;
+
+ hl_ctx_get(hdev, hpriv->ctx);
+ handle->ctx = hpriv->ctx;
mgr = &hpriv->ctx->sig_mgr;
spin_lock(&mgr->lock);
@@ -1822,7 +1834,7 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
if (hdl_id < 0) {
dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
rc = -EINVAL;
- goto out;
+ goto put_ctx;
}
handle->id = hdl_id;
@@ -1875,7 +1887,10 @@ remove_idr:
idr_remove(&mgr->handles, hdl_id);
spin_unlock(&mgr->lock);
+put_ctx:
+ hl_ctx_put(handle->ctx);
kfree(handle);
+
out:
return rc;
}
@@ -1935,6 +1950,7 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
/* Release the id and free allocated memory of the handle */
idr_remove(&mgr->handles, handle_id);
+ hl_ctx_put(encaps_sig_hdl->ctx);
kfree(encaps_sig_hdl);
} else {
rc = -EINVAL;
@@ -1948,7 +1964,8 @@ out:
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
void __user *chunks, u32 num_chunks,
- u64 *cs_seq, u32 flags, u32 timeout)
+ u64 *cs_seq, u32 flags, u32 timeout,
+ u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
{
struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
bool handle_found = false, is_wait_cs = false,
@@ -2180,6 +2197,9 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
goto free_cs_object;
}
+ *signal_sob_addr_offset = cs->sob_addr_offset;
+ *signal_initial_sob_count = cs->initial_sob_count;
+
rc = HL_CS_STATUS_SUCCESS;
if (is_wait_cs)
wait_cs_submitted = true;
@@ -2210,6 +2230,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
void __user *chunks;
u32 num_chunks, flags, timeout,
signals_count = 0, sob_addr = 0, handle_id = 0;
+ u16 sob_initial_count = 0;
int rc;
rc = hl_cs_sanity_checks(hpriv, args);
@@ -2240,7 +2261,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
case CS_TYPE_WAIT:
case CS_TYPE_COLLECTIVE_WAIT:
rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
- &cs_seq, args->in.cs_flags, timeout);
+ &cs_seq, args->in.cs_flags, timeout,
+ &sob_addr, &sob_initial_count);
break;
case CS_RESERVE_SIGNALS:
rc = cs_ioctl_reserve_signals(hpriv,
@@ -2256,20 +2278,33 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
args->in.cs_flags,
args->in.encaps_sig_handle_id,
- timeout);
+ timeout, &sob_initial_count);
break;
}
out:
if (rc != -EAGAIN) {
memset(args, 0, sizeof(*args));
- if (cs_type == CS_RESERVE_SIGNALS) {
+ switch (cs_type) {
+ case CS_RESERVE_SIGNALS:
args->out.handle_id = handle_id;
args->out.sob_base_addr_offset = sob_addr;
args->out.count = signals_count;
- } else {
+ break;
+ case CS_TYPE_SIGNAL:
+ args->out.sob_base_addr_offset = sob_addr;
+ args->out.sob_count_before_submission = sob_initial_count;
+ args->out.seq = cs_seq;
+ break;
+ case CS_TYPE_DEFAULT:
+ args->out.sob_count_before_submission = sob_initial_count;
args->out.seq = cs_seq;
+ break;
+ default:
+ args->out.seq = cs_seq;
+ break;
}
+
args->out.status = rc;
}
@@ -2334,16 +2369,18 @@ static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence
* hl_cs_poll_fences - iterate CS fences to check for CS completion
*
* @mcs_data: multi-CS internal data
+ * @mcs_compl: multi-CS completion structure
*
* @return 0 on success, otherwise non 0 error code
*
* The function iterates on all CS sequence in the list and set bit in
* completion_bitmap for each completed CS.
- * while iterating, the function can extracts the stream map to be later
- * used by the waiting function.
- * this function shall be called after taking context ref
+ * While iterating, the function sets the stream map of each fence in the fence
+ * array in the completion QID stream map to be used by CSs to perform
+ * completion to the multi-CS context.
+ * This function shall be called after taking context ref
*/
-static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
+static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
{
struct hl_fence **fence_ptr = mcs_data->fence_arr;
struct hl_device *hdev = mcs_data->ctx->hdev;
@@ -2360,6 +2397,15 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
return rc;
/*
+ * re-initialize the completion here to handle 2 possible cases:
+ * 1. CS will complete the multi-CS prior clearing the completion. in which
+ * case the fence iteration is guaranteed to catch the CS completion.
+ * 2. the completion will occur after re-init of the completion.
+ * in which case we will wake up immediately in wait_for_completion.
+ */
+ reinit_completion(&mcs_compl->completion);
+
+ /*
* set to maximum time to verify timestamp is valid: if at the end
* this value is maintained- no timestamp was updated
*/
@@ -2370,6 +2416,21 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
struct hl_fence *fence = *fence_ptr;
/*
+ * In order to prevent case where we wait until timeout even though a CS associated
+ * with the multi-CS actually completed we do things in the below order:
+ * 1. for each fence set it's QID map in the multi-CS completion QID map. This way
+ * any CS can, potentially, complete the multi CS for the specific QID (note
+ * that once completion is initialized, calling complete* and then wait on the
+ * completion will cause it to return at once)
+ * 2. only after allowing multi-CS completion for the specific QID we check whether
+ * the specific CS already completed (and thus the wait for completion part will
+ * be skipped). if the CS not completed it is guaranteed that completing CS will
+ * wake up the completion.
+ */
+ if (fence)
+ mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;
+
+ /*
* function won't sleep as it is called with timeout 0 (i.e.
* poll the fence)
*/
@@ -2384,9 +2445,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
switch (status) {
case CS_WAIT_STATUS_BUSY:
- /* CS did not finished, keep waiting on its QID*/
- mcs_data->stream_master_qid_map |=
- fence->stream_master_qid_map;
+ /* CS did not finished, QID to wait on already stored */
break;
case CS_WAIT_STATUS_COMPLETED:
/*
@@ -2394,9 +2453,19 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
* returns to user indicating CS completed before it finished
* all of its mcs handling, to avoid race the next time the
* user waits for mcs.
+ * note: when reaching this case fence is definitely not NULL
+ * but NULL check was added to overcome static analysis
*/
- if (!fence->mcs_handling_done)
+ if (fence && !fence->mcs_handling_done) {
+ /*
+ * in case multi CS is completed but MCS handling not done
+ * we "complete" the multi CS to prevent it from waiting
+ * until time-out and the "multi-CS handling done" will have
+ * another chance at the next iteration
+ */
+ complete_all(&mcs_compl->completion);
break;
+ }
mcs_data->completion_bitmap |= BIT(i);
/*
@@ -2456,6 +2525,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
return rc;
}
+static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
+{
+ if (usecs <= U32_MAX)
+ return usecs_to_jiffies(usecs);
+
+ /*
+ * If the value in nanoseconds is larger than 64 bit, use the largest
+ * 64 bit value.
+ */
+ if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
+ return nsecs_to_jiffies(U64_MAX);
+
+ return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
+}
+
/*
* hl_wait_multi_cs_completion_init - init completion structure
*
@@ -2469,9 +2553,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
* the function gets the first available completion (by marking it "used")
* and initialize its values.
*/
-static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
- struct hl_device *hdev,
- u8 stream_master_bitmap)
+static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
{
struct multi_cs_completion *mcs_compl;
int i;
@@ -2483,8 +2565,11 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
if (!mcs_compl->used) {
mcs_compl->used = 1;
mcs_compl->timestamp = 0;
- mcs_compl->stream_master_qid_map = stream_master_bitmap;
- reinit_completion(&mcs_compl->completion);
+ /*
+ * init QID map to 0 to avoid completion by CSs. the actual QID map
+ * to multi-CS CSs will be set incrementally at a later stage
+ */
+ mcs_compl->stream_master_qid_map = 0;
spin_unlock(&mcs_compl->lock);
break;
}
@@ -2492,8 +2577,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
}
if (i == MULTI_CS_MAX_USER_CTX) {
- dev_err(hdev->dev,
- "no available multi-CS completion structure\n");
+ dev_err(hdev->dev, "no available multi-CS completion structure\n");
return ERR_PTR(-ENOMEM);
}
return mcs_compl;
@@ -2524,27 +2608,18 @@ static void hl_wait_multi_cs_completion_fini(
*
* @return 0 on success, otherwise non 0 error code
*/
-static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
+static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
+ struct multi_cs_completion *mcs_compl)
{
- struct hl_device *hdev = mcs_data->ctx->hdev;
- struct multi_cs_completion *mcs_compl;
long completion_rc;
- mcs_compl = hl_wait_multi_cs_completion_init(hdev,
- mcs_data->stream_master_qid_map);
- if (IS_ERR(mcs_compl))
- return PTR_ERR(mcs_compl);
-
- completion_rc = wait_for_completion_interruptible_timeout(
- &mcs_compl->completion,
- usecs_to_jiffies(mcs_data->timeout_us));
+ completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
+ mcs_data->timeout_jiffies);
/* update timestamp */
if (completion_rc > 0)
mcs_data->timestamp = mcs_compl->timestamp;
- hl_wait_multi_cs_completion_fini(mcs_compl);
-
mcs_data->wait_status = completion_rc;
return 0;
@@ -2577,6 +2652,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev)
*/
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
+ struct multi_cs_completion *mcs_compl;
struct hl_device *hdev = hpriv->hdev;
struct multi_cs_data mcs_data = {0};
union hl_wait_cs_args *args = data;
@@ -2631,9 +2707,17 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
hl_ctx_get(hdev, ctx);
+ /* wait (with timeout) for the first CS to be completed */
+ mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
+ mcs_compl = hl_wait_multi_cs_completion_init(hdev);
+ if (IS_ERR(mcs_compl)) {
+ rc = PTR_ERR(mcs_compl);
+ goto put_ctx;
+ }
+
/* poll all CS fences, extract timestamp */
mcs_data.update_ts = true;
- rc = hl_cs_poll_fences(&mcs_data);
+ rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
/*
* skip wait for CS completion when one of the below is true:
* - an error on the poll function
@@ -2641,34 +2725,39 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
* - the user called ioctl with timeout 0
*/
if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
- goto put_ctx;
+ goto completion_fini;
- /* wait (with timeout) for the first CS to be completed */
- mcs_data.timeout_us = args->in.timeout_us;
- rc = hl_wait_multi_cs_completion(&mcs_data);
- if (rc)
- goto put_ctx;
+ while (true) {
+ rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
+ if (rc || (mcs_data.wait_status == 0))
+ break;
- if (mcs_data.wait_status > 0) {
/*
* poll fences once again to update the CS map.
* no timestamp should be updated this time.
*/
mcs_data.update_ts = false;
- rc = hl_cs_poll_fences(&mcs_data);
+ rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
+
+ if (mcs_data.completion_bitmap)
+ break;
/*
* if hl_wait_multi_cs_completion returned before timeout (i.e.
- * it got a completion) we expect to see at least one CS
- * completed after the poll function.
+ * it got a completion) it either got completed by CS in the multi CS list
+ * (in which case the indication will be non empty completion_bitmap) or it
+ * got completed by CS submitted to one of the shared stream master but
+ * not in the multi CS list (in which case we should wait again but modify
+ * the timeout and set timestamp as zero to let a CS related to the current
+ * multi-CS set a new, relevant, timestamp)
*/
- if (!mcs_data.completion_bitmap) {
- dev_warn_ratelimited(hdev->dev,
- "Multi-CS got completion on wait but no CS completed\n");
- rc = -EFAULT;
- }
+ mcs_data.timeout_jiffies = mcs_data.wait_status;
+ mcs_compl->timestamp = 0;
}
+completion_fini:
+ hl_wait_multi_cs_completion_fini(mcs_compl);
+
put_ctx:
hl_ctx_put(ctx);
kfree(fence_arr);
@@ -2699,7 +2788,7 @@ free_seq_arr:
}
/* update if some CS was gone */
- if (mcs_data.timestamp)
+ if (!mcs_data.timestamp)
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
} else {
args->out.status = HL_WAIT_CS_STATUS_BUSY;
@@ -2766,37 +2855,129 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
- u32 timeout_us, u64 user_address,
- u64 target_value, u16 interrupt_offset,
- enum hl_cs_wait_status *status,
+ struct hl_cb_mgr *cb_mgr, u64 timeout_us,
+ u64 cq_counters_handle, u64 cq_counters_offset,
+ u64 target_value, struct hl_user_interrupt *interrupt,
+ u32 *status,
u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
- struct hl_user_interrupt *interrupt;
unsigned long timeout, flags;
- u64 completion_value;
long completion_rc;
+ struct hl_cb *cb;
int rc = 0;
+ u32 handle;
- if (timeout_us == U32_MAX)
- timeout = timeout_us;
- else
- timeout = usecs_to_jiffies(timeout_us);
+ timeout = hl_usecs64_to_jiffies(timeout_us);
hl_ctx_get(hdev, ctx);
- pend = kmalloc(sizeof(*pend), GFP_KERNEL);
+ cq_counters_handle >>= PAGE_SHIFT;
+ handle = (u32) cq_counters_handle;
+
+ cb = hl_cb_get(hdev, cb_mgr, handle);
+ if (!cb) {
+ hl_ctx_put(ctx);
+ return -EINVAL;
+ }
+
+ pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
+ hl_cb_put(cb);
hl_ctx_put(ctx);
return -ENOMEM;
}
hl_fence_init(&pend->fence, ULONG_MAX);
- if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
- interrupt = &hdev->common_user_interrupt;
- else
- interrupt = &hdev->user_interrupt[interrupt_offset];
+ pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
+ pend->cq_target_value = target_value;
+
+ /* We check for completion value as interrupt could have been received
+ * before we added the node to the wait list
+ */
+ if (*pend->cq_kernel_addr >= target_value) {
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
+ /* There was no interrupt, we assume the completion is now. */
+ pend->fence.timestamp = ktime_get();
+ }
+
+ if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
+ goto set_timestamp;
+
+ /* Add pending user interrupt to relevant list for the interrupt
+ * handler to monitor
+ */
+ spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+ list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
+ spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+ /* Wait for interrupt handler to signal completion */
+ completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
+ timeout);
+ if (completion_rc > 0) {
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
+ } else {
+ if (completion_rc == -ERESTARTSYS) {
+ dev_err_ratelimited(hdev->dev,
+ "user process got signal while waiting for interrupt ID %d\n",
+ interrupt->interrupt_id);
+ rc = -EINTR;
+ *status = HL_WAIT_CS_STATUS_ABORTED;
+ } else {
+ if (pend->fence.error == -EIO) {
+ dev_err_ratelimited(hdev->dev,
+ "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
+ pend->fence.error);
+ rc = -EIO;
+ *status = HL_WAIT_CS_STATUS_ABORTED;
+ } else {
+ dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
+ interrupt->interrupt_id);
+ rc = -ETIMEDOUT;
+ }
+ *status = HL_WAIT_CS_STATUS_BUSY;
+ }
+ }
+
+ spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+ list_del(&pend->wait_list_node);
+ spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+set_timestamp:
+ *timestamp = ktime_to_ns(pend->fence.timestamp);
+
+ kfree(pend);
+ hl_cb_put(cb);
+ hl_ctx_put(ctx);
+
+ return rc;
+}
+
+static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
+ u64 timeout_us, u64 user_address,
+ u64 target_value, struct hl_user_interrupt *interrupt,
+
+ u32 *status,
+ u64 *timestamp)
+{
+ struct hl_user_pending_interrupt *pend;
+ unsigned long timeout, flags;
+ u64 completion_value;
+ long completion_rc;
+ int rc = 0;
+
+ timeout = hl_usecs64_to_jiffies(timeout_us);
+
+ hl_ctx_get(hdev, ctx);
+
+ pend = kzalloc(sizeof(*pend), GFP_KERNEL);
+ if (!pend) {
+ hl_ctx_put(ctx);
+ return -ENOMEM;
+ }
+
+ hl_fence_init(&pend->fence, ULONG_MAX);
/* Add pending user interrupt to relevant list for the interrupt
* handler to monitor
@@ -2815,13 +2996,14 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
}
if (completion_value >= target_value) {
- *status = CS_WAIT_STATUS_COMPLETED;
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
/* There was no interrupt, we assume the completion is now. */
pend->fence.timestamp = ktime_get();
- } else
- *status = CS_WAIT_STATUS_BUSY;
+ } else {
+ *status = HL_WAIT_CS_STATUS_BUSY;
+ }
- if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
+ if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
goto remove_pending_user_interrupt;
wait_again:
@@ -2850,7 +3032,13 @@ wait_again:
}
if (completion_value >= target_value) {
- *status = CS_WAIT_STATUS_COMPLETED;
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
+ } else if (pend->fence.error) {
+ dev_err_ratelimited(hdev->dev,
+ "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
+ pend->fence.error);
+ /* set the command completion status as ABORTED */
+ *status = HL_WAIT_CS_STATUS_ABORTED;
} else {
timeout = completion_rc;
goto wait_again;
@@ -2861,7 +3049,7 @@ wait_again:
interrupt->interrupt_id);
rc = -EINTR;
} else {
- *status = CS_WAIT_STATUS_BUSY;
+ *status = HL_WAIT_CS_STATUS_BUSY;
}
remove_pending_user_interrupt:
@@ -2879,11 +3067,12 @@ remove_pending_user_interrupt:
static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
- u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt;
+ u16 interrupt_id, first_interrupt, last_interrupt;
struct hl_device *hdev = hpriv->hdev;
struct asic_fixed_properties *prop;
+ struct hl_user_interrupt *interrupt;
union hl_wait_cs_args *args = data;
- enum hl_cs_wait_status status;
+ u32 status = HL_WAIT_CS_STATUS_BUSY;
u64 timestamp;
int rc;
@@ -2894,8 +3083,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return -EPERM;
}
- interrupt_id =
- FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
+ interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
first_interrupt = prop->first_available_user_msix_interrupt;
last_interrupt = prop->first_available_user_msix_interrupt +
@@ -2908,15 +3096,21 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
}
if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
- interrupt_offset = HL_COMMON_USER_INTERRUPT_ID;
+ interrupt = &hdev->common_user_interrupt;
else
- interrupt_offset = interrupt_id - first_interrupt;
+ interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
- rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
+ if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
+ rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
+ args->in.interrupt_timeout_us, args->in.cq_counters_handle,
+ args->in.cq_counters_offset,
+ args->in.target, interrupt, &status,
+ &timestamp);
+ else
+ rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
- args->in.target, interrupt_offset, &status,
+ args->in.target, interrupt, &status,
&timestamp);
-
if (rc) {
if (rc != -EINTR)
dev_err_ratelimited(hdev->dev,
@@ -2926,22 +3120,13 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
}
memset(args, 0, sizeof(*args));
+ args->out.status = status;
if (timestamp) {
args->out.timestamp_nsec = timestamp;
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
}
- switch (status) {
- case CS_WAIT_STATUS_COMPLETED:
- args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
- break;
- case CS_WAIT_STATUS_BUSY:
- default:
- args->out.status = HL_WAIT_CS_STATUS_BUSY;
- break;
- }
-
return 0;
}
@@ -2955,7 +3140,7 @@ int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
* user interrupt
*/
if (!hl_device_operational(hpriv->hdev, NULL))
- return -EPERM;
+ return -EBUSY;
if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
rc = hl_interrupt_wait_ioctl(hpriv, data);
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index d0aaccd4df2c..c6360e33bce8 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -13,13 +13,13 @@ void hl_encaps_handle_do_release(struct kref *ref)
{
struct hl_cs_encaps_sig_handle *handle =
container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
- struct hl_ctx *ctx = handle->hdev->compute_ctx;
- struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;
+ struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr;
spin_lock(&mgr->lock);
idr_remove(&mgr->handles, handle->id);
spin_unlock(&mgr->lock);
+ hl_ctx_put(handle->ctx);
kfree(handle);
}
@@ -27,8 +27,7 @@ static void hl_encaps_handle_do_release_sob(struct kref *ref)
{
struct hl_cs_encaps_sig_handle *handle =
container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
- struct hl_ctx *ctx = handle->hdev->compute_ctx;
- struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;
+ struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr;
/* if we're here, then there was a signals reservation but cs with
* encaps signals wasn't submitted, so need to put refcount
@@ -40,6 +39,7 @@ static void hl_encaps_handle_do_release_sob(struct kref *ref)
idr_remove(&mgr->handles, handle->id);
spin_unlock(&mgr->lock);
+ hl_ctx_put(handle->ctx);
kfree(handle);
}
@@ -97,11 +97,9 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
/* The engines are stopped as there is no executing CS, but the
* Coresight might be still working by accessing addresses
* related to the stopped engines. Hence stop it explicitly.
- * Stop only if this is the compute context, as there can be
- * only one compute context
*/
- if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
- hl_device_set_debug_mode(hdev, false);
+ if (hdev->in_debug)
+ hl_device_set_debug_mode(hdev, ctx, false);
hdev->asic_funcs->ctx_fini(ctx);
hl_cb_va_pool_fini(ctx);
@@ -167,7 +165,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
hpriv->ctx = ctx;
/* TODO: remove the following line for multiple process support */
- hdev->compute_ctx = ctx;
+ hdev->is_compute_ctx_active = true;
return 0;
@@ -274,6 +272,27 @@ int hl_ctx_put(struct hl_ctx *ctx)
return kref_put(&ctx->refcount, hl_ctx_do_release);
}
+struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev)
+{
+ struct hl_ctx *ctx = NULL;
+ struct hl_fpriv *hpriv;
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
+ /* There can only be a single user which has opened the compute device, so exit
+ * immediately once we find him
+ */
+ ctx = hpriv->ctx;
+ hl_ctx_get(hdev, ctx);
+ break;
+ }
+
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ return ctx;
+}
+
/*
* hl_ctx_get_fence_locked - get CS fence under CS lock
*
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 1f2a3dc6c4e2..fc084ee5106e 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -15,19 +15,25 @@
#define MMU_ADDR_BUF_SIZE 40
#define MMU_ASID_BUF_SIZE 10
#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
+#define I2C_MAX_TRANSACTION_LEN 8
static struct dentry *hl_debug_root;
static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
- u8 i2c_reg, long *val)
+ u8 i2c_reg, u8 i2c_len, u64 *val)
{
struct cpucp_packet pkt;
- u64 result;
int rc;
if (!hl_device_operational(hdev, NULL))
return -EBUSY;
+ if (i2c_len > I2C_MAX_TRANSACTION_LEN) {
+ dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n",
+ i2c_len, I2C_MAX_TRANSACTION_LEN);
+ return -EINVAL;
+ }
+
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD <<
@@ -35,12 +41,10 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
pkt.i2c_bus = i2c_bus;
pkt.i2c_addr = i2c_addr;
pkt.i2c_reg = i2c_reg;
+ pkt.i2c_len = i2c_len;
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, &result);
-
- *val = (long) result;
-
+ 0, val);
if (rc)
dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
@@ -48,7 +52,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
}
static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
- u8 i2c_reg, u32 val)
+ u8 i2c_reg, u8 i2c_len, u64 val)
{
struct cpucp_packet pkt;
int rc;
@@ -56,6 +60,12 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
if (!hl_device_operational(hdev, NULL))
return -EBUSY;
+ if (i2c_len > I2C_MAX_TRANSACTION_LEN) {
+ dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n",
+ i2c_len, I2C_MAX_TRANSACTION_LEN);
+ return -EINVAL;
+ }
+
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR <<
@@ -63,6 +73,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
pkt.i2c_bus = i2c_bus;
pkt.i2c_addr = i2c_addr;
pkt.i2c_reg = i2c_reg;
+ pkt.i2c_len = i2c_len;
pkt.value = cpu_to_le64(val);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -235,6 +246,8 @@ static int vm_show(struct seq_file *s, void *data)
struct hl_vm_hash_node *hnode;
struct hl_userptr *userptr;
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+ struct hl_va_range *va_range;
+ struct hl_vm_va_block *va_block;
enum vm_type *vm_type;
bool once = true;
u64 j;
@@ -314,6 +327,25 @@ static int vm_show(struct seq_file *s, void *data)
spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
+ ctx = hl_get_compute_ctx(dev_entry->hdev);
+ if (ctx) {
+ seq_puts(s, "\nVA ranges:\n\n");
+ for (i = HL_VA_RANGE_TYPE_HOST ; i < HL_VA_RANGE_TYPE_MAX ; ++i) {
+ va_range = ctx->va_range[i];
+ seq_printf(s, " va_range %d\n", i);
+ seq_puts(s, "---------------------\n");
+ mutex_lock(&va_range->lock);
+ list_for_each_entry(va_block, &va_range->list, node) {
+ seq_printf(s, "%#16llx - %#16llx (%#llx)\n",
+ va_block->start, va_block->end,
+ va_block->size);
+ }
+ mutex_unlock(&va_range->lock);
+ seq_puts(s, "\n");
+ }
+ hl_ctx_put(ctx);
+ }
+
if (!once)
seq_puts(s, "\n");
@@ -407,7 +439,7 @@ static int mmu_show(struct seq_file *s, void *data)
if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
ctx = hdev->kernel_ctx;
else
- ctx = hdev->compute_ctx;
+ ctx = hl_get_compute_ctx(hdev);
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
@@ -495,7 +527,7 @@ static int engines_show(struct seq_file *s, void *data)
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
- if (atomic_read(&hdev->in_reset)) {
+ if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev,
"Can't check device idle during reset\n");
return 0;
@@ -560,7 +592,7 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
u64 *phys_addr)
{
struct hl_vm_phys_pg_pack *phys_pg_pack;
- struct hl_ctx *ctx = hdev->compute_ctx;
+ struct hl_ctx *ctx;
struct hl_vm_hash_node *hnode;
u64 end_address, range_size;
struct hl_userptr *userptr;
@@ -568,6 +600,8 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
bool valid = false;
int i, rc = 0;
+ ctx = hl_get_compute_ctx(hdev);
+
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return -EINVAL;
@@ -624,7 +658,7 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
ssize_t rc;
u32 val;
- if (atomic_read(&hdev->in_reset)) {
+ if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
return 0;
}
@@ -660,7 +694,7 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
u32 value;
ssize_t rc;
- if (atomic_read(&hdev->in_reset)) {
+ if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
return 0;
}
@@ -697,7 +731,7 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf,
ssize_t rc;
u64 val;
- if (atomic_read(&hdev->in_reset)) {
+ if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
return 0;
}
@@ -733,7 +767,7 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf,
u64 value;
ssize_t rc;
- if (atomic_read(&hdev->in_reset)) {
+ if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
return 0;
}
@@ -768,7 +802,7 @@ static ssize_t hl_dma_size_write(struct file *f, const char __user *buf,
ssize_t rc;
u32 size;
- if (atomic_read(&hdev->in_reset)) {
+ if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev, "Can't DMA during reset\n");
return 0;
}
@@ -874,22 +908,22 @@ static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
char tmp_buf[32];
- long val;
+ u64 val;
ssize_t rc;
if (*ppos)
return 0;
rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,
- entry->i2c_reg, &val);
+ entry->i2c_reg, entry->i2c_len, &val);
if (rc) {
dev_err(hdev->dev,
- "Failed to read from I2C bus %d, addr %d, reg %d\n",
- entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
+ "Failed to read from I2C bus %d, addr %d, reg %d, len %d\n",
+ entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len);
return rc;
}
- sprintf(tmp_buf, "0x%02lx\n", val);
+ sprintf(tmp_buf, "%#02llx\n", val);
rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
strlen(tmp_buf));
@@ -901,19 +935,19 @@ static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
- u32 value;
+ u64 value;
ssize_t rc;
- rc = kstrtouint_from_user(buf, count, 16, &value);
+ rc = kstrtou64_from_user(buf, count, 16, &value);
if (rc)
return rc;
rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,
- entry->i2c_reg, value);
+ entry->i2c_reg, entry->i2c_len, value);
if (rc) {
dev_err(hdev->dev,
- "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n",
- value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
+ "Failed to write %#02llx to I2C bus %d, addr %d, reg %d, len %d\n",
+ value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len);
return rc;
}
@@ -1043,7 +1077,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
u64 value;
ssize_t rc;
- if (atomic_read(&hdev->in_reset)) {
+ if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev,
"Can't change clock gating during reset\n");
return 0;
@@ -1085,7 +1119,7 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
u32 value;
ssize_t rc;
- if (atomic_read(&hdev->in_reset)) {
+ if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev,
"Can't change stop on error during reset\n");
return 0;
@@ -1396,6 +1430,11 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry->root,
&dev_entry->i2c_reg);
+ debugfs_create_u8("i2c_len",
+ 0644,
+ dev_entry->root,
+ &dev_entry->i2c_len);
+
debugfs_create_file("i2c_data",
0644,
dev_entry->root,
@@ -1458,7 +1497,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
debugfs_create_x8("skip_reset_on_timeout",
0644,
dev_entry->root,
- &hdev->skip_reset_on_timeout);
+ &hdev->reset_info.skip_reset_on_timeout);
debugfs_create_file("state_dump",
0600,
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 2022e5d7b3ad..733338ab6f1d 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -17,9 +17,9 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
- if (atomic_read(&hdev->in_reset))
+ if (hdev->reset_info.in_reset)
status = HL_DEVICE_STATUS_IN_RESET;
- else if (hdev->needs_reset)
+ else if (hdev->reset_info.needs_reset)
status = HL_DEVICE_STATUS_NEEDS_RESET;
else if (hdev->disabled)
status = HL_DEVICE_STATUS_MALFUNCTION;
@@ -95,14 +95,14 @@ static void hpriv_release(struct kref *ref)
if ((hdev->reset_if_device_not_idle && !device_is_idle)
|| hdev->reset_upon_device_release)
- hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);
+ hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
- /* Now we can mark the compute_ctx as empty. Even if a reset is running in a different
+ /* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
* thread, we don't care because the in_reset is marked so if a user will try to open
- * the device it will fail on that, even if compute_ctx is NULL.
+ * the device it will fail on that, even if compute_ctx is false.
*/
mutex_lock(&hdev->fpriv_list_lock);
- hdev->compute_ctx = NULL;
+ hdev->is_compute_ctx_active = false;
mutex_unlock(&hdev->fpriv_list_lock);
kfree(hpriv);
@@ -169,9 +169,9 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
goto out;
}
- mutex_lock(&hdev->fpriv_list_lock);
+ mutex_lock(&hdev->fpriv_ctrl_list_lock);
list_del(&hpriv->dev_node);
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
out:
put_pid(hpriv->taskpid);
@@ -324,16 +324,12 @@ put_devices:
static void device_hard_reset_pending(struct work_struct *work)
{
struct hl_device_reset_work *device_reset_work =
- container_of(work, struct hl_device_reset_work,
- reset_work.work);
+ container_of(work, struct hl_device_reset_work, reset_work.work);
struct hl_device *hdev = device_reset_work->hdev;
u32 flags;
int rc;
- flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD;
-
- if (device_reset_work->fw_reset)
- flags |= HL_RESET_FW;
+ flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;
rc = hl_device_reset(hdev, flags);
if ((rc == -EBUSY) && !hdev->device_fini_pending) {
@@ -452,9 +448,12 @@ static int device_early_init(struct hl_device *hdev)
mutex_init(&hdev->debug_lock);
INIT_LIST_HEAD(&hdev->cs_mirror_list);
spin_lock_init(&hdev->cs_mirror_lock);
+ spin_lock_init(&hdev->reset_info.lock);
INIT_LIST_HEAD(&hdev->fpriv_list);
+ INIT_LIST_HEAD(&hdev->fpriv_ctrl_list);
mutex_init(&hdev->fpriv_list_lock);
- atomic_set(&hdev->in_reset, 0);
+ mutex_init(&hdev->fpriv_ctrl_list_lock);
+ mutex_init(&hdev->clk_throttling.lock);
return 0;
@@ -494,6 +493,9 @@ static void device_early_fini(struct hl_device *hdev)
mutex_destroy(&hdev->send_cpu_message_lock);
mutex_destroy(&hdev->fpriv_list_lock);
+ mutex_destroy(&hdev->fpriv_ctrl_list_lock);
+
+ mutex_destroy(&hdev->clk_throttling.lock);
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
@@ -513,22 +515,6 @@ static void device_early_fini(struct hl_device *hdev)
hdev->asic_funcs->early_fini(hdev);
}
-static void set_freq_to_low_job(struct work_struct *work)
-{
- struct hl_device *hdev = container_of(work, struct hl_device,
- work_freq.work);
-
- mutex_lock(&hdev->fpriv_list_lock);
-
- if (!hdev->compute_ctx)
- hl_device_set_frequency(hdev, PLL_LOW);
-
- mutex_unlock(&hdev->fpriv_list_lock);
-
- schedule_delayed_work(&hdev->work_freq,
- usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
-}
-
static void hl_device_heartbeat(struct work_struct *work)
{
struct hl_device *hdev = container_of(work, struct hl_device,
@@ -540,8 +526,10 @@ static void hl_device_heartbeat(struct work_struct *work)
if (!hdev->asic_funcs->send_heartbeat(hdev))
goto reschedule;
- dev_err(hdev->dev, "Device heartbeat failed!\n");
- hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT);
+ if (hl_device_operational(hdev, NULL))
+ dev_err(hdev->dev, "Device heartbeat failed!\n");
+
+ hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT);
return;
@@ -552,12 +540,12 @@ reschedule:
* If control reached here, then at least one heartbeat work has been
* scheduled since last reset/init cycle.
* So if the device is not already in reset cycle, reset the flag
- * prev_reset_trigger as no reset occurred with HL_RESET_FW_FATAL_ERR
+ * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR
* status for at least one heartbeat. From this point driver restarts
* tracking future consecutive fatal errors.
*/
- if (!(atomic_read(&hdev->in_reset)))
- hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
+ if (!hdev->reset_info.in_reset)
+ hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
schedule_delayed_work(&hdev->work_heartbeat,
usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
@@ -586,18 +574,6 @@ static int device_late_init(struct hl_device *hdev)
hdev->high_pll = hdev->asic_prop.high_pll;
- /* force setting to low frequency */
- hdev->curr_pll_profile = PLL_LOW;
-
- if (hdev->pm_mng_profile == PM_AUTO)
- hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
- else
- hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
-
- INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
- schedule_delayed_work(&hdev->work_freq,
- usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
-
if (hdev->heartbeat) {
INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
schedule_delayed_work(&hdev->work_heartbeat,
@@ -620,7 +596,6 @@ static void device_late_fini(struct hl_device *hdev)
if (!hdev->late_init_done)
return;
- cancel_delayed_work_sync(&hdev->work_freq);
if (hdev->heartbeat)
cancel_delayed_work_sync(&hdev->work_heartbeat);
@@ -650,36 +625,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
return 0;
}
-/*
- * hl_device_set_frequency - set the frequency of the device
- *
- * @hdev: pointer to habanalabs device structure
- * @freq: the new frequency value
- *
- * Change the frequency if needed. This function has no protection against
- * concurrency, therefore it is assumed that the calling function has protected
- * itself against the case of calling this function from multiple threads with
- * different values
- *
- * Returns 0 if no change was done, otherwise returns 1
- */
-int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
-{
- if ((hdev->pm_mng_profile == PM_MANUAL) ||
- (hdev->curr_pll_profile == freq))
- return 0;
-
- dev_dbg(hdev->dev, "Changing device frequency to %s\n",
- freq == PLL_HIGH ? "high" : "low");
-
- hdev->asic_funcs->set_pll_profile(hdev, freq);
-
- hdev->curr_pll_profile = freq;
-
- return 1;
-}
-
-int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
+int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable)
{
int rc = 0;
@@ -693,12 +639,12 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
goto out;
}
- if (!hdev->hard_reset_pending)
- hdev->asic_funcs->halt_coresight(hdev);
+ if (!hdev->reset_info.hard_reset_pending)
+ hdev->asic_funcs->halt_coresight(hdev, ctx);
hdev->in_debug = 0;
- if (!hdev->hard_reset_pending)
+ if (!hdev->reset_info.hard_reset_pending)
hdev->asic_funcs->set_clock_gating(hdev);
goto out;
@@ -735,6 +681,8 @@ static void take_release_locks(struct hl_device *hdev)
/* Flush anyone that is inside device open */
mutex_lock(&hdev->fpriv_list_lock);
mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_lock(&hdev->fpriv_ctrl_list_lock);
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
@@ -774,11 +722,14 @@ int hl_device_suspend(struct hl_device *hdev)
pci_save_state(hdev->pdev);
/* Block future CS/VM/JOB completion operations */
- rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
- if (rc) {
+ spin_lock(&hdev->reset_info.lock);
+ if (hdev->reset_info.in_reset) {
+ spin_unlock(&hdev->reset_info.lock);
dev_err(hdev->dev, "Can't suspend while in reset\n");
return -EIO;
}
+ hdev->reset_info.in_reset = 1;
+ spin_unlock(&hdev->reset_info.lock);
/* This blocks all other stuff that is not blocked by in_reset */
hdev->disabled = true;
@@ -828,10 +779,12 @@ int hl_device_resume(struct hl_device *hdev)
}
- hdev->disabled = false;
- atomic_set(&hdev->in_reset, 0);
+ /* 'in_reset' was set to true during suspend, now we must clear it in order
+ * for hard reset to be performed
+ */
+ hdev->reset_info.in_reset = 0;
- rc = hl_device_reset(hdev, HL_RESET_HARD);
+ rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
if (rc) {
dev_err(hdev->dev, "Failed to reset device during resume\n");
goto disable_device;
@@ -846,17 +799,21 @@ disable_device:
return rc;
}
-static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
+static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
{
- struct hl_fpriv *hpriv;
struct task_struct *task = NULL;
+ struct list_head *fd_list;
+ struct hl_fpriv *hpriv;
+ struct mutex *fd_lock;
u32 pending_cnt;
+ fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
+ fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
/* Giving time for user to close FD, and for processes that are inside
* hl_device_open to finish
*/
- if (!list_empty(&hdev->fpriv_list))
+ if (!list_empty(fd_list))
ssleep(1);
if (timeout) {
@@ -872,12 +829,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
}
}
- mutex_lock(&hdev->fpriv_list_lock);
+ mutex_lock(fd_lock);
/* This section must be protected because we are dereferencing
* pointers that are freed if the process exits
*/
- list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
+ list_for_each_entry(hpriv, fd_list, dev_node) {
task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
if (task) {
dev_info(hdev->dev, "Killing user process pid=%d\n",
@@ -889,12 +846,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
} else {
dev_warn(hdev->dev,
"Can't get task struct for PID so giving up on killing process\n");
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(fd_lock);
return -ETIME;
}
}
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(fd_lock);
/*
* We killed the open users, but that doesn't mean they are closed.
@@ -906,7 +863,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
*/
wait_for_processes:
- while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
+ while ((!list_empty(fd_list)) && (pending_cnt)) {
dev_dbg(hdev->dev,
"Waiting for all unmap operations to finish before hard reset\n");
@@ -916,7 +873,7 @@ wait_for_processes:
}
/* All processes exited successfully */
- if (list_empty(&hdev->fpriv_list))
+ if (list_empty(fd_list))
return 0;
/* Give up waiting for processes to exit */
@@ -928,14 +885,19 @@ wait_for_processes:
return -EBUSY;
}
-static void device_disable_open_processes(struct hl_device *hdev)
+static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
{
+ struct list_head *fd_list;
struct hl_fpriv *hpriv;
+ struct mutex *fd_lock;
- mutex_lock(&hdev->fpriv_list_lock);
- list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
+ fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
+ fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
+
+ mutex_lock(fd_lock);
+ list_for_each_entry(hpriv, fd_list, dev_node)
hpriv->hdev = NULL;
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(fd_lock);
}
static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
@@ -948,17 +910,17 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
* ('in_reset' makes sure of it). This makes sure that
* 'reset_cause' will continue holding its 1st recorded reason!
*/
- if (flags & HL_RESET_HEARTBEAT) {
- hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
- cur_reset_trigger = HL_RESET_HEARTBEAT;
- } else if (flags & HL_RESET_TDR) {
- hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
- cur_reset_trigger = HL_RESET_TDR;
- } else if (flags & HL_RESET_FW_FATAL_ERR) {
- hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
- cur_reset_trigger = HL_RESET_FW_FATAL_ERR;
+ if (flags & HL_DRV_RESET_HEARTBEAT) {
+ hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
+ cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
+ } else if (flags & HL_DRV_RESET_TDR) {
+ hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR;
+ cur_reset_trigger = HL_DRV_RESET_TDR;
+ } else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
+ hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+ cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
} else {
- hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+ hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
}
/*
@@ -966,11 +928,11 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
* is set and if this reset is due to a fatal FW error
* device is set to an unstable state.
*/
- if (hdev->prev_reset_trigger != cur_reset_trigger) {
- hdev->prev_reset_trigger = cur_reset_trigger;
- hdev->reset_trigger_repeated = 0;
+ if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
+ hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
+ hdev->reset_info.reset_trigger_repeated = 0;
} else {
- hdev->reset_trigger_repeated = 1;
+ hdev->reset_info.reset_trigger_repeated = 1;
}
/* If reset is due to heartbeat, device CPU is no responsive in
@@ -979,8 +941,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
* If F/W is performing the reset, no need to send it a message to disable
* PCI access
*/
- if ((flags & HL_RESET_HARD) &&
- !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
+ if ((flags & HL_DRV_RESET_HARD) &&
+ !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
/* Disable PCI access from device F/W so he won't send
* us additional interrupts. We disable MSI/MSI-X at
* the halt_engines function and we can't have the F/W
@@ -1015,34 +977,39 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
*/
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
+ bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
+ reset_upon_device_release = false, schedule_hard_reset = false;
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
- bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false;
+ struct hl_ctx *ctx;
int i, rc;
if (!hdev->init_done) {
- dev_err(hdev->dev,
- "Can't reset before initialization is done\n");
+ dev_err(hdev->dev, "Can't reset before initialization is done\n");
return 0;
}
- hard_reset = !!(flags & HL_RESET_HARD);
- from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD);
- fw_reset = !!(flags & HL_RESET_FW);
+ hard_reset = !!(flags & HL_DRV_RESET_HARD);
+ from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
+ fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
- if (!hard_reset && !hdev->supports_soft_reset) {
+ if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
hard_instead_soft = true;
hard_reset = true;
}
- if (hdev->reset_upon_device_release &&
- (flags & HL_RESET_DEVICE_RELEASE)) {
- dev_dbg(hdev->dev,
- "Perform %s-reset upon device release\n",
- hard_reset ? "hard" : "soft");
+ if (hdev->reset_upon_device_release && (flags & HL_DRV_RESET_DEV_RELEASE)) {
+ if (hard_reset) {
+ dev_crit(hdev->dev,
+ "Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n");
+ return -EINVAL;
+ }
+
+ reset_upon_device_release = true;
+
goto do_reset;
}
- if (!hard_reset && !hdev->allow_inference_soft_reset) {
+ if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) {
hard_instead_soft = true;
hard_reset = true;
}
@@ -1062,12 +1029,22 @@ do_reset:
*/
if (!from_hard_reset_thread) {
/* Block future CS/VM/JOB completion operations */
- rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
- if (rc)
+ spin_lock(&hdev->reset_info.lock);
+ if (hdev->reset_info.in_reset) {
+ /* We only allow scheduling of a hard reset during soft reset */
+ if (hard_reset && hdev->reset_info.is_in_soft_reset)
+ hdev->reset_info.hard_reset_schedule_flags = flags;
+ spin_unlock(&hdev->reset_info.lock);
return 0;
+ }
+ hdev->reset_info.in_reset = 1;
+ spin_unlock(&hdev->reset_info.lock);
handle_reset_trigger(hdev, flags);
+ /* This still allows the completion of some KDMA ops */
+ hdev->reset_info.is_in_soft_reset = !hard_reset;
+
/* This also blocks future CS/VM/JOB completion operations */
hdev->disabled = true;
@@ -1075,21 +1052,19 @@ do_reset:
if (hard_reset)
dev_info(hdev->dev, "Going to reset device\n");
- else if (flags & HL_RESET_DEVICE_RELEASE)
- dev_info(hdev->dev,
- "Going to reset device after it was released by user\n");
+ else if (reset_upon_device_release)
+ dev_info(hdev->dev, "Going to reset device after release by user\n");
else
- dev_info(hdev->dev,
- "Going to reset compute engines of inference device\n");
+ dev_info(hdev->dev, "Going to reset engines of inference device\n");
}
again:
if ((hard_reset) && (!from_hard_reset_thread)) {
- hdev->hard_reset_pending = true;
+ hdev->reset_info.hard_reset_pending = true;
hdev->process_kill_trial_cnt = 0;
- hdev->device_reset_work.fw_reset = fw_reset;
+ hdev->device_reset_work.flags = flags;
/*
* Because the reset function can't run from heartbeat work,
@@ -1109,7 +1084,7 @@ kill_processes:
* process can't really exit until all its CSs are done, which
* is what we do in cs rollback
*/
- rc = device_kill_open_processes(hdev, 0);
+ rc = device_kill_open_processes(hdev, 0, false);
if (rc == -EBUSY) {
if (hdev->device_fini_pending) {
@@ -1138,7 +1113,7 @@ kill_processes:
hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
if (hard_reset) {
- hdev->fw_loader.linux_loaded = false;
+ hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
/* Release kernel context */
if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
@@ -1154,24 +1129,23 @@ kill_processes:
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_reset(hdev, &hdev->completion_queue[i]);
- mutex_lock(&hdev->fpriv_list_lock);
-
/* Make sure the context switch phase will run again */
- if (hdev->compute_ctx) {
- atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
- hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
+ ctx = hl_get_compute_ctx(hdev);
+ if (ctx) {
+ atomic_set(&ctx->thread_ctx_switch_token, 1);
+ ctx->thread_ctx_switch_wait_token = 0;
+ hl_ctx_put(ctx);
}
- mutex_unlock(&hdev->fpriv_list_lock);
-
/* Finished tear-down, starting to re-initialize */
if (hard_reset) {
hdev->device_cpu_disabled = false;
- hdev->hard_reset_pending = false;
+ hdev->reset_info.hard_reset_pending = false;
- if (hdev->reset_trigger_repeated &&
- (hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) {
+ if (hdev->reset_info.reset_trigger_repeated &&
+ (hdev->reset_info.prev_reset_trigger ==
+ HL_DRV_RESET_FW_FATAL_ERR)) {
/* if there 2 back to back resets from FW,
* ensure driver puts the driver in a unusable state
*/
@@ -1204,7 +1178,7 @@ kill_processes:
goto out_err;
}
- hdev->compute_ctx = NULL;
+ hdev->is_compute_ctx_active = false;
rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
if (rc) {
@@ -1225,16 +1199,14 @@ kill_processes:
rc = hdev->asic_funcs->hw_init(hdev);
if (rc) {
- dev_err(hdev->dev,
- "failed to initialize the H/W after reset\n");
+ dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
goto out_err;
}
/* If device is not idle fail the reset process */
if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
- dev_err(hdev->dev,
- "device is not idle (mask 0x%llx_%llx) after reset\n",
+ dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx) after reset\n",
idle_mask[1], idle_mask[0]);
rc = -EIO;
goto out_err;
@@ -1243,43 +1215,56 @@ kill_processes:
/* Check that the communication with the device is working */
rc = hdev->asic_funcs->test_queues(hdev);
if (rc) {
- dev_err(hdev->dev,
- "Failed to detect if device is alive after reset\n");
+ dev_err(hdev->dev, "Failed to detect if device is alive after reset\n");
goto out_err;
}
if (hard_reset) {
rc = device_late_init(hdev);
if (rc) {
- dev_err(hdev->dev,
- "Failed late init after hard reset\n");
+ dev_err(hdev->dev, "Failed late init after hard reset\n");
goto out_err;
}
rc = hl_vm_init(hdev);
if (rc) {
- dev_err(hdev->dev,
- "Failed to init memory module after hard reset\n");
+ dev_err(hdev->dev, "Failed to init memory module after hard reset\n");
goto out_err;
}
hl_set_max_power(hdev);
} else {
- rc = hdev->asic_funcs->soft_reset_late_init(hdev);
+ rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
if (rc) {
- dev_err(hdev->dev,
- "Failed late init after soft reset\n");
+ if (reset_upon_device_release)
+ dev_err(hdev->dev,
+ "Failed late init in reset after device release\n");
+ else
+ dev_err(hdev->dev, "Failed late init after soft reset\n");
goto out_err;
}
}
- atomic_set(&hdev->in_reset, 0);
- hdev->needs_reset = false;
+ spin_lock(&hdev->reset_info.lock);
+ hdev->reset_info.is_in_soft_reset = false;
+
+ /* Schedule hard reset only if requested and if not already in hard reset.
+ * We keep 'in_reset' enabled, so no other reset can go in during the hard
+ * reset schedule
+ */
+ if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags)
+ schedule_hard_reset = true;
+ else
+ hdev->reset_info.in_reset = 0;
+
+ spin_unlock(&hdev->reset_info.lock);
+
+ hdev->reset_info.needs_reset = false;
dev_notice(hdev->dev, "Successfully finished resetting the device\n");
if (hard_reset) {
- hdev->hard_reset_cnt++;
+ hdev->reset_info.hard_reset_cnt++;
/* After reset is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events
@@ -1287,28 +1272,41 @@ kill_processes:
* the device will be operational although it shouldn't be
*/
hdev->asic_funcs->enable_events_from_fw(hdev);
- } else {
- hdev->soft_reset_cnt++;
+ } else if (!reset_upon_device_release) {
+ hdev->reset_info.soft_reset_cnt++;
+ }
+
+ if (schedule_hard_reset) {
+ dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n");
+ flags = hdev->reset_info.hard_reset_schedule_flags;
+ hdev->reset_info.hard_reset_schedule_flags = 0;
+ hdev->disabled = true;
+ hard_reset = true;
+ handle_reset_trigger(hdev, flags);
+ goto again;
}
return 0;
out_err:
hdev->disabled = true;
+ hdev->reset_info.is_in_soft_reset = false;
if (hard_reset) {
- dev_err(hdev->dev,
- "Failed to reset! Device is NOT usable\n");
- hdev->hard_reset_cnt++;
+ dev_err(hdev->dev, "Failed to reset! Device is NOT usable\n");
+ hdev->reset_info.hard_reset_cnt++;
+ } else if (reset_upon_device_release) {
+ dev_err(hdev->dev, "Failed to reset device after user release\n");
+ hard_reset = true;
+ goto again;
} else {
- dev_err(hdev->dev,
- "Failed to do soft-reset, trying hard reset\n");
- hdev->soft_reset_cnt++;
+ dev_err(hdev->dev, "Failed to do soft-reset\n");
+ hdev->reset_info.soft_reset_cnt++;
hard_reset = true;
goto again;
}
- atomic_set(&hdev->in_reset, 0);
+ hdev->reset_info.in_reset = 0;
return rc;
}
@@ -1455,7 +1453,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto mmu_fini;
}
- hdev->compute_ctx = NULL;
+ hdev->is_compute_ctx_active = false;
hdev->asic_funcs->state_dump_init(hdev);
@@ -1619,6 +1617,7 @@ out_disabled:
*/
void hl_device_fini(struct hl_device *hdev)
{
+ bool device_in_reset;
ktime_t timeout;
u64 reset_sec;
int i, rc;
@@ -1642,10 +1641,22 @@ void hl_device_fini(struct hl_device *hdev)
*/
timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);
- rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
- while (rc) {
+
+ spin_lock(&hdev->reset_info.lock);
+ device_in_reset = !!hdev->reset_info.in_reset;
+ if (!device_in_reset)
+ hdev->reset_info.in_reset = 1;
+ spin_unlock(&hdev->reset_info.lock);
+
+ while (device_in_reset) {
usleep_range(50, 200);
- rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+
+ spin_lock(&hdev->reset_info.lock);
+ device_in_reset = !!hdev->reset_info.in_reset;
+ if (!device_in_reset)
+ hdev->reset_info.in_reset = 1;
+ spin_unlock(&hdev->reset_info.lock);
+
if (ktime_compare(ktime_get(), timeout) > 0) {
dev_crit(hdev->dev,
"Failed to remove device because reset function did not finish\n");
@@ -1667,7 +1678,7 @@ void hl_device_fini(struct hl_device *hdev)
take_release_locks(hdev);
- hdev->hard_reset_pending = true;
+ hdev->reset_info.hard_reset_pending = true;
hl_hwmon_fini(hdev);
@@ -1681,10 +1692,16 @@ void hl_device_fini(struct hl_device *hdev)
"Waiting for all processes to exit (timeout of %u seconds)",
HL_PENDING_RESET_LONG_SEC);
- rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
+ rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false);
if (rc) {
dev_crit(hdev->dev, "Failed to kill all open processes\n");
- device_disable_open_processes(hdev);
+ device_disable_open_processes(hdev, false);
+ }
+
+ rc = device_kill_open_processes(hdev, 0, true);
+ if (rc) {
+ dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
+ device_disable_open_processes(hdev, true);
}
hl_cb_pool_fini(hdev);
@@ -1692,7 +1709,7 @@ void hl_device_fini(struct hl_device *hdev)
/* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, true, false);
- hdev->fw_loader.linux_loaded = false;
+ hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
/* Release kernel context */
if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 4e68fb9d2a6b..6775c5c3166b 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -15,8 +15,6 @@
#define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */
-#define FW_CPU_STATUS_POLL_INTERVAL_USEC 10000
-
static char *extract_fw_ver_from_str(const char *fw_str)
{
char *str, *fw_ver, *whitespace;
@@ -214,7 +212,8 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
- u32 tmp, expected_ack_val;
+ struct hl_bd *sent_bd;
+ u32 tmp, expected_ack_val, pi;
int rc = 0;
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
@@ -239,6 +238,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
/* set fence to a non valid value */
pkt->fence = cpu_to_le32(UINT_MAX);
+ pi = queue->pi;
/*
* The CPU queue is a synchronous queue with an effective depth of
@@ -248,7 +248,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
* Which means that we don't need to lock the access to the entire H/W
* queues module when submitting a JOB to the CPU queue.
*/
- hl_hw_queue_submit_bd(hdev, queue, 0, len, pkt_dma_addr);
+ hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), len, pkt_dma_addr);
if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
expected_ack_val = queue->pi;
@@ -280,6 +280,14 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
*result = le64_to_cpu(pkt->result);
}
+ /* Scrub previous buffer descriptor 'ctl' field which contains the
+ * previous PI value written during packet submission.
+ * We must do this or else F/W can read an old value upon queue wraparound.
+ */
+ sent_bd = queue->kernel_address;
+ sent_bd += hl_pi_2_offset(pi);
+ sent_bd->ctl = cpu_to_le32(UINT_MAX);
+
out:
mutex_unlock(&hdev->send_cpu_message_lock);
@@ -445,15 +453,6 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
err_exists = true;
}
- if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
- dev_warn(hdev->dev,
- "Device boot warning - Skipped DRAM initialization\n");
- /* This is a warning so we don't want it to disable the
- * device
- */
- err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
- }
-
if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
if (hdev->bmc_enable) {
dev_err(hdev->dev,
@@ -497,15 +496,6 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
err_exists = true;
}
- if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) {
- dev_warn(hdev->dev,
- "Device boot warning - Failed to load preboot primary image\n");
- /* This is a warning so we don't want it to disable the
- * device as we have a secondary preboot image
- */
- err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL;
- }
-
if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) {
dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n");
err_exists = true;
@@ -525,6 +515,34 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
+ /* All warnings should go here in order not to reach the unknown error validation */
+ if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
+ dev_warn(hdev->dev,
+ "Device boot warning - Skipped DRAM initialization\n");
+ /* This is a warning so we don't want it to disable the
+ * device
+ */
+ err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) {
+ dev_warn(hdev->dev,
+ "Device boot warning - Failed to load preboot primary image\n");
+ /* This is a warning so we don't want it to disable the
+ * device as we have a secondary preboot image
+ */
+ err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_TPM_FAIL) {
+ dev_warn(hdev->dev,
+ "Device boot warning - TPM failure\n");
+ /* This is a warning so we don't want it to disable the
+ * device
+ */
+ err_val &= ~CPU_BOOT_ERR0_TPM_FAIL;
+ }
+
if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
dev_err(hdev->dev,
"Device boot error - unknown ERR0 error 0x%08x\n", err_val);
@@ -961,6 +979,7 @@ int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.type = cpu_to_le16(CPUCP_POWER_INPUT);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
@@ -974,6 +993,92 @@ int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
return rc;
}
+int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
+ struct cpucp_hbm_row_info *info)
+{
+ struct cpucp_hbm_row_info *cpucp_repl_rows_info_cpu_addr;
+ dma_addr_t cpucp_repl_rows_info_dma_addr;
+ struct cpucp_packet pkt = {};
+ u64 result;
+ int rc;
+
+ cpucp_repl_rows_info_cpu_addr =
+ hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ sizeof(struct cpucp_hbm_row_info),
+ &cpucp_repl_rows_info_dma_addr);
+ if (!cpucp_repl_rows_info_cpu_addr) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for CPU-CP replaced rows info packet\n");
+ return -ENOMEM;
+ }
+
+ memset(cpucp_repl_rows_info_cpu_addr, 0, sizeof(struct cpucp_hbm_row_info));
+
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET <<
+ CPUCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.addr = cpu_to_le64(cpucp_repl_rows_info_dma_addr);
+ pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_hbm_row_info));
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_CPUCP_INFO_TIMEOUT_USEC, &result);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP replaced rows info pkt, error %d\n", rc);
+ goto out;
+ }
+
+ memcpy(info, cpucp_repl_rows_info_cpu_addr, sizeof(*info));
+
+out:
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ sizeof(struct cpucp_hbm_row_info),
+ cpucp_repl_rows_info_cpu_addr);
+
+ return rc;
+}
+
+int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num)
+{
+ struct cpucp_packet pkt;
+ u64 result;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_HBM_PENDING_ROWS_STATUS << CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP pending rows info pkt, error %d\n", rc);
+ goto out;
+ }
+
+ *pend_rows_num = (u32) result;
+out:
+ return rc;
+}
+
+int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid)
+{
+ struct cpucp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_ENGINE_CORE_ASID_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.value = cpu_to_le64(asid);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_CPUCP_INFO_TIMEOUT_USEC, NULL);
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed on ASID configuration request for engine core, error %d\n",
+ rc);
+
+ return rc;
+}
+
void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev)
{
struct static_fw_load_mgr *static_loader =
@@ -1028,7 +1133,7 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
switch (status) {
case CPU_BOOT_STATUS_NA:
dev_err(hdev->dev,
- "Device boot progress - BTL did NOT run\n");
+ "Device boot progress - BTL/ROM did NOT run\n");
break;
case CPU_BOOT_STATUS_IN_WFE:
dev_err(hdev->dev,
@@ -1101,9 +1206,8 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
(status == CPU_BOOT_STATUS_DRAM_RDY) ||
(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
- (status == CPU_BOOT_STATUS_SRAM_AVAIL) ||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
- FW_CPU_STATUS_POLL_INTERVAL_USEC,
+ hdev->fw_poll_interval_usec,
timeout);
if (rc) {
@@ -1250,8 +1354,7 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
* 3. FW application - a. Fetch fw application security status
* b. Check whether hard reset is done by fw app
*/
- prop->hard_reset_done_by_fw =
- !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
+ prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n",
cpu_boot_dev_sts0);
@@ -1287,11 +1390,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
{
int rc;
- /* pldm was added for cases in which we use preboot on pldm and want
- * to load boot fit, but we can't wait for preboot because it runs
- * very slowly
- */
- if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
+ if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
return 0;
/*
@@ -1437,7 +1536,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev,
le32_to_cpu(dyn_regs->cpu_cmd_status_to_host),
status,
FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status,
- FW_CPU_STATUS_POLL_INTERVAL_USEC,
+ hdev->fw_poll_interval_usec,
timeout);
if (rc) {
@@ -1703,6 +1802,9 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
return rc;
}
+ /* here we can mark the descriptor as valid as the content has been validated */
+ fw_loader->dynamic_loader.fw_desc_valid = true;
+
return 0;
}
@@ -1759,7 +1861,13 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev,
return rc;
}
- /* extract address copy the descriptor from */
+ /*
+ * extract address to copy the descriptor from
+ * in addition, as the descriptor value is going to be over-ridden by new data- we mark it
+ * as invalid.
+ * it will be marked again as valid once validated
+ */
+ fw_loader->dynamic_loader.fw_desc_valid = false;
src = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
response->ram_offset;
memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc));
@@ -1920,17 +2028,15 @@ static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- /* Clear reset status since we need to read it again from boot CPU */
- prop->hard_reset_done_by_fw = false;
+ hdev->fw_loader.fw_comp_loaded |= FW_TYPE_BOOT_CPU;
/* Read boot_cpu status bits */
if (prop->fw_preboot_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED) {
prop->fw_bootfit_cpu_boot_dev_sts0 =
RREG32(cpu_boot_dev_sts0_reg);
- if (prop->fw_bootfit_cpu_boot_dev_sts0 &
- CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
- prop->hard_reset_done_by_fw = true;
+ prop->hard_reset_done_by_fw = !!(prop->fw_bootfit_cpu_boot_dev_sts0 &
+ CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
dev_dbg(hdev->dev, "Firmware boot CPU status0 %#x\n",
prop->fw_bootfit_cpu_boot_dev_sts0);
@@ -2055,14 +2161,21 @@ static int hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device *hdev,
dyn_loader = &fw_loader->dynamic_loader;
- /* Make sure CPU boot-loader is running */
+ /*
+ * Make sure CPU boot-loader is running
+ * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux
+ * yet there is a debug scenario in which we loading uboot (without Linux)
+ * which at later stage is relocated to DRAM. In this case we expect
+ * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the
+ * poll flags
+ */
rc = hl_poll_timeout(
hdev,
le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
status,
- (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
- (status == CPU_BOOT_STATUS_READY_TO_BOOT),
- FW_CPU_STATUS_POLL_INTERVAL_USEC,
+ (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
+ (status == CPU_BOOT_STATUS_SRAM_AVAIL),
+ hdev->fw_poll_interval_usec,
dyn_loader->wait_for_bl_timeout);
if (rc) {
dev_err(hdev->dev, "failed to wait for boot\n");
@@ -2082,14 +2195,14 @@ static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev,
dyn_loader = &fw_loader->dynamic_loader;
- /* Make sure CPU boot-loader is running */
+ /* Make sure CPU linux is running */
rc = hl_poll_timeout(
hdev,
le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
status,
(status == CPU_BOOT_STATUS_SRAM_AVAIL),
- FW_CPU_STATUS_POLL_INTERVAL_USEC,
+ hdev->fw_poll_interval_usec,
fw_loader->cpu_timeout);
if (rc) {
dev_err(hdev->dev, "failed to wait for Linux\n");
@@ -2121,18 +2234,14 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- hdev->fw_loader.linux_loaded = true;
-
- /* Clear reset status since we need to read again from app */
- prop->hard_reset_done_by_fw = false;
+ hdev->fw_loader.fw_comp_loaded |= FW_TYPE_LINUX;
/* Read FW application security bits */
if (prop->fw_cpu_boot_dev_sts0_valid) {
prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
- if (prop->fw_app_cpu_boot_dev_sts0 &
- CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
- prop->hard_reset_done_by_fw = true;
+ prop->hard_reset_done_by_fw = !!(prop->fw_app_cpu_boot_dev_sts0 &
+ CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
if (prop->fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN)
@@ -2247,6 +2356,9 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
dev_info(hdev->dev,
"Loading firmware to device, may take some time...\n");
+ /* initialize FW descriptor as invalid */
+ fw_loader->dynamic_loader.fw_desc_valid = false;
+
/*
* In this stage, "cpu_dyn_regs" contains only LKD's hard coded values!
* It will be updated from FW after hl_fw_dynamic_request_descriptor().
@@ -2259,14 +2371,14 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
if (rc)
goto protocol_err;
- if (hdev->curr_reset_cause) {
+ if (hdev->reset_info.curr_reset_cause) {
rc = hl_fw_dynamic_send_msg(hdev, fw_loader,
- HL_COMMS_RESET_CAUSE_TYPE, &hdev->curr_reset_cause);
+ HL_COMMS_RESET_CAUSE_TYPE, &hdev->reset_info.curr_reset_cause);
if (rc)
goto protocol_err;
/* Clear current reset cause */
- hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+ hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
}
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
@@ -2288,6 +2400,15 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
goto protocol_err;
}
+ /*
+ * when testing FW load (without Linux) on PLDM we don't want to
+ * wait until boot fit is active as it may take several hours.
+ * instead, we load the bootfit and let it do all initializations in
+ * the background.
+ */
+ if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX))
+ return 0;
+
rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader);
if (rc)
goto protocol_err;
@@ -2333,7 +2454,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
return 0;
protocol_err:
- fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0),
+ if (fw_loader->dynamic_loader.fw_desc_valid)
+ fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0),
le32_to_cpu(dyn_regs->cpu_boot_err1),
le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
@@ -2380,7 +2502,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_boot_status_reg,
status,
status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
- FW_CPU_STATUS_POLL_INTERVAL_USEC,
+ hdev->fw_poll_interval_usec,
fw_loader->boot_fit_timeout);
if (rc) {
@@ -2403,7 +2525,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_msg_status_reg,
status,
status == CPU_MSG_OK,
- FW_CPU_STATUS_POLL_INTERVAL_USEC,
+ hdev->fw_poll_interval_usec,
fw_loader->boot_fit_timeout);
if (rc) {
@@ -2416,7 +2538,14 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
WREG32(msg_to_cpu_reg, KMD_MSG_NA);
}
- /* Make sure CPU boot-loader is running */
+ /*
+ * Make sure CPU boot-loader is running
+ * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux
+ * yet there is a debug scenario in which we loading uboot (without Linux)
+ * which at later stage is relocated to DRAM. In this case we expect
+ * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the
+ * poll flags
+ */
rc = hl_poll_timeout(
hdev,
cpu_boot_status_reg,
@@ -2425,7 +2554,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_SRAM_AVAIL),
- FW_CPU_STATUS_POLL_INTERVAL_USEC,
+ hdev->fw_poll_interval_usec,
cpu_timeout);
dev_dbg(hdev->dev, "uboot status = %d\n", status);
@@ -2474,7 +2603,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_boot_status_reg,
status,
(status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
- FW_CPU_STATUS_POLL_INTERVAL_USEC,
+ hdev->fw_poll_interval_usec,
cpu_timeout);
if (rc) {
@@ -2494,7 +2623,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_boot_status_reg,
status,
(status == CPU_BOOT_STATUS_SRAM_AVAIL),
- FW_CPU_STATUS_POLL_INTERVAL_USEC,
+ hdev->fw_poll_interval_usec,
cpu_timeout);
/* Clear message */
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index a2002cbf794b..cb710fd478b6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -61,6 +61,8 @@
#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
+#define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */
+
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */
@@ -117,37 +119,37 @@ enum hl_mmu_page_table_location {
/*
* Reset Flags
*
- * - HL_RESET_HARD
+ * - HL_DRV_RESET_HARD
* If set do hard reset to all engines. If not set reset just
* compute/DMA engines.
*
- * - HL_RESET_FROM_RESET_THREAD
+ * - HL_DRV_RESET_FROM_RESET_THR
* Set if the caller is the hard-reset thread
*
- * - HL_RESET_HEARTBEAT
+ * - HL_DRV_RESET_HEARTBEAT
* Set if reset is due to heartbeat
*
- * - HL_RESET_TDR
+ * - HL_DRV_RESET_TDR
* Set if reset is due to TDR
*
- * - HL_RESET_DEVICE_RELEASE
+ * - HL_DRV_RESET_DEV_RELEASE
* Set if reset is due to device release
*
- * - HL_RESET_FW
+ * - HL_DRV_RESET_BYPASS_REQ_TO_FW
* F/W will perform the reset. No need to ask it to reset the device. This is relevant
* only when running with secured f/w
*
- * - HL_RESET_FW_FATAL_ERR
+ * - HL_DRV_RESET_FW_FATAL_ERR
* Set if reset is due to a fatal error from FW
*/
-#define HL_RESET_HARD (1 << 0)
-#define HL_RESET_FROM_RESET_THREAD (1 << 1)
-#define HL_RESET_HEARTBEAT (1 << 2)
-#define HL_RESET_TDR (1 << 3)
-#define HL_RESET_DEVICE_RELEASE (1 << 4)
-#define HL_RESET_FW (1 << 5)
-#define HL_RESET_FW_FATAL_ERR (1 << 6)
+#define HL_DRV_RESET_HARD (1 << 0)
+#define HL_DRV_RESET_FROM_RESET_THR (1 << 1)
+#define HL_DRV_RESET_HEARTBEAT (1 << 2)
+#define HL_DRV_RESET_TDR (1 << 3)
+#define HL_DRV_RESET_DEV_RELEASE (1 << 4)
+#define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5)
+#define HL_DRV_RESET_FW_FATAL_ERR (1 << 6)
#define HL_MAX_SOBS_PER_MONITOR 8
@@ -219,6 +221,7 @@ enum hl_fw_component {
/**
* enum hl_fw_types - F/W types present in the system
+ * @FW_TYPE_NONE: no FW component indication
* @FW_TYPE_LINUX: Linux image for device CPU
* @FW_TYPE_BOOT_CPU: Boot image for device CPU
* @FW_TYPE_PREBOOT_CPU: Indicates pre-loaded CPUs are present in the system
@@ -226,6 +229,7 @@ enum hl_fw_component {
* @FW_TYPE_ALL_TYPES: Mask for all types
*/
enum hl_fw_types {
+ FW_TYPE_NONE = 0x0,
FW_TYPE_LINUX = 0x1,
FW_TYPE_BOOT_CPU = 0x2,
FW_TYPE_PREBOOT_CPU = 0x4,
@@ -353,6 +357,21 @@ enum vm_type {
};
/**
+ * enum mmu_op_flags - mmu operation relevant information.
+ * @MMU_OP_USERPTR: operation on user memory (host resident).
+ * @MMU_OP_PHYS_PACK: operation on DRAM (device resident).
+ * @MMU_OP_CLEAR_MEMCACHE: operation has to clear memcache.
+ * @MMU_OP_SKIP_LOW_CACHE_INV: operation is allowed to skip parts of cache invalidation.
+ */
+enum mmu_op_flags {
+ MMU_OP_USERPTR = 0x1,
+ MMU_OP_PHYS_PACK = 0x2,
+ MMU_OP_CLEAR_MEMCACHE = 0x4,
+ MMU_OP_SKIP_LOW_CACHE_INV = 0x8,
+};
+
+
+/**
* enum hl_device_hw_state - H/W device state. use this to understand whether
* to do reset before hw_init or not
* @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset
@@ -382,6 +401,7 @@ enum hl_device_hw_state {
* @hop3_mask: mask to get the PTE address in hop 3.
* @hop4_mask: mask to get the PTE address in hop 4.
* @hop5_mask: mask to get the PTE address in hop 5.
+ * @last_mask: mask to get the bit indicating this is the last hop.
* @page_size: default page size used to allocate memory.
* @num_hops: The amount of hops supported by the translation table.
* @host_resident: Should the MMU page table reside in host memory or in the
@@ -402,6 +422,7 @@ struct hl_mmu_properties {
u64 hop3_mask;
u64 hop4_mask;
u64 hop5_mask;
+ u64 last_mask;
u32 page_size;
u32 num_hops;
u8 host_resident;
@@ -524,6 +545,15 @@ struct hl_hints_range {
* @dynamic_fw_load: is dynamic FW load is supported.
* @gic_interrupts_enable: true if FW is not blocking GIC controller,
* false otherwise.
+ * @use_get_power_for_reset_history: To support backward compatibility for Goya
+ * and Gaudi
+ * @supports_soft_reset: is soft reset supported.
+ * @allow_inference_soft_reset: true if the ASIC supports soft reset that is
+ * initiated by user or TDR. This is only true
+ * in inference ASICs, as there is no real-world
+ * use-case of doing soft-reset in training (due
+ * to the fact that training runs on multiple
+ * devices)
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -604,6 +634,9 @@ struct asic_fixed_properties {
u8 iatu_done_by_fw;
u8 dynamic_fw_load;
u8 gic_interrupts_enable;
+ u8 use_get_power_for_reset_history;
+ u8 supports_soft_reset;
+ u8 allow_inference_soft_reset;
};
/**
@@ -852,10 +885,15 @@ struct hl_user_interrupt {
* pending on an interrupt
* @wait_list_node: node in the list of user threads pending on an interrupt
* @fence: hl fence object for interrupt completion
+ * @cq_target_value: CQ target value
+ * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
+ * handler for taget value comparison
*/
struct hl_user_pending_interrupt {
struct list_head wait_list_node;
struct hl_fence fence;
+ u64 cq_target_value;
+ u64 *cq_kernel_addr;
};
/**
@@ -1010,6 +1048,7 @@ struct fw_response {
* @image_region: region to copy the FW image to
* @fw_image_size: size of FW image to load
* @wait_for_bl_timeout: timeout for waiting for boot loader to respond
+ * @fw_desc_valid: true if FW descriptor has been validated and hence the data can be used
*/
struct dynamic_fw_load_mgr {
struct fw_response response;
@@ -1017,6 +1056,7 @@ struct dynamic_fw_load_mgr {
struct pci_mem_region *image_region;
size_t fw_image_size;
u32 wait_for_bl_timeout;
+ bool fw_desc_valid;
};
/**
@@ -1042,7 +1082,8 @@ struct fw_image_props {
* @skip_bmc: should BMC be skipped
* @sram_bar_id: SRAM bar ID
* @dram_bar_id: DRAM bar ID
- * @linux_loaded: true if linux was loaded so far
+ * @fw_comp_loaded: bitmask of loaded FW components. set bit meaning loaded
+ * component. values are set according to enum hl_fw_types.
*/
struct fw_load_mgr {
union {
@@ -1056,7 +1097,7 @@ struct fw_load_mgr {
u8 skip_bmc;
u8 sram_bar_id;
u8 dram_bar_id;
- u8 linux_loaded;
+ u8 fw_comp_loaded;
};
/**
@@ -1128,7 +1169,7 @@ struct fw_load_mgr {
* @disable_clock_gating: disable clock gating completely
* @debug_coresight: perform certain actions on Coresight for debugging.
* @is_device_idle: return true if device is idle, false otherwise.
- * @soft_reset_late_init: perform certain actions needed after soft reset.
+ * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset
* @hw_queues_lock: acquire H/W queues lock.
* @hw_queues_unlock: release H/W queues lock.
* @get_pci_id: retrieve PCI ID.
@@ -1261,10 +1302,10 @@ struct hl_asic_funcs {
int (*send_heartbeat)(struct hl_device *hdev);
void (*set_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
- int (*debug_coresight)(struct hl_device *hdev, void *data);
+ int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
u8 mask_len, struct seq_file *s);
- int (*soft_reset_late_init)(struct hl_device *hdev);
+ int (*non_hard_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
u32 (*get_pci_id)(struct hl_device *hdev);
@@ -1276,7 +1317,7 @@ struct hl_asic_funcs {
int (*init_iatu)(struct hl_device *hdev);
u32 (*rreg)(struct hl_device *hdev, u32 reg);
void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
- void (*halt_coresight)(struct hl_device *hdev);
+ void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx);
int (*ctx_init)(struct hl_ctx *ctx);
void (*ctx_fini)(struct hl_ctx *ctx);
int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
@@ -1518,6 +1559,9 @@ struct hl_userptr {
* @submission_time_jiffies: submission time of the cs
* @type: CS_TYPE_*.
* @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
+ * @sob_addr_offset: sob offset from the configuration base address.
+ * @initial_sob_count: count of completed signals in SOB before current submission of signal or
+ * cs with encaps signals.
* @submitted: true if CS was submitted to H/W.
* @completed: true if CS was completed by device.
* @timedout : true if CS was timedout.
@@ -1553,6 +1597,8 @@ struct hl_cs {
u64 submission_time_jiffies;
enum hl_cs_type type;
u32 encaps_sig_hdl_id;
+ u32 sob_addr_offset;
+ u16 initial_sob_count;
u8 submitted;
u8 completed;
u8 timedout;
@@ -1792,7 +1838,6 @@ struct hl_debug_params {
* @dev_node: node in the device list of file private data
* @refcount: number of related contexts.
* @restore_phase_mutex: lock for context switch and restore phase.
- * @is_control: true for control device, false otherwise
*/
struct hl_fpriv {
struct hl_device *hdev;
@@ -1805,7 +1850,6 @@ struct hl_fpriv {
struct list_head dev_node;
struct kref refcount;
struct mutex restore_phase_mutex;
- u8 is_control;
};
@@ -1864,6 +1908,7 @@ struct hl_debugfs_entry {
* @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
* @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
* @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
+ * @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read.
*/
struct hl_dbg_device_entry {
struct dentry *root;
@@ -1892,6 +1937,7 @@ struct hl_dbg_device_entry {
u8 i2c_bus;
u8 i2c_addr;
u8 i2c_reg;
+ u8 i2c_len;
};
/**
@@ -2180,13 +2226,13 @@ struct hwmon_chip_info;
* @wq: work queue for device reset procedure.
* @reset_work: reset work to be done.
* @hdev: habanalabs device structure.
- * @fw_reset: whether f/w will do the reset without us sending them a message to do it.
+ * @flags: reset flags.
*/
struct hl_device_reset_work {
struct workqueue_struct *wq;
struct delayed_work reset_work;
struct hl_device *hdev;
- bool fw_reset;
+ u32 flags;
};
/**
@@ -2328,12 +2374,10 @@ struct multi_cs_completion {
* @ctx: pointer to the context structure
* @fence_arr: array of fences of all CSs
* @seq_arr: array of CS sequence numbers
- * @timeout_us: timeout in usec for waiting for CS to complete
+ * @timeout_jiffies: timeout in jiffies for waiting for CS to complete
* @timestamp: timestamp of first completed CS
* @wait_status: wait for CS status
* @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
- * @stream_master_qid_map: bitmap of all stream master QIDs on which the
- * multi-CS is waiting
* @arr_len: fence_arr and seq_arr array length
* @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
* @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
@@ -2342,17 +2386,114 @@ struct multi_cs_data {
struct hl_ctx *ctx;
struct hl_fence **fence_arr;
u64 *seq_arr;
- s64 timeout_us;
+ s64 timeout_jiffies;
s64 timestamp;
long wait_status;
u32 completion_bitmap;
- u32 stream_master_qid_map;
u8 arr_len;
u8 gone_cs;
u8 update_ts;
};
/**
+ * struct hl_clk_throttle_timestamp - current/last clock throttling timestamp
+ * @start: timestamp taken when 'start' event is received in driver
+ * @end: timestamp taken when 'end' event is received in driver
+ */
+struct hl_clk_throttle_timestamp {
+ ktime_t start;
+ ktime_t end;
+};
+
+/**
+ * struct hl_clk_throttle - keeps current/last clock throttling timestamps
+ * @timestamp: timestamp taken by driver and firmware, index 0 refers to POWER
+ * index 1 refers to THERMAL
+ * @lock: protects this structure as it can be accessed from both event queue
+ * context and info_ioctl context
+ * @current_reason: bitmask represents the current clk throttling reasons
+ * @aggregated_reason: bitmask represents aggregated clk throttling reasons since driver load
+ */
+struct hl_clk_throttle {
+ struct hl_clk_throttle_timestamp timestamp[HL_CLK_THROTTLE_TYPE_MAX];
+ struct mutex lock;
+ u32 current_reason;
+ u32 aggregated_reason;
+};
+
+/**
+ * struct last_error_session_info - info about last session in which CS timeout or
+ * razwi error occurred.
+ * @open_dev_timestamp: device open timestamp.
+ * @cs_timeout_timestamp: CS timeout timestamp.
+ * @razwi_timestamp: razwi timestamp.
+ * @cs_write_disable: if set writing to CS parameters in the structure is disabled so the
+ * first (root cause) CS timeout will not be overwritten.
+ * @razwi_write_disable: if set writing to razwi parameters in the structure is disabled so the
+ * first (root cause) razwi will not be overwritten.
+ * @cs_timeout_seq: CS timeout sequence number.
+ * @razwi_addr: address that caused razwi.
+ * @razwi_engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does
+ * not have engine id it will be set to U16_MAX.
+ * @razwi_engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible
+ * engines which one them caused the razwi. In that case, it will contain the
+ * second possible engine id, otherwise it will be set to U16_MAX.
+ * @razwi_non_engine_initiator: in case the initiator of the razwi does not have engine id.
+ * @razwi_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX.
+ */
+struct last_error_session_info {
+ ktime_t open_dev_timestamp;
+ ktime_t cs_timeout_timestamp;
+ ktime_t razwi_timestamp;
+ atomic_t cs_write_disable;
+ atomic_t razwi_write_disable;
+ u64 cs_timeout_seq;
+ u64 razwi_addr;
+ u16 razwi_engine_id_1;
+ u16 razwi_engine_id_2;
+ u8 razwi_non_engine_initiator;
+ u8 razwi_type;
+};
+
+/**
+ * struct hl_reset_info - holds current device reset information.
+ * @lock: lock to protect critical reset flows.
+ * @soft_reset_cnt: number of soft reset since the driver was loaded.
+ * @hard_reset_cnt: number of hard reset since the driver was loaded.
+ * @hard_reset_schedule_flags: hard reset is scheduled to after current soft reset,
+ * here we hold the hard reset flags.
+ * @in_reset: is device in reset flow.
+ * @is_in_soft_reset: Device is currently in soft reset process.
+ * @needs_reset: true if reset_on_lockup is false and device should be reset
+ * due to lockup.
+ * @hard_reset_pending: is there a hard reset work pending.
+ * @curr_reset_cause: saves an enumerated reset cause when a hard reset is
+ * triggered, and cleared after it is shared with preboot.
+ * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden
+ * with a new value on next reset
+ * @reset_trigger_repeated: set if device reset is triggered more than once with
+ * same cause.
+ * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
+ * complete instead.
+ */
+struct hl_reset_info {
+ spinlock_t lock;
+ u32 soft_reset_cnt;
+ u32 hard_reset_cnt;
+ u32 hard_reset_schedule_flags;
+ u8 in_reset;
+ u8 is_in_soft_reset;
+ u8 needs_reset;
+ u8 hard_reset_pending;
+
+ u8 curr_reset_cause;
+ u8 prev_reset_trigger;
+ u8 reset_trigger_repeated;
+
+ u8 skip_reset_on_timeout;
+};
+
+/**
* struct hl_device - habanalabs device structure.
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
* @pcie_bar_phys: array of available PCIe bars physical addresses.
@@ -2363,7 +2504,6 @@ struct multi_cs_data {
* @cdev_ctrl: char device for control operations only (INFO IOCTL)
* @dev: related kernel basic device structure.
* @dev_ctrl: related kernel device structure for the control device
- * @work_freq: delayed work to lower device frequency if possible.
* @work_heartbeat: delayed work for CPU-CP is-alive check.
* @device_reset_work: delayed work which performs hard reset
* @asic_name: ASIC specific name.
@@ -2398,7 +2538,6 @@ struct multi_cs_data {
* @asic_specific: ASIC specific information to use only from ASIC files.
* @vm: virtual memory manager for MMU.
* @hwmon_dev: H/W monitor device.
- * @pm_mng_profile: current power management profile.
* @hl_chip_info: ASIC's sensors information.
* @device_status_description: device status description.
* @hl_debugfs: device's debugfs manager.
@@ -2410,8 +2549,10 @@ struct multi_cs_data {
* @internal_cb_va_base: internal cb pool mmu virtual address base
* @fpriv_list: list of file private data structures. Each structure is created
* when a user opens the device
+ * @fpriv_ctrl_list: list of file private data structures. Each structure is created
+ * when a user opens the control device
* @fpriv_list_lock: protects the fpriv_list
- * @compute_ctx: current compute context executing.
+ * @fpriv_ctrl_list_lock: protects the fpriv_ctrl_list
* @aggregated_cs_counters: aggregated cs counters among all contexts
* @mmu_priv: device-specific MMU data.
* @mmu_func: device-related MMU functions.
@@ -2419,6 +2560,10 @@ struct multi_cs_data {
* @pci_mem_region: array of memory regions in the PCI
* @state_dump_specs: constants and dictionaries needed to dump system state.
* @multi_cs_completion: array of multi-CS completion.
+ * @clk_throttling: holds information about current/previous clock throttling events
+ * @reset_info: holds current device reset information.
+ * @last_error: holds information about last session in which CS timeout or razwi error occurred.
+ * @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @dram_used_mem: current DRAM memory consumption.
* @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This
@@ -2434,20 +2579,17 @@ struct multi_cs_data {
* device initialization. Mainly used to debug and
* workaround firmware bugs
* @dram_pci_bar_start: start bus address of PCIe bar towards DRAM.
+ * @last_successful_open_ktime: timestamp (ktime) of the last successful device open.
* @last_successful_open_jif: timestamp (jiffies) of the last successful
* device open.
* @last_open_session_duration_jif: duration (jiffies) of the last device open
* session.
* @open_counter: number of successful device open operations.
- * @in_reset: is device in reset flow.
- * @curr_pll_profile: current PLL profile.
+ * @fw_poll_interval_usec: FW status poll interval in usec.
* @card_type: Various ASICs have several card types. This indicates the card
* type of the current device.
* @major: habanalabs kernel driver major.
* @high_pll: high PLL profile frequency.
- * @soft_reset_cnt: number of soft reset since the driver was loaded.
- * @hard_reset_cnt: number of hard reset since the driver was loaded.
- * @clk_throttling_reason: bitmask represents the current clk throttling reasons
* @id: device minor.
* @id_control: minor of the control device
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
@@ -2455,7 +2597,6 @@ struct multi_cs_data {
* @disabled: is device disabled.
* @late_init_done: is late init stage was done during initialization.
* @hwmon_initialized: is H/W monitor sensors was initialized.
- * @hard_reset_pending: is there a hard reset work pending.
* @heartbeat: is heartbeat sanity check towards CPU-CP enabled.
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false
* otherwise.
@@ -2467,8 +2608,9 @@ struct multi_cs_data {
* @init_done: is the initialization of the device done.
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
* @dma_mask: the dma mask that was set for this device
- * @in_debug: is device under debug. This, together with fpriv_list, enforces
- * that only a single user is configuring the debug infrastructure.
+ * @in_debug: whether the device is in a state where the profiling/tracing infrastructure
+ * can be used. This indication is needed because in some ASICs we need to do
+ * specific operations to enable that infrastructure.
* @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
* only to POWER9 machines.
* @cdev_sysfs_created: were char devices and sysfs nodes created.
@@ -2477,34 +2619,18 @@ struct multi_cs_data {
* @sync_stream_queue_idx: helper index for sync stream queues initialization.
* @collective_mon_idx: helper index for collective initialization
* @supports_coresight: is CoreSight supported.
- * @supports_soft_reset: is soft reset supported.
- * @allow_inference_soft_reset: true if the ASIC supports soft reset that is
- * initiated by user or TDR. This is only true
- * in inference ASICs, as there is no real-world
- * use-case of doing soft-reset in training (due
- * to the fact that training runs on multiple
- * devices)
* @supports_cb_mapping: is mapping a CB to the device's MMU supported.
- * @needs_reset: true if reset_on_lockup is false and device should be reset
- * due to lockup.
* @process_kill_trial_cnt: number of trials reset thread tried killing
* user processes
* @device_fini_pending: true if device_fini was called and might be
* waiting for the reset thread to finish
* @supports_staged_submission: true if staged submissions are supported
- * @curr_reset_cause: saves an enumerated reset cause when a hard reset is
- * triggered, and cleared after it is shared with preboot.
- * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden
- * with a new value on next reset
- * @reset_trigger_repeated: set if device reset is triggered more than once with
- * same cause.
- * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
- * complete instead.
* @device_cpu_is_halted: Flag to indicate whether the device CPU was already
* halted. We can't halt it again because the COMMS
* protocol will throw an error. Relevant only for
* cases where Linux was not loaded to device CPU
* @supports_wait_for_multi_cs: true if wait for multi CS is supported
+ * @is_compute_ctx_active: Whether there is an active compute context executing.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -2515,7 +2641,6 @@ struct hl_device {
struct cdev cdev_ctrl;
struct device *dev;
struct device *dev_ctrl;
- struct delayed_work work_freq;
struct delayed_work work_heartbeat;
struct hl_device_reset_work device_reset_work;
char asic_name[HL_STR_MAX];
@@ -2546,7 +2671,6 @@ struct hl_device {
void *asic_specific;
struct hl_vm vm;
struct device *hwmon_dev;
- enum hl_pm_mng_profile pm_mng_profile;
struct hwmon_chip_info *hl_chip_info;
struct hl_dbg_device_entry hl_debugfs;
@@ -2560,9 +2684,9 @@ struct hl_device {
u64 internal_cb_va_base;
struct list_head fpriv_list;
+ struct list_head fpriv_ctrl_list;
struct mutex fpriv_list_lock;
-
- struct hl_ctx *compute_ctx;
+ struct mutex fpriv_ctrl_list_lock;
struct hl_cs_counters_atomic aggregated_cs_counters;
@@ -2577,6 +2701,11 @@ struct hl_device {
struct multi_cs_completion multi_cs_completion[
MULTI_CS_MAX_USER_CTX];
+ struct hl_clk_throttle clk_throttling;
+ struct last_error_session_info last_error;
+
+ struct hl_reset_info reset_info;
+
u32 *stream_master_qid_arr;
atomic64_t dram_used_mem;
u64 timeout_jiffies;
@@ -2587,21 +2716,17 @@ struct hl_device {
u64 last_successful_open_jif;
u64 last_open_session_duration_jif;
u64 open_counter;
- atomic_t in_reset;
- enum hl_pll_frequency curr_pll_profile;
+ u64 fw_poll_interval_usec;
+ ktime_t last_successful_open_ktime;
enum cpucp_card_types card_type;
u32 major;
u32 high_pll;
- u32 soft_reset_cnt;
- u32 hard_reset_cnt;
- u32 clk_throttling_reason;
u16 id;
u16 id_control;
u16 cpu_pci_msb_addr;
u8 disabled;
u8 late_init_done;
u8 hwmon_initialized;
- u8 hard_reset_pending;
u8 heartbeat;
u8 reset_on_lockup;
u8 dram_default_page_mapping;
@@ -2618,20 +2743,14 @@ struct hl_device {
u8 sync_stream_queue_idx;
u8 collective_mon_idx;
u8 supports_coresight;
- u8 supports_soft_reset;
- u8 allow_inference_soft_reset;
u8 supports_cb_mapping;
- u8 needs_reset;
u8 process_kill_trial_cnt;
u8 device_fini_pending;
u8 supports_staged_submission;
- u8 curr_reset_cause;
- u8 prev_reset_trigger;
- u8 reset_trigger_repeated;
- u8 skip_reset_on_timeout;
u8 device_cpu_is_halted;
u8 supports_wait_for_multi_cs;
u8 stream_master_qid_arr_size;
+ u8 is_compute_ctx_active;
/* Parameters for bring-up */
u64 nic_ports_mask;
@@ -2659,6 +2778,7 @@ struct hl_device {
* wait cs are used to wait of the reserved encaps signals.
* @hdev: pointer to habanalabs device structure.
* @hw_sob: pointer to H/W SOB used in the reservation.
+ * @ctx: pointer to the user's context data structure
* @cs_seq: staged cs sequence which contains encapsulated signals
* @id: idr handler id to be used to fetch the handler info
* @q_idx: stream queue index
@@ -2669,6 +2789,7 @@ struct hl_cs_encaps_sig_handle {
struct kref refcount;
struct hl_device *hdev;
struct hl_hw_sob *hw_sob;
+ struct hl_ctx *ctx;
u64 cs_seq;
u32 id;
u32 q_idx;
@@ -2757,21 +2878,9 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size,
static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
u64 range_start_address, u64 range_end_address)
{
- u64 end_address = address + size;
+ u64 end_address = address + size - 1;
- if ((address >= range_start_address) &&
- (address < range_end_address))
- return true;
-
- if ((end_address >= range_start_address) &&
- (end_address < range_end_address))
- return true;
-
- if ((address < range_start_address) &&
- (end_address >= range_end_address))
- return true;
-
- return false;
+ return ((address <= range_end_address) && (range_start_address <= end_address));
}
int hl_device_open(struct inode *inode, struct file *filp);
@@ -2779,10 +2888,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp);
bool hl_device_operational(struct hl_device *hdev,
enum hl_device_status *status);
enum hl_device_status hl_device_status(struct hl_device *hdev);
-int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
-int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
- enum hl_asic_type asic_type, int minor);
-void destroy_hdev(struct hl_device *hdev);
+int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable);
int hl_hw_queues_create(struct hl_device *hdev);
void hl_hw_queues_destroy(struct hl_device *hdev);
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
@@ -2821,6 +2927,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
void hl_ctx_do_release(struct kref *ref);
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_put(struct hl_ctx *ctx);
+struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev);
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
struct hl_fence **fence, u32 arr_len);
@@ -2834,7 +2941,6 @@ int hl_device_resume(struct hl_device *hdev);
int hl_device_reset(struct hl_device *hdev, u32 flags);
void hl_hpriv_get(struct hl_fpriv *hpriv);
int hl_hpriv_put(struct hl_fpriv *hpriv);
-int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
int hl_device_utilization(struct hl_device *hdev, u32 *utilization);
int hl_build_hwmon_channel_info(struct hl_device *hdev,
@@ -2915,6 +3021,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
u64 phys_addr, u32 size);
int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
+int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags);
+int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
+ u32 flags, u32 asid, u64 va, u64 size);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
@@ -2969,6 +3078,10 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
struct fw_load_mgr *fw_loader,
enum comms_cmd cmd, unsigned int size,
bool wait_ok, u32 timeout);
+int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
+ struct cpucp_hbm_row_info *info);
+int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num);
+int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid);
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]);
int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 949d1b5c5c41..690b763c7a95 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -153,15 +153,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
goto out_err;
}
- if (hdev->in_debug) {
- dev_err_ratelimited(hdev->dev,
- "Can't open %s because it is being debugged by another user\n",
- dev_name(hdev->dev));
- rc = -EPERM;
- goto out_err;
- }
-
- if (hdev->compute_ctx) {
+ if (hdev->is_compute_ctx_active) {
dev_dbg_ratelimited(hdev->dev,
"Can't open %s because another user is working on it\n",
dev_name(hdev->dev));
@@ -175,20 +167,17 @@ int hl_device_open(struct inode *inode, struct file *filp)
goto out_err;
}
- /* Device is IDLE at this point so it is legal to change PLLs.
- * There is no need to check anything because if the PLL is
- * already HIGH, the set function will return without doing
- * anything
- */
- hl_device_set_frequency(hdev, PLL_HIGH);
-
list_add(&hpriv->dev_node, &hdev->fpriv_list);
mutex_unlock(&hdev->fpriv_list_lock);
hl_debugfs_add_file(hpriv);
+ atomic_set(&hdev->last_error.cs_write_disable, 0);
+ atomic_set(&hdev->last_error.razwi_write_disable, 0);
+
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
+ hdev->last_successful_open_ktime = ktime_get();
return 0;
@@ -231,12 +220,11 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
- hpriv->is_control = true;
nonseekable_open(inode, filp);
hpriv->taskpid = find_get_pid(current->pid);
- mutex_lock(&hdev->fpriv_list_lock);
+ mutex_lock(&hdev->fpriv_ctrl_list_lock);
if (!hl_device_operational(hdev, NULL)) {
dev_err_ratelimited(hdev->dev_ctrl,
@@ -246,13 +234,13 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
goto out_err;
}
- list_add(&hpriv->dev_node, &hdev->fpriv_list);
- mutex_unlock(&hdev->fpriv_list_lock);
+ list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list);
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
return 0;
out_err:
- mutex_unlock(&hdev->fpriv_list_lock);
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
filp->private_data = NULL;
put_pid(hpriv->taskpid);
@@ -263,6 +251,7 @@ out_err:
static void set_driver_behavior_per_device(struct hl_device *hdev)
{
+ hdev->pldm = 0;
hdev->fw_components = FW_TYPE_ALL_TYPES;
hdev->cpu_queues_enable = 1;
hdev->heartbeat = 1;
@@ -279,23 +268,53 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
hdev->axi_drain = 0;
}
-/*
+static void copy_kernel_module_params_to_device(struct hl_device *hdev)
+{
+ hdev->major = hl_major;
+ hdev->memory_scrub = memory_scrub;
+ hdev->reset_on_lockup = reset_on_lockup;
+ hdev->boot_error_status_mask = boot_error_status_mask;
+
+ if (timeout_locked)
+ hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
+ else
+ hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
+}
+
+static int fixup_device_params(struct hl_device *hdev)
+{
+ hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
+
+ hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
+
+ hdev->stop_on_err = true;
+ hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+ hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
+
+ /* Enable only after the initialization of the device */
+ hdev->disabled = true;
+
+ /* Set default DMA mask to 32 bits */
+ hdev->dma_mask = 32;
+
+ return 0;
+}
+
+/**
* create_hdev - create habanalabs device instance
*
* @dev: will hold the pointer to the new habanalabs device structure
* @pdev: pointer to the pci device
- * @asic_type: in case of simulator device, which device is it
- * @minor: in case of simulator device, the minor of the device
*
* Allocate memory for habanalabs device and initialize basic fields
* Identify the ASIC type
* Allocate ID (minor) for the device (only for real devices)
*/
-int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
- enum hl_asic_type asic_type, int minor)
+static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
{
+ int main_id, ctrl_id = 0, rc = 0;
struct hl_device *hdev;
- int rc, main_id, ctrl_id = 0;
*dev = NULL;
@@ -303,69 +322,39 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
if (!hdev)
return -ENOMEM;
- /* First, we must find out which ASIC are we handling. This is needed
- * to configure the behavior of the driver (kernel parameters)
- */
- if (pdev) {
- hdev->asic_type = get_asic_type(pdev->device);
- if (hdev->asic_type == ASIC_INVALID) {
- dev_err(&pdev->dev, "Unsupported ASIC\n");
- rc = -ENODEV;
- goto free_hdev;
- }
- } else {
- hdev->asic_type = asic_type;
- }
-
- if (pdev)
- hdev->asic_prop.fw_security_enabled =
- is_asic_secured(hdev->asic_type);
- else
- hdev->asic_prop.fw_security_enabled = false;
+ /* can be NULL in case of simulator device */
+ hdev->pdev = pdev;
/* Assign status description string */
- strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL],
- "operational", HL_STR_MAX);
- strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
- "in reset", HL_STR_MAX);
- strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
- "disabled", HL_STR_MAX);
- strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
- "needs reset", HL_STR_MAX);
+ strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
+ strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
+ strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
+ strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX);
- hdev->major = hl_major;
- hdev->reset_on_lockup = reset_on_lockup;
- hdev->memory_scrub = memory_scrub;
- hdev->boot_error_status_mask = boot_error_status_mask;
- hdev->stop_on_err = true;
+ /* First, we must find out which ASIC are we handling. This is needed
+ * to configure the behavior of the driver (kernel parameters)
+ */
+ hdev->asic_type = get_asic_type(pdev->device);
+ if (hdev->asic_type == ASIC_INVALID) {
+ dev_err(&pdev->dev, "Unsupported ASIC\n");
+ rc = -ENODEV;
+ goto free_hdev;
+ }
- hdev->pldm = 0;
+ copy_kernel_module_params_to_device(hdev);
set_driver_behavior_per_device(hdev);
- hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
- hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
-
- if (timeout_locked)
- hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
- else
- hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
-
- hdev->disabled = true;
- hdev->pdev = pdev; /* can be NULL in case of simulator device */
-
- /* Set default DMA mask to 32 bits */
- hdev->dma_mask = 32;
+ fixup_device_params(hdev);
mutex_lock(&hl_devs_idr_lock);
/* Always save 2 numbers, 1 for main device and 1 for control.
* They must be consecutive
*/
- main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
- GFP_KERNEL);
+ main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
if (main_id >= 0)
ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
@@ -405,7 +394,7 @@ free_hdev:
* @dev: pointer to the habanalabs device structure
*
*/
-void destroy_hdev(struct hl_device *hdev)
+static void destroy_hdev(struct hl_device *hdev)
{
/* Remove device from the device list */
mutex_lock(&hl_devs_idr_lock);
@@ -444,7 +433,7 @@ static int hl_pmops_resume(struct device *dev)
return hl_device_resume(hdev);
}
-/*
+/**
* hl_pci_probe - probe PCI habanalabs devices
*
* @pdev: pointer to pci device
@@ -454,8 +443,7 @@ static int hl_pmops_resume(struct device *dev)
* Create a new habanalabs device and initialize it according to the
* device's type
*/
-static int hl_pci_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct hl_device *hdev;
int rc;
@@ -464,7 +452,7 @@ static int hl_pci_probe(struct pci_dev *pdev,
" device found [%04x:%04x] (rev %x)\n",
(int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
- rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
+ rc = create_hdev(&hdev, pdev);
if (rc)
return rc;
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 86c3257d9ae1..3ba3a8ffda3e 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -158,7 +158,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
}
-static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
+static int debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, struct hl_debug_args *args)
{
struct hl_debug_params *params;
void *input = NULL, *output = NULL;
@@ -200,7 +200,7 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
params->output_size = args->output_size;
}
- rc = hdev->asic_funcs->debug_coresight(hdev, params);
+ rc = hdev->asic_funcs->debug_coresight(hdev, ctx, params);
if (rc) {
dev_err(hdev->dev,
"debug coresight operation failed %d\n", rc);
@@ -269,8 +269,8 @@ static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
- reset_count.hard_reset_cnt = hdev->hard_reset_cnt;
- reset_count.soft_reset_cnt = hdev->soft_reset_cnt;
+ reset_count.hard_reset_cnt = hdev->reset_info.hard_reset_cnt;
+ reset_count.soft_reset_cnt = hdev->reset_info.soft_reset_cnt;
return copy_to_user(out, &reset_count,
min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
@@ -313,15 +313,38 @@ static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
struct hl_device *hdev = hpriv->hdev;
struct hl_info_clk_throttle clk_throttle = {0};
+ ktime_t end_time, zero_time = ktime_set(0, 0);
u32 max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ int i;
if ((!max_size) || (!out))
return -EINVAL;
- clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason;
+ mutex_lock(&hdev->clk_throttling.lock);
+
+ clk_throttle.clk_throttling_reason = hdev->clk_throttling.current_reason;
+
+ for (i = 0 ; i < HL_CLK_THROTTLE_TYPE_MAX ; i++) {
+ if (!(hdev->clk_throttling.aggregated_reason & BIT(i)))
+ continue;
+
+ clk_throttle.clk_throttling_timestamp_us[i] =
+ ktime_to_us(hdev->clk_throttling.timestamp[i].start);
+
+ if (ktime_compare(hdev->clk_throttling.timestamp[i].end, zero_time))
+ end_time = hdev->clk_throttling.timestamp[i].end;
+ else
+ end_time = ktime_get();
+
+ clk_throttle.clk_throttling_duration_ns[i] =
+ ktime_to_ns(ktime_sub(end_time,
+ hdev->clk_throttling.timestamp[i].start));
+
+ }
+ mutex_unlock(&hdev->clk_throttling.lock);
return copy_to_user(out, &clk_throttle,
min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0;
@@ -480,6 +503,94 @@ static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
min((size_t) max_size, sizeof(open_stats_info))) ? -EFAULT : 0;
}
+static int dram_pending_rows_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ u32 pend_rows_num = 0;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ int rc;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ rc = hl_fw_dram_pending_row_get(hdev, &pend_rows_num);
+ if (rc)
+ return rc;
+
+ return copy_to_user(out, &pend_rows_num,
+ min_t(size_t, max_size, sizeof(pend_rows_num))) ? -EFAULT : 0;
+}
+
+static int dram_replaced_rows_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ struct cpucp_hbm_row_info info = {0};
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ int rc;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ rc = hl_fw_dram_replaced_row_get(hdev, &info);
+ if (rc)
+ return rc;
+
+ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
+
+static int last_err_open_dev_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_info_last_err_open_dev_time info = {0};
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ info.timestamp = ktime_to_ns(hdev->last_error.open_dev_timestamp);
+
+ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
+
+static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_info_cs_timeout_event info = {0};
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ info.seq = hdev->last_error.cs_timeout_seq;
+ info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout_timestamp);
+
+ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
+
+static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ struct hl_info_razwi_event info = {0};
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ info.timestamp = ktime_to_ns(hdev->last_error.razwi_timestamp);
+ info.addr = hdev->last_error.razwi_addr;
+ info.engine_id_1 = hdev->last_error.razwi_engine_id_1;
+ info.engine_id_2 = hdev->last_error.razwi_engine_id_2;
+ info.no_engine_id = hdev->last_error.razwi_non_engine_initiator;
+ info.error_type = hdev->last_error.razwi_type;
+
+ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
+
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@@ -503,6 +614,33 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_RESET_COUNT:
return get_reset_count(hdev, args);
+ case HL_INFO_HW_EVENTS:
+ return hw_events_info(hdev, false, args);
+
+ case HL_INFO_HW_EVENTS_AGGREGATE:
+ return hw_events_info(hdev, true, args);
+
+ case HL_INFO_CS_COUNTERS:
+ return cs_counters_info(hpriv, args);
+
+ case HL_INFO_CLK_THROTTLE_REASON:
+ return clk_throttle_info(hpriv, args);
+
+ case HL_INFO_SYNC_MANAGER:
+ return sync_manager_info(hpriv, args);
+
+ case HL_INFO_OPEN_STATS:
+ return open_stats_info(hpriv, args);
+
+ case HL_INFO_LAST_ERR_OPEN_DEV_TIME:
+ return last_err_open_dev_info(hpriv, args);
+
+ case HL_INFO_CS_TIMEOUT_EVENT:
+ return cs_timeout_info(hpriv, args);
+
+ case HL_INFO_RAZWI_EVENT:
+ return razwi_info(hpriv, args);
+
default:
break;
}
@@ -515,10 +653,6 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
}
switch (args->op) {
- case HL_INFO_HW_EVENTS:
- rc = hw_events_info(hdev, false, args);
- break;
-
case HL_INFO_DRAM_USAGE:
rc = dram_usage_info(hpriv, args);
break;
@@ -531,10 +665,6 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
rc = device_utilization(hdev, args);
break;
- case HL_INFO_HW_EVENTS_AGGREGATE:
- rc = hw_events_info(hdev, true, args);
- break;
-
case HL_INFO_CLK_RATE:
rc = get_clk_rate(hdev, args);
break;
@@ -542,18 +672,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_TIME_SYNC:
return time_sync_info(hdev, args);
- case HL_INFO_CS_COUNTERS:
- return cs_counters_info(hpriv, args);
-
case HL_INFO_PCI_COUNTERS:
return pci_counters_info(hpriv, args);
- case HL_INFO_CLK_THROTTLE_REASON:
- return clk_throttle_info(hpriv, args);
-
- case HL_INFO_SYNC_MANAGER:
- return sync_manager_info(hpriv, args);
-
case HL_INFO_TOTAL_ENERGY:
return total_energy_consumption_info(hpriv, args);
@@ -563,12 +684,16 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_POWER:
return power_info(hpriv, args);
- case HL_INFO_OPEN_STATS:
- return open_stats_info(hpriv, args);
+
+ case HL_INFO_DRAM_REPLACED_ROWS:
+ return dram_replaced_rows_info(hpriv, args);
+
+ case HL_INFO_DRAM_PENDING_ROWS:
+ return dram_pending_rows_info(hpriv, args);
default:
dev_err(dev, "Invalid request %d\n", args->op);
- rc = -ENOTTY;
+ rc = -EINVAL;
break;
}
@@ -613,16 +738,17 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
"Rejecting debug configuration request because device not in debug mode\n");
return -EFAULT;
}
- args->input_size =
- min(args->input_size, hl_debug_struct_size[args->op]);
- rc = debug_coresight(hdev, args);
+ args->input_size = min(args->input_size, hl_debug_struct_size[args->op]);
+ rc = debug_coresight(hdev, hpriv->ctx, args);
break;
+
case HL_DEBUG_OP_SET_MODE:
- rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
+ rc = hl_device_set_debug_mode(hdev, hpriv->ctx, (bool) args->enable);
break;
+
default:
dev_err(hdev->dev, "Invalid request %d\n", args->op);
- rc = -ENOTTY;
+ rc = -EINVAL;
break;
}
@@ -649,7 +775,6 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
const struct hl_ioctl_desc *ioctl, struct device *dev)
{
struct hl_fpriv *hpriv = filep->private_data;
- struct hl_device *hdev = hpriv->hdev;
unsigned int nr = _IOC_NR(cmd);
char stack_kdata[128] = {0};
char *kdata = NULL;
@@ -658,12 +783,6 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
u32 hl_size;
int retcode;
- if (hdev->hard_reset_pending) {
- dev_crit_ratelimited(dev,
- "Device HARD reset pending! Please close FD\n");
- return -ENODEV;
- }
-
/* Do not trust userspace, use our own definition */
func = ioctl->func;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 0743319b10c7..6103e479e855 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -429,6 +429,9 @@ static int init_signal_cs(struct hl_device *hdev,
rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
false);
+ job->cs->sob_addr_offset = hw_sob->sob_addr;
+ job->cs->initial_sob_count = prop->next_sob_val - 1;
+
return rc;
}
@@ -571,7 +574,7 @@ static int encaps_sig_first_staged_cs_handler
struct hl_encaps_signals_mgr *mgr;
int rc = 0;
- mgr = &hdev->compute_ctx->sig_mgr;
+ mgr = &cs->ctx->sig_mgr;
spin_lock(&mgr->lock);
encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
index e33f65be8a00..57f5d2c48330 100644
--- a/drivers/misc/habanalabs/common/hwmon.c
+++ b/drivers/misc/habanalabs/common/hwmon.c
@@ -10,17 +10,148 @@
#include <linux/pci.h>
#include <linux/hwmon.h>
-#define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1)
+#define HWMON_NR_SENSOR_TYPES (hwmon_max)
-int hl_build_hwmon_channel_info(struct hl_device *hdev,
- struct cpucp_sensor *sensors_arr)
+#ifdef _HAS_HWMON_HWMON_T_ENABLE
+
+static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types type,
+ u32 cpucp_flags)
{
- u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
- u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
+ u32 flags;
+
+ switch (type) {
+ case hwmon_temp:
+ flags = (cpucp_flags << 1) | HWMON_T_ENABLE;
+ break;
+
+ case hwmon_in:
+ flags = (cpucp_flags << 1) | HWMON_I_ENABLE;
+ break;
+
+ case hwmon_curr:
+ flags = (cpucp_flags << 1) | HWMON_C_ENABLE;
+ break;
+
+ case hwmon_fan:
+ flags = (cpucp_flags << 1) | HWMON_F_ENABLE;
+ break;
+
+ case hwmon_power:
+ flags = (cpucp_flags << 1) | HWMON_P_ENABLE;
+ break;
+
+ case hwmon_pwm:
+ /* enable bit was here from day 1, so no need to adjust */
+ flags = cpucp_flags;
+ break;
+
+ default:
+ dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
+ flags = cpucp_flags;
+ break;
+ }
+
+ return flags;
+}
+
+static u32 fixup_attr_legacy_fw(u32 attr)
+{
+ return (attr - 1);
+}
+
+#else
+
+static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types type,
+ u32 cpucp_flags)
+{
+ return cpucp_flags;
+}
+
+static u32 fixup_attr_legacy_fw(u32 attr)
+{
+ return attr;
+}
+
+#endif /* !_HAS_HWMON_HWMON_T_ENABLE */
+
+static u32 adjust_hwmon_flags(struct hl_device *hdev, enum hwmon_sensor_types type, u32 cpucp_flags)
+{
+ u32 flags, cpucp_input_val;
+ bool use_cpucp_enum;
+
+ use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+ CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
+
+ /* If f/w is using it's own enum, we need to check if the properties values are aligned.
+ * If not, it means we need to adjust the values to the new format that is used in the
+ * kernel since 5.6 (enum values were incremented by 1 by adding a new enable value).
+ */
+ if (use_cpucp_enum) {
+ switch (type) {
+ case hwmon_temp:
+ cpucp_input_val = cpucp_temp_input;
+ if (cpucp_input_val == hwmon_temp_input)
+ flags = cpucp_flags;
+ else
+ flags = (cpucp_flags << 1) | HWMON_T_ENABLE;
+ break;
+
+ case hwmon_in:
+ cpucp_input_val = cpucp_in_input;
+ if (cpucp_input_val == hwmon_in_input)
+ flags = cpucp_flags;
+ else
+ flags = (cpucp_flags << 1) | HWMON_I_ENABLE;
+ break;
+
+ case hwmon_curr:
+ cpucp_input_val = cpucp_curr_input;
+ if (cpucp_input_val == hwmon_curr_input)
+ flags = cpucp_flags;
+ else
+ flags = (cpucp_flags << 1) | HWMON_C_ENABLE;
+ break;
+
+ case hwmon_fan:
+ cpucp_input_val = cpucp_fan_input;
+ if (cpucp_input_val == hwmon_fan_input)
+ flags = cpucp_flags;
+ else
+ flags = (cpucp_flags << 1) | HWMON_F_ENABLE;
+ break;
+
+ case hwmon_pwm:
+ /* enable bit was here from day 1, so no need to adjust */
+ flags = cpucp_flags;
+ break;
+
+ case hwmon_power:
+ cpucp_input_val = CPUCP_POWER_INPUT;
+ if (cpucp_input_val == hwmon_power_input)
+ flags = cpucp_flags;
+ else
+ flags = (cpucp_flags << 1) | HWMON_P_ENABLE;
+ break;
+
+ default:
+ dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
+ flags = cpucp_flags;
+ break;
+ }
+ } else {
+ flags = fixup_flags_legacy_fw(hdev, type, cpucp_flags);
+ }
+
+ return flags;
+}
+
+int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sensors_arr)
+{
+ u32 num_sensors_for_type, flags, num_active_sensor_types = 0, arr_size = 0, *curr_arr;
u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0};
+ u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
struct hwmon_channel_info **channels_info;
- u32 num_sensors_for_type, num_active_sensor_types = 0,
- arr_size = 0, *curr_arr;
+ u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
enum hwmon_sensor_types type;
int rc, i, j;
@@ -31,8 +162,7 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
break;
if (type >= HWMON_NR_SENSOR_TYPES) {
- dev_err(hdev->dev,
- "Got wrong sensor type %d from device\n", type);
+ dev_err(hdev->dev, "Got wrong sensor type %d from device\n", type);
return -EINVAL;
}
@@ -45,8 +175,9 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
continue;
num_sensors_for_type = counts[i] + 1;
- curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr),
- GFP_KERNEL);
+ dev_dbg(hdev->dev, "num_sensors_for_type %d = %d\n", i, num_sensors_for_type);
+
+ curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr), GFP_KERNEL);
if (!curr_arr) {
rc = -ENOMEM;
goto sensors_type_err;
@@ -59,20 +190,18 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
for (i = 0 ; i < arr_size ; i++) {
type = le32_to_cpu(sensors_arr[i].type);
curr_arr = sensors_by_type[type];
- curr_arr[sensors_by_type_next_index[type]++] =
- le32_to_cpu(sensors_arr[i].flags);
+ flags = adjust_hwmon_flags(hdev, type, le32_to_cpu(sensors_arr[i].flags));
+ curr_arr[sensors_by_type_next_index[type]++] = flags;
}
- channels_info = kcalloc(num_active_sensor_types + 1,
- sizeof(*channels_info), GFP_KERNEL);
+ channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL);
if (!channels_info) {
rc = -ENOMEM;
goto channels_info_array_err;
}
for (i = 0 ; i < num_active_sensor_types ; i++) {
- channels_info[i] = kzalloc(sizeof(*channels_info[i]),
- GFP_KERNEL);
+ channels_info[i] = kzalloc(sizeof(*channels_info[i]), GFP_KERNEL);
if (!channels_info[i]) {
rc = -ENOMEM;
goto channel_info_err;
@@ -88,18 +217,19 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
j++;
}
- hdev->hl_chip_info->info =
- (const struct hwmon_channel_info **)channels_info;
+ hdev->hl_chip_info->info = (const struct hwmon_channel_info **)channels_info;
return 0;
channel_info_err:
- for (i = 0 ; i < num_active_sensor_types ; i++)
+ for (i = 0 ; i < num_active_sensor_types ; i++) {
if (channels_info[i]) {
kfree(channels_info[i]->config);
kfree(channels_info[i]);
}
+ }
kfree(channels_info);
+
channels_info_array_err:
sensors_type_err:
for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++)
@@ -112,14 +242,16 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
struct hl_device *hdev = dev_get_drvdata(dev);
- int rc;
+ bool use_cpucp_enum;
u32 cpucp_attr;
- bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
- CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
+ int rc;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
+ use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+ CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
+
switch (type) {
case hwmon_temp:
switch (attr) {
@@ -151,7 +283,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
rc = hl_get_temperature(hdev, channel, cpucp_attr, val);
else
- rc = hl_get_temperature(hdev, channel, attr, val);
+ rc = hl_get_temperature(hdev, channel, fixup_attr_legacy_fw(attr), val);
break;
case hwmon_in:
switch (attr) {
@@ -174,7 +306,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
rc = hl_get_voltage(hdev, channel, cpucp_attr, val);
else
- rc = hl_get_voltage(hdev, channel, attr, val);
+ rc = hl_get_voltage(hdev, channel, fixup_attr_legacy_fw(attr), val);
break;
case hwmon_curr:
switch (attr) {
@@ -197,7 +329,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
rc = hl_get_current(hdev, channel, cpucp_attr, val);
else
- rc = hl_get_current(hdev, channel, attr, val);
+ rc = hl_get_current(hdev, channel, fixup_attr_legacy_fw(attr), val);
break;
case hwmon_fan:
switch (attr) {
@@ -217,7 +349,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
rc = hl_get_fan_speed(hdev, channel, cpucp_attr, val);
else
- rc = hl_get_fan_speed(hdev, channel, attr, val);
+ rc = hl_get_fan_speed(hdev, channel, fixup_attr_legacy_fw(attr), val);
break;
case hwmon_pwm:
switch (attr) {
@@ -234,6 +366,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
rc = hl_get_pwm_info(hdev, channel, cpucp_attr, val);
else
+ /* no need for fixup as pwm was aligned from day 1 */
rc = hl_get_pwm_info(hdev, channel, attr, val);
break;
case hwmon_power:
@@ -251,7 +384,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
rc = hl_get_power(hdev, channel, cpucp_attr, val);
else
- rc = hl_get_power(hdev, channel, attr, val);
+ rc = hl_get_power(hdev, channel, fixup_attr_legacy_fw(attr), val);
break;
default:
return -EINVAL;
@@ -286,7 +419,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
hl_set_temperature(hdev, channel, cpucp_attr, val);
else
- hl_set_temperature(hdev, channel, attr, val);
+ hl_set_temperature(hdev, channel, fixup_attr_legacy_fw(attr), val);
break;
case hwmon_pwm:
switch (attr) {
@@ -303,6 +436,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
hl_set_pwm_info(hdev, channel, cpucp_attr, val);
else
+ /* no need for fixup as pwm was aligned from day 1 */
hl_set_pwm_info(hdev, channel, attr, val);
break;
case hwmon_in:
@@ -317,7 +451,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
hl_set_voltage(hdev, channel, cpucp_attr, val);
else
- hl_set_voltage(hdev, channel, attr, val);
+ hl_set_voltage(hdev, channel, fixup_attr_legacy_fw(attr), val);
break;
case hwmon_curr:
switch (attr) {
@@ -331,7 +465,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
hl_set_current(hdev, channel, cpucp_attr, val);
else
- hl_set_current(hdev, channel, attr, val);
+ hl_set_current(hdev, channel, fixup_attr_legacy_fw(attr), val);
break;
case hwmon_power:
switch (attr) {
@@ -345,7 +479,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
if (use_cpucp_enum)
hl_set_power(hdev, channel, cpucp_attr, val);
else
- hl_set_power(hdev, channel, attr, val);
+ hl_set_power(hdev, channel, fixup_attr_legacy_fw(attr), val);
break;
default:
return -EINVAL;
@@ -444,6 +578,9 @@ int hl_get_temperature(struct hl_device *hdev,
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
+ dev_dbg(hdev->dev, "get temp, ctl 0x%x, sensor %d, type %d\n",
+ pkt.ctl, pkt.sensor_index, pkt.type);
+
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);
@@ -677,12 +814,18 @@ int hl_set_power(struct hl_device *hdev,
int sensor_index, u32 attr, long value)
{
struct cpucp_packet pkt;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
memset(&pkt, 0, sizeof(pkt));
- pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+ if (prop->use_get_power_for_reset_history)
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
+ else
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_SET <<
+ CPUCP_PKT_CTL_OPCODE_SHIFT);
+
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
pkt.value = __cpu_to_le64(value);
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index 96d82b682674..1b6bdc900c26 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -145,8 +145,12 @@ static void handle_user_cq(struct hl_device *hdev,
spin_lock(&user_cq->wait_list_lock);
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
- pend->fence.timestamp = now;
- complete_all(&pend->fence.completion);
+ if ((pend->cq_kernel_addr &&
+ *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
+ !pend->cq_kernel_addr) {
+ pend->fence.timestamp = now;
+ complete_all(&pend->fence.completion);
+ }
}
spin_unlock(&user_cq->wait_list_lock);
}
@@ -245,10 +249,8 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
*/
dma_rmb();
- if (hdev->disabled) {
- dev_warn(hdev->dev,
- "Device disabled but received IRQ %d for EQ\n",
- irq);
+ if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
+ dev_warn(hdev->dev, "Device disabled but received an EQ event\n");
goto skip_irq;
}
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 9bd626a00de3..c1eefaebacb6 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -316,7 +316,7 @@ static int free_phys_pg_pack(struct hl_device *hdev,
}
if (rc && !hdev->disabled)
- hl_device_reset(hdev, HL_RESET_HARD);
+ hl_device_reset(hdev, HL_DRV_RESET_HARD);
end:
kvfree(phys_pg_pack->pages);
@@ -477,7 +477,7 @@ static int add_va_block_locked(struct hl_device *hdev,
struct list_head *va_list, u64 start, u64 end)
{
struct hl_vm_va_block *va_block, *res = NULL;
- u64 size = end - start;
+ u64 size = end - start + 1;
print_va_list_locked(hdev, va_list);
@@ -518,7 +518,7 @@ static int add_va_block_locked(struct hl_device *hdev,
/**
* add_va_block() - wrapper for add_va_block_locked.
* @hdev: pointer to the habanalabs device structure.
- * @va_list: pointer to the virtual addresses block list.
+ * @va_range: pointer to the virtual addresses range object.
* @start: start virtual address.
* @end: end virtual address.
*
@@ -538,8 +538,11 @@ static inline int add_va_block(struct hl_device *hdev,
}
/**
- * is_hint_crossing_range() - check if hint address crossing specified reserved
- * range.
+ * is_hint_crossing_range() - check if hint address crossing specified reserved.
+ * @range_type: virtual space range type.
+ * @start_addr: start virtual address.
+ * @size: block size.
+ * @prop: asic properties structure to retrieve reserved ranges from.
*/
static inline bool is_hint_crossing_range(enum hl_va_range_type range_type,
u64 start_addr, u32 size, struct asic_fixed_properties *prop) {
@@ -644,7 +647,7 @@ static u64 get_va_block(struct hl_device *hdev,
continue;
}
- valid_size = va_block->end - valid_start;
+ valid_size = va_block->end - valid_start + 1;
if (valid_size < size)
continue;
@@ -707,7 +710,7 @@ static u64 get_va_block(struct hl_device *hdev,
if (new_va_block->size > size) {
new_va_block->start += size;
- new_va_block->size = new_va_block->end - new_va_block->start;
+ new_va_block->size = new_va_block->end - new_va_block->start + 1;
} else {
list_del(&new_va_block->node);
kfree(new_va_block);
@@ -749,6 +752,7 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
/**
* hl_get_va_range_type() - get va_range type for the given address and size.
+ * @ctx: context to fetch va_range from.
* @address: the start address of the area we want to validate.
* @size: the size in bytes of the area we want to validate.
* @type: returned va_range type.
@@ -776,8 +780,8 @@ static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
* hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block.
* @hdev: pointer to the habanalabs device structure
* @ctx: pointer to the context structure.
- * @start: start virtual address.
- * @end: end virtual address.
+ * @start_addr: start virtual address.
+ * @size: number of bytes to unreserve.
*
* This function does the following:
* - Takes the list lock and calls add_va_block_locked.
@@ -1201,17 +1205,13 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
goto map_err;
}
- rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false,
- *vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size);
+ rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
+ ctx->asid, ret_vaddr, phys_pg_pack->total_size);
mutex_unlock(&ctx->mmu_lock);
- if (rc) {
- dev_err(hdev->dev,
- "mapping handle %u failed due to MMU cache invalidation\n",
- handle);
+ if (rc)
goto map_err;
- }
ret_vaddr += phys_pg_pack->offset;
@@ -1349,9 +1349,8 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
* at the loop end rather than for each iteration
*/
if (!ctx_free)
- rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true,
- *vm_type, ctx->asid, vaddr,
- phys_pg_pack->total_size);
+ rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr,
+ phys_pg_pack->total_size);
mutex_unlock(&ctx->mmu_lock);
@@ -1364,11 +1363,6 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
if (!ctx_free) {
int tmp_rc;
- if (rc)
- dev_err(hdev->dev,
- "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
- vaddr);
-
tmp_rc = add_va_block(hdev, va_range, vaddr,
vaddr + phys_pg_pack->total_size - 1);
if (tmp_rc) {
@@ -2037,7 +2031,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
- rc = -ENOTTY;
+ rc = -EINVAL;
break;
}
@@ -2162,7 +2156,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
- rc = -ENOTTY;
+ rc = -EINVAL;
break;
}
@@ -2339,6 +2333,8 @@ void hl_userptr_delete_list(struct hl_device *hdev,
/**
* hl_userptr_is_pinned() - returns whether the given userptr is pinned.
* @hdev: pointer to the habanalabs device structure.
+ * @addr: user address to check.
+ * @size: user block size to check.
* @userptr_list: pointer to the list to clear.
* @userptr: pointer to userptr to check.
*
@@ -2361,9 +2357,10 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
/**
* va_range_init() - initialize virtual addresses range.
* @hdev: pointer to the habanalabs device structure.
- * @va_range: pointer to the range to initialize.
+ * @va_ranges: pointer to va_ranges array.
* @start: range start address.
* @end: range end address.
+ * @page_size: page size for this va_range.
*
* This function does the following:
* - Initializes the virtual addresses list of the given range with the given
@@ -2388,8 +2385,14 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
start += PAGE_SIZE;
}
- if (end & (PAGE_SIZE - 1))
- end &= PAGE_MASK;
+ /*
+ * The end of the range is inclusive, hence we need to align it
+ * to the end of the last full page in the range. For example if
+ * end = 0x3ff5 with page size 0x1000, we need to align it to
+ * 0x2fff. The remainig 0xff5 bytes do not form a full page.
+ */
+ if ((end + 1) & (PAGE_SIZE - 1))
+ end = ((end + 1) & PAGE_MASK) - 1;
}
if (start >= end) {
@@ -2414,7 +2417,7 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
/**
* va_range_fini() - clear a virtual addresses range.
* @hdev: pointer to the habanalabs structure.
- * va_range: pointer to virtual addresses rang.e
+ * @va_range: pointer to virtual addresses range.
*
* This function does the following:
* - Frees the virtual addresses block list and its lock.
@@ -2434,12 +2437,15 @@ static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
* @ctx: pointer to the habanalabs context structure.
* @host_range_start: host virtual addresses range start.
* @host_range_end: host virtual addresses range end.
+ * @host_page_size: host page size.
* @host_huge_range_start: host virtual addresses range start for memory
* allocated with huge pages.
* @host_huge_range_end: host virtual addresses range end for memory allocated
* with huge pages.
+ * @host_huge_page_size: host huge page size.
* @dram_range_start: dram virtual addresses range start.
* @dram_range_end: dram virtual addresses range end.
+ * @dram_page_size: dram page size.
*
* This function initializes the following:
* - MMU for context.
@@ -2564,14 +2570,14 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
return 0;
dram_range_start = prop->dmmu.start_addr;
- dram_range_end = prop->dmmu.end_addr;
+ dram_range_end = prop->dmmu.end_addr - 1;
dram_page_size = prop->dram_page_size ?
prop->dram_page_size : prop->dmmu.page_size;
host_range_start = prop->pmmu.start_addr;
- host_range_end = prop->pmmu.end_addr;
+ host_range_end = prop->pmmu.end_addr - 1;
host_page_size = prop->pmmu.page_size;
host_huge_range_start = prop->pmmu_huge.start_addr;
- host_huge_range_end = prop->pmmu_huge.end_addr;
+ host_huge_range_end = prop->pmmu_huge.end_addr - 1;
host_huge_page_size = prop->pmmu_huge.page_size;
return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
@@ -2618,7 +2624,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
* Clearly something went wrong on hard reset so no point in printing
* another side effect error
*/
- if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
+ if (!hdev->reset_info.hard_reset_pending && !hash_empty(ctx->mem_hash))
dev_dbg(hdev->dev,
"user released device without removing its memory mappings\n");
@@ -2633,8 +2639,8 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
mutex_lock(&ctx->mmu_lock);
/* invalidate the cache once after the unmapping loop */
- hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
- hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
+ hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
+ hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK);
mutex_unlock(&ctx->mmu_lock);
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index aa96917f62e5..9153a1f55175 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -637,3 +637,28 @@ u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr)
{
return addr;
}
+
+int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
+{
+ int rc;
+
+ rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
+ if (rc)
+ dev_err_ratelimited(hdev->dev, "MMU cache invalidation failed\n");
+
+ return rc;
+}
+
+int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
+ u32 flags, u32 asid, u64 va, u64 size)
+{
+ int rc;
+
+ rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, is_hard, flags,
+ asid, va, size);
+ if (rc)
+ dev_err_ratelimited(hdev->dev, "MMU cache range invalidation failed\n");
+
+ return rc;
+}
+
diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index 0f536f79dd9c..6134b6ae7615 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -269,7 +269,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
num_of_hop3 = prop->dram_size_for_default_page_mapping;
do_div(num_of_hop3, prop->dram_page_size);
- do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
+ do_div(num_of_hop3, HOP_PTE_ENTRIES_512);
/* add hop1 and hop2 */
total_hops = num_of_hop3 + 2;
@@ -330,7 +330,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
for (i = 0 ; i < num_of_hop3 ; i++) {
hop3_pte_addr = ctx->dram_default_hops[i];
- for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+ for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
write_final_pte(ctx, hop3_pte_addr, pte_val);
get_pte(ctx, ctx->dram_default_hops[i]);
hop3_pte_addr += HL_PTE_SIZE;
@@ -369,7 +369,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
num_of_hop3 = prop->dram_size_for_default_page_mapping;
do_div(num_of_hop3, prop->dram_page_size);
- do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
+ do_div(num_of_hop3, HOP_PTE_ENTRIES_512);
hop0_addr = get_hop0_addr(ctx);
/* add hop1 and hop2 */
@@ -379,7 +379,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
for (i = 0 ; i < num_of_hop3 ; i++) {
hop3_pte_addr = ctx->dram_default_hops[i];
- for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+ for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
clear_pte(ctx, hop3_pte_addr);
put_pte(ctx, ctx->dram_default_hops[i]);
hop3_pte_addr += HL_PTE_SIZE;
@@ -573,7 +573,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
- is_huge = curr_pte & LAST_MASK;
+ is_huge = curr_pte & mmu_prop->last_mask;
if (is_dram_addr && !is_huge) {
dev_err(hdev->dev,
@@ -597,7 +597,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
if (hdev->dram_default_page_mapping && is_dram_addr) {
u64 default_pte = (prop->mmu_dram_default_page_addr &
- HOP_PHYS_ADDR_MASK) | LAST_MASK |
+ HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
PAGE_PRESENT_MASK;
if (curr_pte == default_pte) {
dev_err(hdev->dev,
@@ -729,7 +729,7 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
if (hdev->dram_default_page_mapping && is_dram_addr) {
u64 default_pte = (prop->mmu_dram_default_page_addr &
- HOP_PHYS_ADDR_MASK) | LAST_MASK |
+ HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
PAGE_PRESENT_MASK;
if (curr_pte != default_pte) {
@@ -769,7 +769,7 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
goto err;
}
- curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
+ curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
| PAGE_PRESENT_MASK;
if (is_huge)
@@ -930,7 +930,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
return -EFAULT;
- if (hops->hop_info[i].hop_pte_val & LAST_MASK)
+ if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
break;
}
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 42c1769ad25d..45c715325e2a 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -139,7 +139,7 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "0x%08x\n",
- hdev->asic_prop.cpucp_info.cpld_version);
+ le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version));
}
static ssize_t cpucp_kernel_ver_show(struct device *dev,
@@ -163,8 +163,13 @@ static ssize_t infineon_ver_show(struct device *dev,
{
struct hl_device *hdev = dev_get_drvdata(dev);
- return sprintf(buf, "0x%04x\n",
- hdev->asic_prop.cpucp_info.infineon_version);
+ if (hdev->asic_prop.cpucp_info.infineon_second_stage_version)
+ return sprintf(buf, "%#04x %#04x\n",
+ le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version),
+ le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_second_stage_version));
+ else
+ return sprintf(buf, "%#04x\n",
+ le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version));
}
static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
@@ -206,7 +211,7 @@ static ssize_t soft_reset_store(struct device *dev,
goto out;
}
- if (!hdev->allow_inference_soft_reset) {
+ if (!hdev->asic_prop.allow_inference_soft_reset) {
dev_err(hdev->dev, "Device does not support inference soft-reset\n");
goto out;
}
@@ -236,7 +241,7 @@ static ssize_t hard_reset_store(struct device *dev,
dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
- hl_device_reset(hdev, HL_RESET_HARD);
+ hl_device_reset(hdev, HL_DRV_RESET_HARD);
out:
return count;
@@ -298,7 +303,7 @@ static ssize_t soft_reset_cnt_show(struct device *dev,
{
struct hl_device *hdev = dev_get_drvdata(dev);
- return sprintf(buf, "%d\n", hdev->soft_reset_cnt);
+ return sprintf(buf, "%d\n", hdev->reset_info.soft_reset_cnt);
}
static ssize_t hard_reset_cnt_show(struct device *dev,
@@ -306,7 +311,7 @@ static ssize_t hard_reset_cnt_show(struct device *dev,
{
struct hl_device *hdev = dev_get_drvdata(dev);
- return sprintf(buf, "%d\n", hdev->hard_reset_cnt);
+ return sprintf(buf, "%d\n", hdev->reset_info.hard_reset_cnt);
}
static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
@@ -419,8 +424,6 @@ static struct attribute *hl_dev_attrs[] = {
&dev_attr_max_power.attr,
&dev_attr_pci_addr.attr,
&dev_attr_preboot_btl_ver.attr,
- &dev_attr_soft_reset.attr,
- &dev_attr_soft_reset_cnt.attr,
&dev_attr_status.attr,
&dev_attr_thermal_ver.attr,
&dev_attr_uboot_ver.attr,
@@ -445,15 +448,25 @@ static const struct attribute_group *hl_dev_attr_groups[] = {
NULL,
};
+static struct attribute *hl_dev_inference_attrs[] = {
+ &dev_attr_soft_reset.attr,
+ &dev_attr_soft_reset_cnt.attr,
+ NULL,
+};
+
+static struct attribute_group hl_dev_inference_attr_group = {
+ .attrs = hl_dev_inference_attrs,
+};
+
+static const struct attribute_group *hl_dev_inference_attr_groups[] = {
+ &hl_dev_inference_attr_group,
+ NULL,
+};
+
int hl_sysfs_init(struct hl_device *hdev)
{
int rc;
- if (hdev->asic_type == ASIC_GOYA)
- hdev->pm_mng_profile = PM_AUTO;
- else
- hdev->pm_mng_profile = PM_MANUAL;
-
hdev->max_power = hdev->asic_prop.max_power_default;
hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
@@ -465,10 +478,25 @@ int hl_sysfs_init(struct hl_device *hdev)
return rc;
}
+ if (!hdev->asic_prop.allow_inference_soft_reset)
+ return 0;
+
+ rc = device_add_groups(hdev->dev, hl_dev_inference_attr_groups);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to add groups to device, error %d\n", rc);
+ return rc;
+ }
+
return 0;
}
void hl_sysfs_fini(struct hl_device *hdev)
{
device_remove_groups(hdev->dev, hl_dev_attr_groups);
+
+ if (!hdev->asic_prop.allow_inference_soft_reset)
+ return;
+
+ device_remove_groups(hdev->dev, hl_dev_inference_attr_groups);
}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 825737dfe381..013c6da2e3ca 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2020 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -593,26 +593,27 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
- prop->mmu_hop_table_size = HOP_TABLE_SIZE;
- prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
+ prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->dram_supports_virtual_memory = false;
- prop->pmmu.hop0_shift = HOP0_SHIFT;
- prop->pmmu.hop1_shift = HOP1_SHIFT;
- prop->pmmu.hop2_shift = HOP2_SHIFT;
- prop->pmmu.hop3_shift = HOP3_SHIFT;
- prop->pmmu.hop4_shift = HOP4_SHIFT;
- prop->pmmu.hop0_mask = HOP0_MASK;
- prop->pmmu.hop1_mask = HOP1_MASK;
- prop->pmmu.hop2_mask = HOP2_MASK;
- prop->pmmu.hop3_mask = HOP3_MASK;
- prop->pmmu.hop4_mask = HOP4_MASK;
+ prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
+ prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
+ prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
+ prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
+ prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
+ prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
+ prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
+ prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
+ prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
+ prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
prop->pmmu.start_addr = VA_HOST_SPACE_START;
prop->pmmu.end_addr =
(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
+ prop->pmmu.last_mask = LAST_MASK;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@@ -664,6 +665,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->clk_pll_index = HL_GAUDI_MME_PLL;
prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
+ prop->use_get_power_for_reset_history = true;
+
return 0;
}
@@ -878,6 +881,11 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
int rc;
if (hdev->asic_prop.fw_security_enabled) {
+ struct gaudi_device *gaudi = hdev->asic_specific;
+
+ if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
+ return 0;
+
rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
if (rc)
@@ -1273,6 +1281,7 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
struct hl_cs_compl *cs_cmpl =
container_of(cs->fence, struct hl_cs_compl, base_fence);
+ struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
struct gaudi_collective_properties *cprop;
u32 stream, queue_id, sob_group_offset;
struct gaudi_device *gaudi;
@@ -1285,10 +1294,16 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
gaudi = hdev->asic_specific;
cprop = &gaudi->collective_props;
- /* In encaps signals case the SOB info will be retrieved from
- * the handle in gaudi_collective_slave_init_job.
- */
- if (!cs->encaps_signals) {
+ if (cs->encaps_signals) {
+ cs_cmpl->hw_sob = handle->hw_sob;
+ /* at this checkpoint we only need the hw_sob pointer
+ * for the completion check before start going over the jobs
+ * of the master/slaves, the sob_value will be taken later on
+ * in gaudi_collective_slave_init_job depends on each
+ * job wait offset value.
+ */
+ cs_cmpl->sob_val = 0;
+ } else {
/* copy the SOB id and value of the signal CS */
cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
@@ -1621,6 +1636,8 @@ static int gaudi_late_init(struct hl_device *hdev)
*/
gaudi_mmu_prepare(hdev, 1);
+ hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
+
return 0;
disable_pci_access:
@@ -4006,7 +4023,7 @@ static void gaudi_init_firmware_loader(struct hl_device *hdev)
struct fw_load_mgr *fw_loader = &hdev->fw_loader;
/* fill common fields */
- fw_loader->linux_loaded = false;
+ fw_loader->fw_comp_loaded = FW_TYPE_NONE;
fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
@@ -4289,13 +4306,31 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset
* via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
* registers in case of old F/Ws
*/
- if (hdev->fw_loader.linux_loaded) {
+ if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_halt_irq);
WREG32(irq_handler_offset,
gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
+
+ /* This is a hail-mary attempt to revive the card in the small chance that the
+ * f/w has experienced a watchdog event, which caused it to return back to preboot.
+ * In that case, triggering reset through GIC won't help. We need to trigger the
+ * reset as if Linux wasn't loaded.
+ *
+ * We do it only if the reset cause was HB, because that would be the indication
+ * of such an event.
+ *
+ * In case watchdog hasn't expired but we still got HB, then this won't do any
+ * damage.
+ */
+ if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
+ if (hdev->asic_prop.hard_reset_done_by_fw)
+ hl_fw_ask_hard_reset_without_linux(hdev);
+ else
+ hl_fw_ask_halt_machine_without_linux(hdev);
+ }
} else {
if (hdev->asic_prop.hard_reset_done_by_fw)
hl_fw_ask_hard_reset_without_linux(hdev);
@@ -6412,6 +6447,7 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
{
u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
struct gaudi_device *gaudi = hdev->asic_specific;
+ u32 qm_glbl_sts0, qm_cgm_sts;
u64 dma_offset, qm_offset;
dma_addr_t dma_addr;
void *kernel_addr;
@@ -6436,14 +6472,20 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
dma_offset = dma_id * DMA_CORE_OFFSET;
qm_offset = dma_id * DMA_QMAN_OFFSET;
dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
- is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
+ qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
+ qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
+ is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
+ IS_DMA_IDLE(dma_core_sts0);
if (!is_eng_idle) {
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
dma_offset = dma_id * DMA_CORE_OFFSET;
qm_offset = dma_id * DMA_QMAN_OFFSET;
dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
- is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
+ qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
+ qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
+ is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
+ IS_DMA_IDLE(dma_core_sts0);
if (!is_eng_idle) {
dev_err_ratelimited(hdev->dev,
@@ -6522,7 +6564,7 @@ static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
struct gaudi_device *gaudi = hdev->asic_specific;
- if (hdev->hard_reset_pending)
+ if (hdev->reset_info.hard_reset_pending)
return U64_MAX;
return readq(hdev->pcie_bar[HBM_BAR_ID] +
@@ -6533,7 +6575,7 @@ static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
struct gaudi_device *gaudi = hdev->asic_specific;
- if (hdev->hard_reset_pending)
+ if (hdev->reset_info.hard_reset_pending)
return;
writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
@@ -6935,8 +6977,9 @@ event_not_supported:
snprintf(desc, size, "N/A");
}
-static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
- u32 x_y, bool is_write)
+static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
+ bool is_write, s32 *engine_id_1,
+ s32 *engine_id_2)
{
u32 dma_id[2], dma_offset, err_cause[2], mask, i;
@@ -6976,44 +7019,64 @@ static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
switch (x_y) {
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
- if ((err_cause[0] & mask) && !(err_cause[1] & mask))
+ if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
return "DMA0";
- else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
+ } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
return "DMA2";
- else
+ } else {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
+ *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
return "DMA0 or DMA2";
+ }
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
- if ((err_cause[0] & mask) && !(err_cause[1] & mask))
+ if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
return "DMA1";
- else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
+ } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
return "DMA3";
- else
+ } else {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
+ *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
return "DMA1 or DMA3";
+ }
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
- if ((err_cause[0] & mask) && !(err_cause[1] & mask))
+ if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
return "DMA4";
- else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
+ } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
return "DMA6";
- else
+ } else {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
+ *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
return "DMA4 or DMA6";
+ }
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
- if ((err_cause[0] & mask) && !(err_cause[1] & mask))
+ if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
return "DMA5";
- else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
+ } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
return "DMA7";
- else
+ } else {
+ *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
+ *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
return "DMA5 or DMA7";
+ }
}
unknown_initiator:
return "unknown initiator";
}
-static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
- bool is_write)
+static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
+ u32 *engine_id_1, u32 *engine_id_2)
{
u32 val, x_y, axi_id;
@@ -7026,24 +7089,35 @@ static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
switch (x_y) {
case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
- if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
+ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
return "TPC0";
- if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
+ }
+ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
return "NIC0";
+ }
break;
case RAZWI_INITIATOR_ID_X_Y_TPC1:
+ *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
return "TPC1";
case RAZWI_INITIATOR_ID_X_Y_MME0_0:
case RAZWI_INITIATOR_ID_X_Y_MME0_1:
+ *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
return "MME0";
case RAZWI_INITIATOR_ID_X_Y_MME1_0:
case RAZWI_INITIATOR_ID_X_Y_MME1_1:
+ *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
return "MME1";
case RAZWI_INITIATOR_ID_X_Y_TPC2:
+ *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
return "TPC2";
case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
- if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
+ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
return "TPC3";
+ }
+ /* PCI, CPU or PSOC does not have engine id*/
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
return "PCI";
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
@@ -7059,32 +7133,49 @@ static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
- return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
+ return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
+ engine_id_1, engine_id_2);
case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
- if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
+ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
return "TPC4";
- if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
+ }
+ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
return "NIC1";
- if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
+ }
+ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
return "NIC2";
+ }
break;
case RAZWI_INITIATOR_ID_X_Y_TPC5:
+ *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
return "TPC5";
case RAZWI_INITIATOR_ID_X_Y_MME2_0:
case RAZWI_INITIATOR_ID_X_Y_MME2_1:
+ *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
return "MME2";
case RAZWI_INITIATOR_ID_X_Y_MME3_0:
case RAZWI_INITIATOR_ID_X_Y_MME3_1:
+ *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
return "MME3";
case RAZWI_INITIATOR_ID_X_Y_TPC6:
+ *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
return "TPC6";
case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
- if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
+ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
return "TPC7";
- if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
+ }
+ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
return "NIC4";
- if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
+ }
+ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
+ *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
return "NIC5";
+ }
break;
default:
break;
@@ -7101,27 +7192,28 @@ static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
return "unknown initiator";
}
-static void gaudi_print_razwi_info(struct hl_device *hdev)
+static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
+ u32 *engine_id_2)
{
+
if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
dev_err_ratelimited(hdev->dev,
"RAZWI event caused by illegal write of %s\n",
- gaudi_get_razwi_initiator_name(hdev, true));
+ gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
}
if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
dev_err_ratelimited(hdev->dev,
"RAZWI event caused by illegal read of %s\n",
- gaudi_get_razwi_initiator_name(hdev, false));
+ gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
}
}
-static void gaudi_print_mmu_error_info(struct hl_device *hdev)
+static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
{
struct gaudi_device *gaudi = hdev->asic_specific;
- u64 addr;
u32 val;
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
@@ -7129,24 +7221,24 @@ static void gaudi_print_mmu_error_info(struct hl_device *hdev)
val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
- addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
- addr <<= 32;
- addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
+ *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
+ *addr <<= 32;
+ *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
- dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
- addr);
+ dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
+ *type = HL_RAZWI_PAGE_FAULT;
WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
}
val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
- addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
- addr <<= 32;
- addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
+ *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
+ *addr <<= 32;
+ *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
- dev_err_ratelimited(hdev->dev,
- "MMU access error on va 0x%llx\n", addr);
+ dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
+ *type = HL_RAZWI_MMU_ACCESS_ERROR;
WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
}
@@ -7665,15 +7757,46 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
bool razwi)
{
+ u32 engine_id_1, engine_id_2;
char desc[64] = "";
+ u64 razwi_addr = 0;
+ u8 razwi_type;
+ int rc;
+
+ /*
+ * Init engine id by default as not valid and only if razwi initiated from engine with
+ * engine id it will get valid value.
+ * Init razwi type to default, will be changed only if razwi caused by page fault of
+ * MMU access error
+ */
+ engine_id_1 = U16_MAX;
+ engine_id_2 = U16_MAX;
+ razwi_type = U8_MAX;
gaudi_get_event_desc(event_type, desc, sizeof(desc));
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc);
if (razwi) {
- gaudi_print_razwi_info(hdev);
- gaudi_print_mmu_error_info(hdev);
+ gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
+ gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
+
+ /* In case it's the first razwi, save its parameters*/
+ rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1);
+ if (!rc) {
+ hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
+ hdev->last_error.razwi_timestamp = ktime_get();
+ hdev->last_error.razwi_addr = razwi_addr;
+ hdev->last_error.razwi_engine_id_1 = engine_id_1;
+ hdev->last_error.razwi_engine_id_2 = engine_id_2;
+ /*
+ * If first engine id holds non valid value the razwi initiator
+ * does not have engine id
+ */
+ hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX);
+ hdev->last_error.razwi_type = razwi_type;
+
+ }
}
}
@@ -7696,14 +7819,10 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev,
fw_alive->thread_id, fw_alive->uptime_seconds);
}
-static int gaudi_soft_reset_late_init(struct hl_device *hdev)
+static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
{
- struct gaudi_device *gaudi = hdev->asic_specific;
-
- /* Unmask all IRQs since some could have been received
- * during the soft reset
- */
- return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
+ /* GAUDI doesn't support any reset except hard-reset */
+ return -EPERM;
}
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
@@ -7897,27 +8016,39 @@ static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
static void gaudi_print_clk_change_info(struct hl_device *hdev,
u16 event_type)
{
+ ktime_t zero_time = ktime_set(0, 0);
+
+ mutex_lock(&hdev->clk_throttling.lock);
+
switch (event_type) {
case GAUDI_EVENT_FIX_POWER_ENV_S:
- hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to power consumption\n");
break;
case GAUDI_EVENT_FIX_POWER_ENV_E:
- hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
dev_info_ratelimited(hdev->dev,
"Power envelop is safe, back to optimal clock\n");
break;
case GAUDI_EVENT_FIX_THERMAL_ENV_S:
- hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to overheating\n");
break;
case GAUDI_EVENT_FIX_THERMAL_ENV_E:
- hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
dev_info_ratelimited(hdev->dev,
"Thermal envelop is safe, back to optimal clock\n");
break;
@@ -7927,6 +8058,8 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev,
event_type);
break;
}
+
+ mutex_unlock(&hdev->clk_throttling.lock);
}
static void gaudi_handle_eqe(struct hl_device *hdev,
@@ -7975,7 +8108,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
gaudi_print_irq_info(hdev, event_type, true);
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
- fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
+ fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_GIC500:
@@ -7983,7 +8116,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_L2_RAM_ECC:
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
gaudi_print_irq_info(hdev, event_type, false);
- fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
+ fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_0:
@@ -7994,7 +8127,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
gaudi_hbm_read_interrupts(hdev,
gaudi_hbm_event_to_dev(event_type),
&eq_entry->hbm_ecc_data);
- fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
+ fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_1:
@@ -8177,9 +8310,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
reset_device:
if (hdev->asic_prop.fw_security_enabled)
- hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
+ hl_device_reset(hdev, HL_DRV_RESET_HARD
+ | HL_DRV_RESET_BYPASS_REQ_TO_FW
+ | fw_fatal_err_flag);
else if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
+ hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
else
hl_fw_unmask_irq(hdev, event_type);
}
@@ -8206,7 +8341,7 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
int rc;
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
- hdev->hard_reset_pending)
+ hdev->reset_info.hard_reset_pending)
return 0;
if (hdev->pldm)
@@ -8229,12 +8364,6 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
WREG32(mmSTLB_INV_SET, 0);
- if (rc) {
- dev_err_ratelimited(hdev->dev,
- "MMU cache invalidation timeout\n");
- hl_device_reset(hdev, HL_RESET_HARD);
- }
-
return rc;
}
@@ -8662,7 +8791,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
hdev->internal_cb_pool_dma_addr,
HOST_SPACE_INTERNAL_CB_SZ);
- hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
+ hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
mutex_unlock(&ctx->mmu_lock);
if (rc)
@@ -8697,7 +8826,7 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
HOST_SPACE_INTERNAL_CB_SZ);
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
HOST_SPACE_INTERNAL_CB_SZ);
- hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+ hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
mutex_unlock(&ctx->mmu_lock);
gen_pool_destroy(hdev->internal_cb_pool);
@@ -9458,7 +9587,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.disable_clock_gating = gaudi_disable_clock_gating,
.debug_coresight = gaudi_debug_coresight,
.is_device_idle = gaudi_is_device_idle,
- .soft_reset_late_init = gaudi_soft_reset_late_init,
+ .non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
.hw_queues_lock = gaudi_hw_queues_lock,
.hw_queues_unlock = gaudi_hw_queues_unlock,
.get_pci_id = gaudi_get_pci_id,
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index f325e36a71e6..8ac16a9b7d15 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -357,8 +357,8 @@ void gaudi_init_security(struct hl_device *hdev);
void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
void gaudi_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
-int gaudi_debug_coresight(struct hl_device *hdev, void *data);
-void gaudi_halt_coresight(struct hl_device *hdev);
+int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
+void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);
#endif /* GAUDIP_H_ */
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
index 5349c1be13f9..08108f5fed67 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
@@ -848,7 +848,7 @@ static int gaudi_config_spmu(struct hl_device *hdev,
return 0;
}
-int gaudi_debug_coresight(struct hl_device *hdev, void *data)
+int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data)
{
struct hl_debug_params *params = data;
int rc = 0;
@@ -887,7 +887,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data)
return rc;
}
-void gaudi_halt_coresight(struct hl_device *hdev)
+void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx)
{
struct hl_debug_params params = {};
int i, rc;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 5536e8c27bd5..fbcc7bbf44b3 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -410,25 +410,26 @@ int goya_set_fixed_properties(struct hl_device *hdev)
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
- prop->mmu_hop_table_size = HOP_TABLE_SIZE;
- prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
+ prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->dram_supports_virtual_memory = true;
- prop->dmmu.hop0_shift = HOP0_SHIFT;
- prop->dmmu.hop1_shift = HOP1_SHIFT;
- prop->dmmu.hop2_shift = HOP2_SHIFT;
- prop->dmmu.hop3_shift = HOP3_SHIFT;
- prop->dmmu.hop4_shift = HOP4_SHIFT;
- prop->dmmu.hop0_mask = HOP0_MASK;
- prop->dmmu.hop1_mask = HOP1_MASK;
- prop->dmmu.hop2_mask = HOP2_MASK;
- prop->dmmu.hop3_mask = HOP3_MASK;
- prop->dmmu.hop4_mask = HOP4_MASK;
+ prop->dmmu.hop0_shift = MMU_V1_0_HOP0_SHIFT;
+ prop->dmmu.hop1_shift = MMU_V1_0_HOP1_SHIFT;
+ prop->dmmu.hop2_shift = MMU_V1_0_HOP2_SHIFT;
+ prop->dmmu.hop3_shift = MMU_V1_0_HOP3_SHIFT;
+ prop->dmmu.hop4_shift = MMU_V1_0_HOP4_SHIFT;
+ prop->dmmu.hop0_mask = MMU_V1_0_HOP0_MASK;
+ prop->dmmu.hop1_mask = MMU_V1_0_HOP1_MASK;
+ prop->dmmu.hop2_mask = MMU_V1_0_HOP2_MASK;
+ prop->dmmu.hop3_mask = MMU_V1_0_HOP3_MASK;
+ prop->dmmu.hop4_mask = MMU_V1_0_HOP4_MASK;
prop->dmmu.start_addr = VA_DDR_SPACE_START;
prop->dmmu.end_addr = VA_DDR_SPACE_END;
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
+ prop->dmmu.last_mask = LAST_MASK;
/* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
@@ -436,6 +437,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.end_addr = VA_HOST_SPACE_END;
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
+ prop->pmmu.last_mask = LAST_MASK;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@@ -473,6 +475,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->clk_pll_index = HL_GOYA_MME_PLL;
+ prop->use_get_power_for_reset_history = true;
+
return 0;
}
@@ -735,6 +739,11 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
int rc;
if (hdev->asic_prop.fw_security_enabled) {
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
+ return;
+
rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
pll_freq_arr);
@@ -778,9 +787,59 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
prop->psoc_pci_pll_div_factor = div_fctr;
}
+/*
+ * goya_set_frequency - set the frequency of the device
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @freq: the new frequency value
+ *
+ * Change the frequency if needed. This function has no protection against
+ * concurrency, therefore it is assumed that the calling function has protected
+ * itself against the case of calling this function from multiple threads with
+ * different values
+ *
+ * Returns 0 if no change was done, otherwise returns 1
+ */
+int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
+{
+ struct goya_device *goya = hdev->asic_specific;
+
+ if ((goya->pm_mng_profile == PM_MANUAL) ||
+ (goya->curr_pll_profile == freq))
+ return 0;
+
+ dev_dbg(hdev->dev, "Changing device frequency to %s\n",
+ freq == PLL_HIGH ? "high" : "low");
+
+ goya_set_pll_profile(hdev, freq);
+
+ goya->curr_pll_profile = freq;
+
+ return 1;
+}
+
+static void goya_set_freq_to_low_job(struct work_struct *work)
+{
+ struct goya_work_freq *goya_work = container_of(work,
+ struct goya_work_freq,
+ work_freq.work);
+ struct hl_device *hdev = goya_work->hdev;
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ if (!hdev->is_compute_ctx_active)
+ goya_set_frequency(hdev, PLL_LOW);
+
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ schedule_delayed_work(&goya_work->work_freq,
+ usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+}
+
int goya_late_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
int rc;
goya_fetch_psoc_frequency(hdev);
@@ -829,6 +888,16 @@ int goya_late_init(struct hl_device *hdev)
return rc;
}
+ /* force setting to low frequency */
+ goya->curr_pll_profile = PLL_LOW;
+
+ goya->pm_mng_profile = PM_AUTO;
+
+ hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
+
+ schedule_delayed_work(&goya->goya_work->work_freq,
+ usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+
return 0;
}
@@ -842,8 +911,11 @@ int goya_late_init(struct hl_device *hdev)
void goya_late_fini(struct hl_device *hdev)
{
const struct hwmon_channel_info **channel_info_arr;
+ struct goya_device *goya = hdev->asic_specific;
int i = 0;
+ cancel_delayed_work_sync(&goya->goya_work->work_freq);
+
if (!hdev->hl_chip_info->info)
return;
@@ -961,12 +1033,21 @@ static int goya_sw_init(struct hl_device *hdev)
spin_lock_init(&goya->hw_queues_lock);
hdev->supports_coresight = true;
- hdev->supports_soft_reset = true;
- hdev->allow_inference_soft_reset = true;
+ hdev->asic_prop.supports_soft_reset = true;
+ hdev->asic_prop.allow_inference_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;
hdev->asic_funcs->set_pci_memory_regions(hdev);
+ goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL);
+ if (!goya->goya_work) {
+ rc = -ENOMEM;
+ goto free_cpu_accessible_dma_pool;
+ }
+
+ goya->goya_work->hdev = hdev;
+ INIT_DELAYED_WORK(&goya->goya_work->work_freq, goya_set_freq_to_low_job);
+
return 0;
free_cpu_accessible_dma_pool:
@@ -1003,6 +1084,7 @@ static int goya_sw_fini(struct hl_device *hdev)
dma_pool_destroy(hdev->dma_pool);
+ kfree(goya->goya_work);
kfree(goya);
return 0;
@@ -2502,7 +2584,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev)
struct fw_load_mgr *fw_loader = &hdev->fw_loader;
/* fill common fields */
- fw_loader->linux_loaded = false;
+ fw_loader->fw_comp_loaded = FW_TYPE_NONE;
fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
@@ -2619,7 +2701,7 @@ int goya_mmu_init(struct hl_device *hdev)
(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
- VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
+ MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
WREG32(mmMMU_MMU_ENABLE, 1);
WREG32(mmMMU_SPI_MASK, 0xF);
@@ -4395,7 +4477,7 @@ static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
struct goya_device *goya = hdev->asic_specific;
- if (hdev->hard_reset_pending)
+ if (hdev->reset_info.hard_reset_pending)
return U64_MAX;
return readq(hdev->pcie_bar[DDR_BAR_ID] +
@@ -4406,7 +4488,7 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
struct goya_device *goya = hdev->asic_specific;
- if (hdev->hard_reset_pending)
+ if (hdev->reset_info.hard_reset_pending)
return;
writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
@@ -4731,7 +4813,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
return rc;
}
-static int goya_soft_reset_late_init(struct hl_device *hdev)
+static int goya_non_hard_reset_late_init(struct hl_device *hdev)
{
/*
* Unmask all IRQs since some could have been received
@@ -4764,24 +4846,39 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
+ ktime_t zero_time = ktime_set(0, 0);
+
+ mutex_lock(&hdev->clk_throttling.lock);
+
switch (event_type) {
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
- hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to power consumption\n");
break;
+
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
- hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
dev_info_ratelimited(hdev->dev,
"Power envelop is safe, back to optimal clock\n");
break;
+
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
- hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to overheating\n");
break;
+
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
- hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
dev_info_ratelimited(hdev->dev,
"Thermal envelop is safe, back to optimal clock\n");
break;
@@ -4791,6 +4888,8 @@ static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
event_type);
break;
}
+
+ mutex_unlock(&hdev->clk_throttling.lock);
}
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
@@ -4834,14 +4933,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, (HL_RESET_HARD |
- HL_RESET_FW_FATAL_ERR));
+ hl_device_reset(hdev, (HL_DRV_RESET_HARD |
+ HL_DRV_RESET_FW_FATAL_ERR));
break;
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, HL_RESET_HARD);
+ hl_device_reset(hdev, HL_DRV_RESET_HARD);
break;
case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
@@ -4901,7 +5000,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
goya_print_irq_info(hdev, event_type, false);
goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, HL_RESET_HARD);
+ hl_device_reset(hdev, HL_DRV_RESET_HARD);
else
hl_fw_unmask_irq(hdev, event_type);
break;
@@ -5209,7 +5308,7 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
int rc;
if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
- hdev->hard_reset_pending)
+ hdev->reset_info.hard_reset_pending)
return 0;
/* no need in L1 only invalidation in Goya */
@@ -5232,12 +5331,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
1000,
timeout_usec);
- if (rc) {
- dev_err_ratelimited(hdev->dev,
- "MMU cache invalidation timeout\n");
- hl_device_reset(hdev, HL_RESET_HARD);
- }
-
return rc;
}
@@ -5645,7 +5738,7 @@ static const struct hl_asic_funcs goya_funcs = {
.disable_clock_gating = goya_disable_clock_gating,
.debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
- .soft_reset_late_init = goya_soft_reset_late_init,
+ .non_hard_reset_late_init = goya_non_hard_reset_late_init,
.hw_queues_lock = goya_hw_queues_lock,
.hw_queues_unlock = goya_hw_queues_unlock,
.get_pci_id = goya_get_pci_id,
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 97add7b04f82..3740fd25bf84 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -153,9 +153,15 @@
#define HW_CAP_GOLDEN 0x00000400
#define HW_CAP_TPC 0x00000800
+struct goya_work_freq {
+ struct hl_device *hdev;
+ struct delayed_work work_freq;
+};
+
struct goya_device {
/* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock;
+ struct goya_work_freq *goya_work;
u64 mme_clk;
u64 tpc_clk;
@@ -166,6 +172,9 @@ struct goya_device {
u32 events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE];
u32 hw_cap_initialized;
u8 device_cpu_mmu_mappings_done;
+
+ enum hl_pll_frequency curr_pll_profile;
+ enum hl_pm_mng_profile pm_mng_profile;
};
int goya_set_fixed_properties(struct hl_device *hdev);
@@ -211,8 +220,8 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
int goya_cpucp_info_get(struct hl_device *hdev);
-int goya_debug_coresight(struct hl_device *hdev, void *data);
-void goya_halt_coresight(struct hl_device *hdev);
+int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
+void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);
@@ -237,5 +246,6 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx);
u64 goya_get_device_time(struct hl_device *hdev);
+int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
#endif /* GOYAP_H_ */
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index c55c100fdd24..2c5133cfae65 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -652,7 +652,7 @@ static int goya_config_spmu(struct hl_device *hdev,
return 0;
}
-int goya_debug_coresight(struct hl_device *hdev, void *data)
+int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data)
{
struct hl_debug_params *params = data;
int rc = 0;
@@ -691,7 +691,7 @@ int goya_debug_coresight(struct hl_device *hdev, void *data)
return rc;
}
-void goya_halt_coresight(struct hl_device *hdev)
+void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx)
{
struct hl_debug_params params = {};
int i, rc;
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index 59b2624ff81a..76b47749affe 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -62,7 +62,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}
- if (hdev->pm_mng_profile == PM_AUTO) {
+ if (goya->pm_mng_profile == PM_AUTO) {
count = -EPERM;
goto fail;
}
@@ -111,7 +111,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}
- if (hdev->pm_mng_profile == PM_AUTO) {
+ if (goya->pm_mng_profile == PM_AUTO) {
count = -EPERM;
goto fail;
}
@@ -160,7 +160,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr,
goto fail;
}
- if (hdev->pm_mng_profile == PM_AUTO) {
+ if (goya->pm_mng_profile == PM_AUTO) {
count = -EPERM;
goto fail;
}
@@ -234,13 +234,14 @@ static ssize_t pm_mng_profile_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
+ struct goya_device *goya = hdev->asic_specific;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
return sprintf(buf, "%s\n",
- (hdev->pm_mng_profile == PM_AUTO) ? "auto" :
- (hdev->pm_mng_profile == PM_MANUAL) ? "manual" :
+ (goya->pm_mng_profile == PM_AUTO) ? "auto" :
+ (goya->pm_mng_profile == PM_MANUAL) ? "manual" :
"unknown");
}
@@ -248,6 +249,7 @@ static ssize_t pm_mng_profile_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct hl_device *hdev = dev_get_drvdata(dev);
+ struct goya_device *goya = hdev->asic_specific;
if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV;
@@ -256,7 +258,7 @@ static ssize_t pm_mng_profile_store(struct device *dev,
mutex_lock(&hdev->fpriv_list_lock);
- if (hdev->compute_ctx) {
+ if (hdev->is_compute_ctx_active) {
dev_err(hdev->dev,
"Can't change PM profile while compute context is opened on the device\n");
count = -EPERM;
@@ -265,26 +267,27 @@ static ssize_t pm_mng_profile_store(struct device *dev,
if (strncmp("auto", buf, strlen("auto")) == 0) {
/* Make sure we are in LOW PLL when changing modes */
- if (hdev->pm_mng_profile == PM_MANUAL) {
- hdev->curr_pll_profile = PLL_HIGH;
- hdev->pm_mng_profile = PM_AUTO;
- hl_device_set_frequency(hdev, PLL_LOW);
+ if (goya->pm_mng_profile == PM_MANUAL) {
+ goya->curr_pll_profile = PLL_HIGH;
+ goya->pm_mng_profile = PM_AUTO;
+ goya_set_frequency(hdev, PLL_LOW);
}
} else if (strncmp("manual", buf, strlen("manual")) == 0) {
- if (hdev->pm_mng_profile == PM_AUTO) {
+ if (goya->pm_mng_profile == PM_AUTO) {
/* Must release the lock because the work thread also
* takes this lock. But before we release it, set
* the mode to manual so nothing will change if a user
* suddenly opens the device
*/
- hdev->pm_mng_profile = PM_MANUAL;
+ goya->pm_mng_profile = PM_MANUAL;
mutex_unlock(&hdev->fpriv_list_lock);
/* Flush the current work so we can return to the user
* knowing that he is the only one changing frequencies
*/
- flush_delayed_work(&hdev->work_freq);
+ if (goya->goya_work)
+ flush_delayed_work(&goya->goya_work->work_freq);
return count;
}
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index ae13231fda94..737c39f33f05 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
- * Copyright 2020 HabanaLabs, Ltd.
+ * Copyright 2020-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -376,6 +376,19 @@ enum pq_init_status {
* and QMANs. The f/w will return a bitmask where each bit represents
* a different engine or QMAN according to enum cpucp_idle_mask.
* The bit will be 1 if the engine is NOT idle.
+ *
+ * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET -
+ * Fetch all HBM replaced-rows and prending to be replaced rows data.
+ *
+ * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS -
+ * Fetch status of HBM rows pending replacement and need a reboot to
+ * be replaced.
+ *
+ * CPUCP_PACKET_POWER_SET -
+ * Resets power history of device to 0
+ *
+ * CPUCP_PACKET_ENGINE_CORE_ASID_SET -
+ * Packet to perform engine core ASID configuration
*/
enum cpucp_packet_id {
@@ -421,6 +434,11 @@ enum cpucp_packet_id {
CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */
CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */
CPUCP_PACKET_IS_IDLE_CHECK, /* internal */
+ CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */
+ CPUCP_PACKET_HBM_PENDING_ROWS_STATUS, /* internal */
+ CPUCP_PACKET_POWER_SET, /* internal */
+ CPUCP_PACKET_RESERVED, /* not used */
+ CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */
};
#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
@@ -480,7 +498,14 @@ struct cpucp_packet {
__u8 i2c_bus;
__u8 i2c_addr;
__u8 i2c_reg;
- __u8 pad; /* unused */
+ /*
+ * In legacy implemetations, i2c_len was not present,
+ * was unused and just added as pad.
+ * So if i2c_len is 0, it is treated as legacy
+ * and r/w 1 Byte, else if i2c_len is specified,
+ * its treated as new multibyte r/w support.
+ */
+ __u8 i2c_len;
};
struct {/* For PLL info fetch */
@@ -688,6 +713,7 @@ struct eq_generic_event {
#define CPUCP_MAX_NIC_LANES (CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC)
#define CPUCP_NIC_MASK_ARR_LEN ((CPUCP_MAX_NICS + 63) / 64)
#define CPUCP_NIC_POLARITY_ARR_LEN ((CPUCP_MAX_NIC_LANES + 63) / 64)
+#define CPUCP_HBM_ROW_REPLACE_MAX 32
struct cpucp_sensor {
__le32 type;
@@ -740,6 +766,7 @@ struct cpucp_security_info {
* @fuse_version: silicon production FUSE information.
* @thermal_version: thermald S/W version.
* @cpucp_version: CpuCP S/W version.
+ * @infineon_second_stage_version: Infineon 2nd stage DC-DC version.
* @dram_size: available DRAM size.
* @card_name: card name that will be displayed in HWMON subsystem on the host
* @sec_info: security information
@@ -749,6 +776,10 @@ struct cpucp_security_info {
* @dram_binning_mask: DRAM binning mask, 1 bit per dram instance
* (0 = functional 1 = binned)
* @memory_repair_flag: eFuse flag indicating memory repair
+ * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance
+ * (0 = functional 1 = binned)
+ * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance
+ * (0 = functional 1 = binned)
*/
struct cpucp_info {
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
@@ -761,7 +792,7 @@ struct cpucp_info {
__u8 fuse_version[VERSION_MAX_LEN];
__u8 thermal_version[VERSION_MAX_LEN];
__u8 cpucp_version[VERSION_MAX_LEN];
- __le32 reserved2;
+ __le32 infineon_second_stage_version;
__le64 dram_size;
char card_name[CARD_NAME_MAX_LEN];
__le64 reserved3;
@@ -769,7 +800,9 @@ struct cpucp_info {
__u8 reserved5;
__u8 dram_binning_mask;
__u8 memory_repair_flag;
- __u8 pad[5];
+ __u8 edma_binning_mask;
+ __u8 xbar_binning_mask;
+ __u8 pad[3];
struct cpucp_security_info sec_info;
__le32 reserved6;
__u8 pll_map[PLL_MAP_LEN];
@@ -833,4 +866,25 @@ struct cpucp_nic_status {
__le32 high_ber_cnt;
};
+enum cpucp_hbm_row_replace_cause {
+ REPLACE_CAUSE_DOUBLE_ECC_ERR,
+ REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR,
+};
+
+struct cpucp_hbm_row_info {
+ __u8 hbm_idx;
+ __u8 pc;
+ __u8 sid;
+ __u8 bank_idx;
+ __le16 row_addr;
+ __u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */
+ __u8 pad;
+};
+
+struct cpucp_hbm_row_replaced_rows_info {
+ __le16 num_replaced_rows;
+ __u8 pad[6];
+ struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX];
+};
+
#endif /* CPUCP_IF_H */
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 2626df6ef3ef..135e21d6edc9 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -32,6 +32,7 @@ enum cpu_boot_err {
CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13,
CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
CPU_BOOT_ERR_BINNING_FAIL = 19,
+ CPU_BOOT_ERR_TPM_FAIL = 20,
CPU_BOOT_ERR_ENABLED = 31,
CPU_BOOT_ERR_SCND_EN = 63,
CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
@@ -108,6 +109,8 @@ enum cpu_boot_err {
* malfunctioning components might still be
* in use.
*
+ * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed.
+ *
* CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the
* running FW populates the error
@@ -130,6 +133,7 @@ enum cpu_boot_err {
#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL)
#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL)
+#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL)
#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
index dedf20e8f956..758f246627f8 100644
--- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
@@ -16,27 +16,18 @@
#define PAGE_PRESENT_MASK 0x0000000000001ull
#define SWAP_OUT_MASK 0x0000000000004ull
#define LAST_MASK 0x0000000000800ull
-#define HOP0_MASK 0x3000000000000ull
-#define HOP1_MASK 0x0FF8000000000ull
-#define HOP2_MASK 0x0007FC0000000ull
-#define HOP3_MASK 0x000003FE00000ull
-#define HOP4_MASK 0x00000001FF000ull
#define FLAGS_MASK 0x0000000000FFFull
-#define HOP0_SHIFT 48
-#define HOP1_SHIFT 39
-#define HOP2_SHIFT 30
-#define HOP3_SHIFT 21
-#define HOP4_SHIFT 12
-
#define MMU_ARCH_5_HOPS 5
#define HOP_PHYS_ADDR_MASK (~FLAGS_MASK)
#define HL_PTE_SIZE sizeof(u64)
-#define HOP_TABLE_SIZE PAGE_SIZE_4KB
-#define PTE_ENTRIES_IN_HOP (HOP_TABLE_SIZE / HL_PTE_SIZE)
-#define HOP0_TABLES_TOTAL_SIZE (HOP_TABLE_SIZE * MAX_ASID)
+
+/* definitions for HOP with 512 PTE entries */
+#define HOP_PTE_ENTRIES_512 512
+#define HOP_TABLE_SIZE_512_PTE (HOP_PTE_ENTRIES_512 * HL_PTE_SIZE)
+#define HOP0_512_PTE_TABLES_TOTAL_SIZE (HOP_TABLE_SIZE_512_PTE * MAX_ASID)
#define MMU_HOP0_PA43_12_SHIFT 12
#define MMU_HOP0_PA49_44_SHIFT (12 + 32)
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h
index 8539dd041f2c..86511002e367 100644
--- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h
@@ -8,8 +8,20 @@
#ifndef INCLUDE_MMU_V1_0_H_
#define INCLUDE_MMU_V1_0_H_
-#define MMU_HOP0_PA43_12 0x490004
-#define MMU_HOP0_PA49_44 0x490008
-#define MMU_ASID_BUSY 0x490000
+#define MMU_V1_0_HOP0_MASK 0x3000000000000ull
+#define MMU_V1_0_HOP1_MASK 0x0FF8000000000ull
+#define MMU_V1_0_HOP2_MASK 0x0007FC0000000ull
+#define MMU_V1_0_HOP3_MASK 0x000003FE00000ull
+#define MMU_V1_0_HOP4_MASK 0x00000001FF000ull
+
+#define MMU_V1_0_HOP0_SHIFT 48
+#define MMU_V1_0_HOP1_SHIFT 39
+#define MMU_V1_0_HOP2_SHIFT 30
+#define MMU_V1_0_HOP3_SHIFT 21
+#define MMU_V1_0_HOP4_SHIFT 12
+
+#define MMU_HOP0_PA43_12 0x490004
+#define MMU_HOP0_PA49_44 0x490008
+#define MMU_ASID_BUSY 0x490000
#endif /* INCLUDE_MMU_V1_0_H_ */
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_1.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_1.h
index b2a9570583ac..9c727a5d47b4 100644
--- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_1.h
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_1.h
@@ -8,9 +8,21 @@
#ifndef INCLUDE_MMU_V1_1_H_
#define INCLUDE_MMU_V1_1_H_
-#define MMU_ASID 0xC12004
-#define MMU_HOP0_PA43_12 0xC12008
-#define MMU_HOP0_PA49_44 0xC1200C
-#define MMU_BUSY 0xC12000
+#define MMU_V1_1_HOP0_MASK 0x3000000000000ull
+#define MMU_V1_1_HOP1_MASK 0x0FF8000000000ull
+#define MMU_V1_1_HOP2_MASK 0x0007FC0000000ull
+#define MMU_V1_1_HOP3_MASK 0x000003FE00000ull
+#define MMU_V1_1_HOP4_MASK 0x00000001FF000ull
+
+#define MMU_V1_1_HOP0_SHIFT 48
+#define MMU_V1_1_HOP1_SHIFT 39
+#define MMU_V1_1_HOP2_SHIFT 30
+#define MMU_V1_1_HOP3_SHIFT 21
+#define MMU_V1_1_HOP4_SHIFT 12
+
+#define MMU_ASID 0xC12004
+#define MMU_HOP0_PA43_12 0xC12008
+#define MMU_HOP0_PA49_44 0xC1200C
+#define MMU_BUSY 0xC12000
#endif /* INCLUDE_MMU_V1_1_H_ */
diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c
index 0f54730c7ed5..98828030b5a4 100644
--- a/drivers/misc/lattice-ecp3-config.c
+++ b/drivers/misc/lattice-ecp3-config.c
@@ -76,12 +76,12 @@ static void firmware_load(const struct firmware *fw, void *context)
if (fw == NULL) {
dev_err(&spi->dev, "Cannot load firmware, aborting\n");
- return;
+ goto out;
}
if (fw->size == 0) {
dev_err(&spi->dev, "Error: Firmware size is 0!\n");
- return;
+ goto out;
}
/* Fill dummy data (24 stuffing bits for commands) */
@@ -103,7 +103,7 @@ static void firmware_load(const struct firmware *fw, void *context)
dev_err(&spi->dev,
"Error: No supported FPGA detected (JEDEC_ID=%08x)!\n",
jedec_id);
- return;
+ goto out;
}
dev_info(&spi->dev, "FPGA %s detected\n", ecp3_dev[i].name);
@@ -116,7 +116,7 @@ static void firmware_load(const struct firmware *fw, void *context)
buffer = kzalloc(fw->size + 8, GFP_KERNEL);
if (!buffer) {
dev_err(&spi->dev, "Error: Can't allocate memory!\n");
- return;
+ goto out;
}
/*
@@ -155,7 +155,7 @@ static void firmware_load(const struct firmware *fw, void *context)
"Error: Timeout waiting for FPGA to clear (status=%08x)!\n",
status);
kfree(buffer);
- return;
+ goto out;
}
dev_info(&spi->dev, "Configuring the FPGA...\n");
@@ -181,7 +181,7 @@ static void firmware_load(const struct firmware *fw, void *context)
release_firmware(fw);
kfree(buffer);
-
+out:
complete(&data->fw_loaded);
}
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index 83a7baf5df82..2e0aa74ac185 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -20,7 +20,7 @@ CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO)
OBJCOPYFLAGS :=
OBJCOPYFLAGS_rodata_objcopy.o := \
- --rename-section .noinstr.text=.rodata,alloc,readonly,load
+ --rename-section .noinstr.text=.rodata,alloc,readonly,load,contents
targets += rodata.o rodata_objcopy.o
$(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE
$(call if_changed,objcopy)
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index f4cb94a9aa9c..f21854ac5cc2 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -41,20 +41,22 @@ static DEFINE_SPINLOCK(lock_me_up);
* Make sure compiler does not optimize this function or stack frame away:
* - function marked noinline
* - stack variables are marked volatile
- * - stack variables are written (memset()) and read (pr_info())
- * - function has external effects (pr_info())
- * */
+ * - stack variables are written (memset()) and read (buf[..] passed as arg)
+ * - function may have external effects (memzero_explicit())
+ * - no tail recursion possible
+ */
static int noinline recursive_loop(int remaining)
{
volatile char buf[REC_STACK_SIZE];
+ volatile int ret;
memset((void *)buf, remaining & 0xFF, sizeof(buf));
- pr_info("loop %d/%d ...\n", (int)buf[remaining % sizeof(buf)],
- recur_count);
if (!remaining)
- return 0;
+ ret = 0;
else
- return recursive_loop(remaining - 1);
+ ret = recursive_loop((int)buf[remaining % sizeof(buf)] - 1);
+ memzero_explicit((void *)buf, sizeof(buf));
+ return ret;
}
/* If the depth is negative, use the default, otherwise keep parameter. */
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 82fb276f7e09..f69b964b9952 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -212,7 +212,11 @@ module_param(cpoint_count, int, 0644);
MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\
"crash point is to be hit to trigger action");
-/* For test debug reporting. */
+/*
+ * For test debug reporting when CI systems provide terse summaries.
+ * TODO: Remove this once reasonable reporting exists in most CI systems:
+ * https://lore.kernel.org/lkml/CAHk-=wiFvfkoFixTapvvyPMN9pq5G-+Dys2eSyBa1vzDGAO5+A@mail.gmail.com
+ */
char *lkdtm_kernel_info;
/* Return the crashtype number or NULL if the name is invalid */
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 0e90591235a6..06734670a732 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -2330,6 +2330,8 @@ int mei_cl_dma_alloc_and_map(struct mei_cl *cl, const struct file *fp,
list_move_tail(&cb->list, &dev->ctrl_rd_list);
}
+ cl->status = 0;
+
mutex_unlock(&dev->device_lock);
wait_event_timeout(cl->wait,
cl->dma_mapped || cl->status,
@@ -2407,6 +2409,8 @@ int mei_cl_dma_unmap(struct mei_cl *cl, const struct file *fp)
list_move_tail(&cb->list, &dev->ctrl_rd_list);
}
+ cl->status = 0;
+
mutex_unlock(&dev->device_lock);
wait_event_timeout(cl->wait,
!cl->dma_mapped || cl->status,
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index be41843df75b..cebcca6d6d3e 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -672,10 +672,14 @@ static void mei_hbm_cl_dma_map_res(struct mei_device *dev,
if (!cl)
return;
- dev_dbg(dev->dev, "cl dma map result = %d\n", res->status);
- cl->status = res->status;
- if (!cl->status)
+ if (res->status) {
+ dev_err(dev->dev, "cl dma map failed %d\n", res->status);
+ cl->status = -EFAULT;
+ } else {
+ dev_dbg(dev->dev, "cl dma map succeeded\n");
cl->dma_mapped = 1;
+ cl->status = 0;
+ }
wake_up(&cl->wait);
}
@@ -698,10 +702,14 @@ static void mei_hbm_cl_dma_unmap_res(struct mei_device *dev,
if (!cl)
return;
- dev_dbg(dev->dev, "cl dma unmap result = %d\n", res->status);
- cl->status = res->status;
- if (!cl->status)
+ if (res->status) {
+ dev_err(dev->dev, "cl dma unmap failed %d\n", res->status);
+ cl->status = -EFAULT;
+ } else {
+ dev_dbg(dev->dev, "cl dma unmap succeeded\n");
cl->dma_mapped = 0;
+ cl->status = 0;
+ }
wake_up(&cl->wait);
}
diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c
index a4e854b9b9e6..00652c137cc7 100644
--- a/drivers/misc/mei/hw-txe.c
+++ b/drivers/misc/mei/hw-txe.c
@@ -994,11 +994,7 @@ static bool mei_txe_check_and_ack_intrs(struct mei_device *dev, bool do_ack)
hhisr &= ~IPC_HHIER_SEC;
}
- generated = generated ||
- (hisr & HISR_INT_STS_MSK) ||
- (ipc_isr & SEC_IPC_HOST_INT_STATUS_PENDING);
-
- if (generated && do_ack) {
+ if (do_ack) {
/* Save the interrupt causes */
hw->intr_cause |= hisr & HISR_INT_STS_MSK;
if (ipc_isr & SEC_IPC_HOST_INT_STATUS_IN_RDY)
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index 5c8cb679b997..f79076c67256 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -24,6 +24,7 @@ const char *mei_dev_state_str(int state)
MEI_DEV_STATE(ENABLED);
MEI_DEV_STATE(RESETTING);
MEI_DEV_STATE(DISABLED);
+ MEI_DEV_STATE(POWERING_DOWN);
MEI_DEV_STATE(POWER_DOWN);
MEI_DEV_STATE(POWER_UP);
default:
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index 2ed7e3aaff3a..8f786a225dcf 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -865,7 +865,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
goto err_release_irq;
}
misc_device->parent = &pdev->dev;
- misc_device->fops = &pci_endpoint_test_fops,
+ misc_device->fops = &pci_endpoint_test_fops;
err = misc_register(misc_device);
if (err) {
diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c
index 4c26b19f5154..f0e7f02605eb 100644
--- a/drivers/misc/sram.c
+++ b/drivers/misc/sram.c
@@ -371,6 +371,7 @@ static const struct of_device_id sram_dt_ids[] = {
{ .compatible = "atmel,sama5d2-securam", .data = &atmel_securam_config },
{ .compatible = "nvidia,tegra186-sysram", .data = &tegra_sysram_config },
{ .compatible = "nvidia,tegra194-sysram", .data = &tegra_sysram_config },
+ { .compatible = "nvidia,tegra234-sysram", .data = &tegra_sysram_config },
{}
};
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index 488eeb2811ae..281c54003edc 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -289,7 +289,7 @@ static ssize_t api_show(struct device *dev,
{
struct uacce_device *uacce = to_uacce_device(dev);
- return sprintf(buf, "%s\n", uacce->api_ver);
+ return sysfs_emit(buf, "%s\n", uacce->api_ver);
}
static ssize_t flags_show(struct device *dev,
@@ -297,7 +297,7 @@ static ssize_t flags_show(struct device *dev,
{
struct uacce_device *uacce = to_uacce_device(dev);
- return sprintf(buf, "%u\n", uacce->flags);
+ return sysfs_emit(buf, "%u\n", uacce->flags);
}
static ssize_t available_instances_show(struct device *dev,
@@ -309,7 +309,7 @@ static ssize_t available_instances_show(struct device *dev,
if (!uacce->ops->get_available_instances)
return -ENODEV;
- return sprintf(buf, "%d\n",
+ return sysfs_emit(buf, "%d\n",
uacce->ops->get_available_instances(uacce));
}
@@ -318,7 +318,7 @@ static ssize_t algorithms_show(struct device *dev,
{
struct uacce_device *uacce = to_uacce_device(dev);
- return sprintf(buf, "%s\n", uacce->algs);
+ return sysfs_emit(buf, "%s\n", uacce->algs);
}
static ssize_t region_mmio_size_show(struct device *dev,
@@ -326,7 +326,7 @@ static ssize_t region_mmio_size_show(struct device *dev,
{
struct uacce_device *uacce = to_uacce_device(dev);
- return sprintf(buf, "%lu\n",
+ return sysfs_emit(buf, "%lu\n",
uacce->qf_pg_num[UACCE_QFRT_MMIO] << PAGE_SHIFT);
}
@@ -335,7 +335,7 @@ static ssize_t region_dus_size_show(struct device *dev,
{
struct uacce_device *uacce = to_uacce_device(dev);
- return sprintf(buf, "%lu\n",
+ return sysfs_emit(buf, "%lu\n",
uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT);
}
diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c
index c0b5e339d5a1..6cf3e21c7604 100644
--- a/drivers/misc/vmw_vmci/vmci_context.c
+++ b/drivers/misc/vmw_vmci/vmci_context.c
@@ -687,10 +687,8 @@ int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
}
spin_unlock(&context->lock);
- if (found) {
- synchronize_rcu();
- kfree(notifier);
- }
+ if (found)
+ kvfree_rcu(notifier);
vmci_ctx_put(context);
diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c
index e3436abf39f4..2100297c94ad 100644
--- a/drivers/misc/vmw_vmci/vmci_event.c
+++ b/drivers/misc/vmw_vmci/vmci_event.c
@@ -209,8 +209,7 @@ int vmci_event_unsubscribe(u32 sub_id)
if (!s)
return VMCI_ERROR_NOT_FOUND;
- synchronize_rcu();
- kfree(s);
+ kvfree_rcu(s);
return VMCI_SUCCESS;
}
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 7693236c946f..7ab1b38a7be5 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -1128,8 +1128,8 @@ static int jz4740_mmc_resume(struct device *dev)
return pinctrl_select_default_state(dev);
}
-DEFINE_SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend,
- jz4740_mmc_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend,
+ jz4740_mmc_resume);
static struct platform_driver jz4740_mmc_driver = {
.probe = jz4740_mmc_probe,
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 98c218bd6669..40b6878bea6c 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -1210,7 +1210,7 @@ static int mxcmci_resume(struct device *dev)
return ret;
}
-DEFINE_SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
static struct platform_driver mxcmci_driver = {
.probe = mxcmci_probe,
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 084c61b2cbec..2797a9c0f17d 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -642,7 +642,7 @@ static int renesas_sdhi_select_tuning(struct tmio_mmc_host *host)
* is at least SH_MOBILE_SDHI_MIN_TAP_ROW probes long then use the
* center index as the tap, otherwise bail out.
*/
- bitmap_for_each_set_region(bitmap, rs, re, 0, taps_size) {
+ for_each_set_bitrange(rs, re, bitmap, taps_size) {
if (re - rs > tap_cnt) {
tap_end = re;
tap_start = rs;
diff --git a/drivers/most/most_usb.c b/drivers/most/most_usb.c
index acabb7715b42..73258b24fea7 100644
--- a/drivers/most/most_usb.c
+++ b/drivers/most/most_usb.c
@@ -831,7 +831,7 @@ static ssize_t value_show(struct device *dev, struct device_attribute *attr,
int err;
if (sysfs_streq(name, "arb_address"))
- return snprintf(buf, PAGE_SIZE, "%04x\n", dci_obj->reg_addr);
+ return sysfs_emit(buf, "%04x\n", dci_obj->reg_addr);
if (sysfs_streq(name, "arb_value"))
reg_addr = dci_obj->reg_addr;
@@ -843,7 +843,7 @@ static ssize_t value_show(struct device *dev, struct device_attribute *attr,
if (err < 0)
return err;
- return snprintf(buf, PAGE_SIZE, "%04x\n", val);
+ return sysfs_emit(buf, "%04x\n", val);
}
static ssize_t value_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 07fc603c2fa7..ec498ce70f35 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3874,8 +3874,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
skb->l4_hash)
return skb->hash;
- return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
- skb->mac_header, skb->network_header,
+ return __bond_xmit_hash(bond, skb, skb->data, skb->protocol,
+ skb_mac_offset(skb), skb_network_offset(skb),
skb_headlen(skb));
}
@@ -4884,25 +4884,39 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave = NULL;
struct list_head *iter;
+ bool xmit_suc = false;
+ bool skb_used = false;
bond_for_each_slave_rcu(bond, slave, iter) {
- if (bond_is_last_slave(bond, slave))
- break;
- if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
- struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ struct sk_buff *skb2;
+
+ if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
+ continue;
+ if (bond_is_last_slave(bond, slave)) {
+ skb2 = skb;
+ skb_used = true;
+ } else {
+ skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2) {
net_err_ratelimited("%s: Error: %s: skb_clone() failed\n",
bond_dev->name, __func__);
continue;
}
- bond_dev_queue_xmit(bond, skb2, slave->dev);
}
+
+ if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK)
+ xmit_suc = true;
}
- if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)
- return bond_dev_queue_xmit(bond, skb, slave->dev);
- return bond_tx_drop(bond_dev, skb);
+ if (!skb_used)
+ dev_kfree_skb_any(skb);
+
+ if (xmit_suc)
+ return NETDEV_TX_OK;
+
+ atomic_long_inc(&bond_dev->tx_dropped);
+ return NET_XMIT_DROP;
}
/*------------------------- Device initialization ---------------------------*/
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 2ec11af5f0cc..46b150e6289e 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -11,7 +11,7 @@
static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
- struct bonding *bond = PDE_DATA(file_inode(seq->file));
+ struct bonding *bond = pde_data(file_inode(seq->file));
struct list_head *iter;
struct slave *slave;
loff_t off = 0;
@@ -30,7 +30,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct bonding *bond = PDE_DATA(file_inode(seq->file));
+ struct bonding *bond = pde_data(file_inode(seq->file));
struct list_head *iter;
struct slave *slave;
bool found = false;
@@ -57,7 +57,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v)
static void bond_info_show_master(struct seq_file *seq)
{
- struct bonding *bond = PDE_DATA(file_inode(seq->file));
+ struct bonding *bond = pde_data(file_inode(seq->file));
const struct bond_opt_value *optval;
struct slave *curr, *primary;
int i;
@@ -175,7 +175,7 @@ static void bond_info_show_master(struct seq_file *seq)
static void bond_info_show_slave(struct seq_file *seq,
const struct slave *slave)
{
- struct bonding *bond = PDE_DATA(file_inode(seq->file));
+ struct bonding *bond = pde_data(file_inode(seq->file));
seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link));
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index 91230894692d..444ef6a342f6 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -754,7 +754,7 @@ static void cfv_remove(struct virtio_device *vdev)
debugfs_remove_recursive(cfv->debugfs);
vringh_kiov_cleanup(&cfv->ctx.riov);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->vringh_config->del_vrhs(cfv->vdev);
cfv->vr_rx = NULL;
vdev->config->del_vqs(cfv->vdev);
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index 849de4564709..621ce742ad21 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -106,9 +106,9 @@ static void emac_update_speed(struct net_device *dev)
/* set EMAC SPEED, depend on PHY */
reg_val = readl(db->membase + EMAC_MAC_SUPP_REG);
- reg_val &= ~(0x1 << 8);
+ reg_val &= ~EMAC_MAC_SUPP_100M;
if (db->speed == SPEED_100)
- reg_val |= 1 << 8;
+ reg_val |= EMAC_MAC_SUPP_100M;
writel(reg_val, db->membase + EMAC_MAC_SUPP_REG);
}
@@ -264,7 +264,7 @@ static void emac_dma_done_callback(void *arg)
/* re enable interrupt */
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0x01 << 8);
+ reg_val |= EMAC_INT_CTL_RX_EN;
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
db->emacrx_completed_flag = 1;
@@ -429,7 +429,7 @@ static unsigned int emac_powerup(struct net_device *ndev)
/* initial EMAC */
/* flush RX FIFO */
reg_val = readl(db->membase + EMAC_RX_CTL_REG);
- reg_val |= 0x8;
+ reg_val |= EMAC_RX_CTL_FLUSH_FIFO;
writel(reg_val, db->membase + EMAC_RX_CTL_REG);
udelay(1);
@@ -441,8 +441,8 @@ static unsigned int emac_powerup(struct net_device *ndev)
/* set MII clock */
reg_val = readl(db->membase + EMAC_MAC_MCFG_REG);
- reg_val &= (~(0xf << 2));
- reg_val |= (0xD << 2);
+ reg_val &= ~EMAC_MAC_MCFG_MII_CLKD_MASK;
+ reg_val |= EMAC_MAC_MCFG_MII_CLKD_72;
writel(reg_val, db->membase + EMAC_MAC_MCFG_REG);
/* clear RX counter */
@@ -506,7 +506,7 @@ static void emac_init_device(struct net_device *dev)
/* enable RX/TX0/RX Hlevel interrup */
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0) | (0x01 << 8);
+ reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN | EMAC_INT_CTL_RX_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
spin_unlock_irqrestore(&db->lock, flags);
@@ -637,7 +637,9 @@ static void emac_rx(struct net_device *dev)
if (!rxcount) {
db->emacrx_completed_flag = 1;
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0) | (0x01 << 8);
+ reg_val |= (EMAC_INT_CTL_TX_EN |
+ EMAC_INT_CTL_TX_ABRT_EN |
+ EMAC_INT_CTL_RX_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
/* had one stuck? */
@@ -669,7 +671,9 @@ static void emac_rx(struct net_device *dev)
writel(reg_val | EMAC_CTL_RX_EN,
db->membase + EMAC_CTL_REG);
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0) | (0x01 << 8);
+ reg_val |= (EMAC_INT_CTL_TX_EN |
+ EMAC_INT_CTL_TX_ABRT_EN |
+ EMAC_INT_CTL_RX_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
db->emacrx_completed_flag = 1;
@@ -783,20 +787,20 @@ static irqreturn_t emac_interrupt(int irq, void *dev_id)
}
/* Transmit Interrupt check */
- if (int_status & (0x01 | 0x02))
+ if (int_status & EMAC_INT_STA_TX_COMPLETE)
emac_tx_done(dev, db, int_status);
- if (int_status & (0x04 | 0x08))
+ if (int_status & EMAC_INT_STA_TX_ABRT)
netdev_info(dev, " ab : %x\n", int_status);
/* Re-enable interrupt mask */
if (db->emacrx_completed_flag == 1) {
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0) | (0x01 << 8);
+ reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN | EMAC_INT_CTL_RX_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
} else {
reg_val = readl(db->membase + EMAC_INT_CTL_REG);
- reg_val |= (0xf << 0);
+ reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
}
@@ -1068,6 +1072,7 @@ out_clk_disable_unprepare:
clk_disable_unprepare(db->clk);
out_dispose_mapping:
irq_dispose_mapping(ndev->irq);
+ dma_release_channel(db->rx_chan);
out_iounmap:
iounmap(db->membase);
out:
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.h b/drivers/net/ethernet/allwinner/sun4i-emac.h
index 38c72d9ec600..90bd9ad77607 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.h
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.h
@@ -38,6 +38,7 @@
#define EMAC_RX_CTL_REG (0x3c)
#define EMAC_RX_CTL_AUTO_DRQ_EN (1 << 1)
#define EMAC_RX_CTL_DMA_EN (1 << 2)
+#define EMAC_RX_CTL_FLUSH_FIFO (1 << 3)
#define EMAC_RX_CTL_PASS_ALL_EN (1 << 4)
#define EMAC_RX_CTL_PASS_CTL_EN (1 << 5)
#define EMAC_RX_CTL_PASS_CRC_ERR_EN (1 << 6)
@@ -61,7 +62,21 @@
#define EMAC_RX_IO_DATA_STATUS_OK (1 << 7)
#define EMAC_RX_FBC_REG (0x50)
#define EMAC_INT_CTL_REG (0x54)
+#define EMAC_INT_CTL_RX_EN (1 << 8)
+#define EMAC_INT_CTL_TX0_EN (1)
+#define EMAC_INT_CTL_TX1_EN (1 << 1)
+#define EMAC_INT_CTL_TX_EN (EMAC_INT_CTL_TX0_EN | EMAC_INT_CTL_TX1_EN)
+#define EMAC_INT_CTL_TX0_ABRT_EN (0x1 << 2)
+#define EMAC_INT_CTL_TX1_ABRT_EN (0x1 << 3)
+#define EMAC_INT_CTL_TX_ABRT_EN (EMAC_INT_CTL_TX0_ABRT_EN | EMAC_INT_CTL_TX1_ABRT_EN)
#define EMAC_INT_STA_REG (0x58)
+#define EMAC_INT_STA_TX0_COMPLETE (0x1)
+#define EMAC_INT_STA_TX1_COMPLETE (0x1 << 1)
+#define EMAC_INT_STA_TX_COMPLETE (EMAC_INT_STA_TX0_COMPLETE | EMAC_INT_STA_TX1_COMPLETE)
+#define EMAC_INT_STA_TX0_ABRT (0x1 << 2)
+#define EMAC_INT_STA_TX1_ABRT (0x1 << 3)
+#define EMAC_INT_STA_TX_ABRT (EMAC_INT_STA_TX0_ABRT | EMAC_INT_STA_TX1_ABRT)
+#define EMAC_INT_STA_RX_COMPLETE (0x1 << 8)
#define EMAC_MAC_CTL0_REG (0x5c)
#define EMAC_MAC_CTL0_RX_FLOW_CTL_EN (1 << 2)
#define EMAC_MAC_CTL0_TX_FLOW_CTL_EN (1 << 3)
@@ -87,8 +102,11 @@
#define EMAC_MAC_CLRT_RM (0x0f)
#define EMAC_MAC_MAXF_REG (0x70)
#define EMAC_MAC_SUPP_REG (0x74)
+#define EMAC_MAC_SUPP_100M (0x1 << 8)
#define EMAC_MAC_TEST_REG (0x78)
#define EMAC_MAC_MCFG_REG (0x7c)
+#define EMAC_MAC_MCFG_MII_CLKD_MASK (0xff << 2)
+#define EMAC_MAC_MCFG_MII_CLKD_72 (0x0d << 2)
#define EMAC_MAC_A0_REG (0x98)
#define EMAC_MAC_A1_REG (0x9c)
#define EMAC_MAC_A2_REG (0xa0)
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index 9a650d1c1bdd..4d2ba30c2fbd 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -1237,6 +1237,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match)
struct bmac_data *bp;
const unsigned char *prop_addr;
unsigned char addr[6];
+ u8 macaddr[6];
struct net_device *dev;
int is_bmac_plus = ((int)match->data) != 0;
@@ -1284,7 +1285,9 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match)
rev = addr[0] == 0 && addr[1] == 0xA0;
for (j = 0; j < 6; ++j)
- dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j];
+ macaddr[j] = rev ? bitrev8(addr[j]): addr[j];
+
+ eth_hw_addr_set(dev, macaddr);
/* Enable chip without interrupts for now */
bmac_enable_and_reset_chip(dev);
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index 4b80e3a52a19..6f8c91eb1263 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -90,7 +90,7 @@ static void mace_set_timeout(struct net_device *dev);
static void mace_tx_timeout(struct timer_list *t);
static inline void dbdma_reset(volatile struct dbdma_regs __iomem *dma);
static inline void mace_clean_rings(struct mace_data *mp);
-static void __mace_set_address(struct net_device *dev, void *addr);
+static void __mace_set_address(struct net_device *dev, const void *addr);
/*
* If we can't get a skbuff when we need it, we use this area for DMA.
@@ -112,6 +112,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
struct net_device *dev;
struct mace_data *mp;
const unsigned char *addr;
+ u8 macaddr[ETH_ALEN];
int j, rev, rc = -EBUSY;
if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) {
@@ -167,8 +168,9 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
rev = addr[0] == 0 && addr[1] == 0xA0;
for (j = 0; j < 6; ++j) {
- dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j];
+ macaddr[j] = rev ? bitrev8(addr[j]): addr[j];
}
+ eth_hw_addr_set(dev, macaddr);
mp->chipid = (in_8(&mp->mace->chipid_hi) << 8) |
in_8(&mp->mace->chipid_lo);
@@ -369,11 +371,12 @@ static void mace_reset(struct net_device *dev)
out_8(&mb->plscc, PORTSEL_GPSI + ENPLSIO);
}
-static void __mace_set_address(struct net_device *dev, void *addr)
+static void __mace_set_address(struct net_device *dev, const void *addr)
{
struct mace_data *mp = netdev_priv(dev);
volatile struct mace __iomem *mb = mp->mace;
- unsigned char *p = addr;
+ const unsigned char *p = addr;
+ u8 macaddr[ETH_ALEN];
int i;
/* load up the hardware address */
@@ -385,7 +388,10 @@ static void __mace_set_address(struct net_device *dev, void *addr)
;
}
for (i = 0; i < 6; ++i)
- out_8(&mb->padr, dev->dev_addr[i] = p[i]);
+ out_8(&mb->padr, macaddr[i] = p[i]);
+
+ eth_hw_addr_set(dev, macaddr);
+
if (mp->chipid != BROKEN_ADDRCHG_REV)
out_8(&mb->iac, 0);
}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 226f4403cfed..87f1056e29ff 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4020,10 +4020,12 @@ static int bcmgenet_probe(struct platform_device *pdev)
/* Request the WOL interrupt and advertise suspend if available */
priv->wol_irq_disabled = true;
- err = devm_request_irq(&pdev->dev, priv->wol_irq, bcmgenet_wol_isr, 0,
- dev->name, priv);
- if (!err)
- device_set_wakeup_capable(&pdev->dev, 1);
+ if (priv->wol_irq > 0) {
+ err = devm_request_irq(&pdev->dev, priv->wol_irq,
+ bcmgenet_wol_isr, 0, dev->name, priv);
+ if (!err)
+ device_set_wakeup_capable(&pdev->dev, 1);
+ }
/* Set the needed headroom to account for any possible
* features enabling/disabling at runtime
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
index d04a6c163445..da8d10475a08 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
@@ -32,6 +32,7 @@
#include <linux/tcp.h>
#include <linux/ipv6.h>
+#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/ip6_route.h>
@@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi,
rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
peer_port, local_port, IPPROTO_TCP,
- tos, 0);
+ tos & ~INET_ECN_MASK, 0);
if (IS_ERR(rt))
return NULL;
n = dst_neigh_lookup(&rt->dst, &peer_ip);
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 5b8b9bcf41a2..266e562bd67a 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -51,6 +51,7 @@ struct tgec_mdio_controller {
struct mdio_fsl_priv {
struct tgec_mdio_controller __iomem *mdio_base;
bool is_little_endian;
+ bool has_a009885;
bool has_a011043;
};
@@ -186,10 +187,10 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
{
struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+ unsigned long flags;
uint16_t dev_addr;
uint32_t mdio_stat;
uint32_t mdio_ctl;
- uint16_t value;
int ret;
bool endian = priv->is_little_endian;
@@ -221,12 +222,18 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
return ret;
}
+ if (priv->has_a009885)
+ /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we
+ * must read back the data register within 16 MDC cycles.
+ */
+ local_irq_save(flags);
+
/* Initiate the read */
xgmac_write32(mdio_ctl | MDIO_CTL_READ, &regs->mdio_ctl, endian);
ret = xgmac_wait_until_done(&bus->dev, regs, endian);
if (ret)
- return ret;
+ goto irq_restore;
/* Return all Fs if nothing was there */
if ((xgmac_read32(&regs->mdio_stat, endian) & MDIO_STAT_RD_ER) &&
@@ -234,13 +241,17 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
dev_dbg(&bus->dev,
"Error while reading PHY%d reg at %d.%hhu\n",
phy_id, dev_addr, regnum);
- return 0xffff;
+ ret = 0xffff;
+ } else {
+ ret = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
+ dev_dbg(&bus->dev, "read %04x\n", ret);
}
- value = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
- dev_dbg(&bus->dev, "read %04x\n", value);
+irq_restore:
+ if (priv->has_a009885)
+ local_irq_restore(flags);
- return value;
+ return ret;
}
static int xgmac_mdio_probe(struct platform_device *pdev)
@@ -287,6 +298,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
priv->is_little_endian = device_property_read_bool(&pdev->dev,
"little-endian");
+ priv->has_a009885 = device_property_read_bool(&pdev->dev,
+ "fsl,erratum-a009885");
priv->has_a011043 = device_property_read_bool(&pdev->dev,
"fsl,erratum-a011043");
@@ -318,9 +331,10 @@ err_ioremap:
static int xgmac_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
+ struct mdio_fsl_priv *priv = bus->priv;
mdiobus_unregister(bus);
- iounmap(bus->priv);
+ iounmap(priv->mdio_base);
mdiobus_free(bus);
return 0;
diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c
index 27937c5d7956..daec9ce04531 100644
--- a/drivers/net/ethernet/i825xx/sni_82596.c
+++ b/drivers/net/ethernet/i825xx/sni_82596.c
@@ -117,9 +117,10 @@ static int sni_82596_probe(struct platform_device *dev)
netdevice->dev_addr[5] = readb(eth_addr + 0x06);
iounmap(eth_addr);
- if (!netdevice->irq) {
+ if (netdevice->irq < 0) {
printk(KERN_ERR "%s: IRQ not found for i82596 at 0x%lx\n",
__FILE__, netdevice->base_addr);
+ retval = netdevice->irq;
goto probe_failed;
}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index a0a5a8e6bd8c..2fd9ef2fe5d6 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -283,7 +283,6 @@ struct prestera_router {
struct list_head rif_entry_list;
struct notifier_block inetaddr_nb;
struct notifier_block inetaddr_valid_nb;
- bool aborted;
};
struct prestera_rxtx_params {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index 51fc841b1e7a..e6bfadc874c5 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -1831,8 +1831,8 @@ static int prestera_iface_to_msg(struct prestera_iface *iface,
int prestera_hw_rif_create(struct prestera_switch *sw,
struct prestera_iface *iif, u8 *mac, u16 *rif_id)
{
- struct prestera_msg_rif_req req;
struct prestera_msg_rif_resp resp;
+ struct prestera_msg_rif_req req;
int err;
memcpy(req.mac, mac, ETH_ALEN);
@@ -1868,9 +1868,9 @@ int prestera_hw_rif_delete(struct prestera_switch *sw, u16 rif_id,
int prestera_hw_vr_create(struct prestera_switch *sw, u16 *vr_id)
{
- int err;
struct prestera_msg_vr_resp resp;
struct prestera_msg_vr_req req;
+ int err;
err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_ROUTER_VR_CREATE,
&req.cmd, sizeof(req), &resp.ret, sizeof(resp));
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 08fdd1e50388..cad93f747d0c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -982,6 +982,7 @@ static void prestera_switch_fini(struct prestera_switch *sw)
prestera_event_handlers_unregister(sw);
prestera_rxtx_switch_fini(sw);
prestera_switchdev_fini(sw);
+ prestera_router_fini(sw);
prestera_netdev_event_handler_unregister(sw);
prestera_hw_switch_fini(sw);
}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c
index 8a3b7b664358..6ef4d32b8fdd 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c
@@ -25,10 +25,10 @@ static int __prestera_inetaddr_port_event(struct net_device *port_dev,
struct netlink_ext_ack *extack)
{
struct prestera_port *port = netdev_priv(port_dev);
- int err;
- struct prestera_rif_entry *re;
struct prestera_rif_entry_key re_key = {};
+ struct prestera_rif_entry *re;
u32 kern_tb_id;
+ int err;
err = prestera_is_valid_mac_addr(port, port_dev->dev_addr);
if (err) {
@@ -45,21 +45,21 @@ static int __prestera_inetaddr_port_event(struct net_device *port_dev,
switch (event) {
case NETDEV_UP:
if (re) {
- NL_SET_ERR_MSG_MOD(extack, "rif_entry already exist");
+ NL_SET_ERR_MSG_MOD(extack, "RIF already exist");
return -EEXIST;
}
re = prestera_rif_entry_create(port->sw, &re_key,
prestera_fix_tb_id(kern_tb_id),
port_dev->dev_addr);
if (!re) {
- NL_SET_ERR_MSG_MOD(extack, "Can't create rif_entry");
+ NL_SET_ERR_MSG_MOD(extack, "Can't create RIF");
return -EINVAL;
}
dev_hold(port_dev);
break;
case NETDEV_DOWN:
if (!re) {
- NL_SET_ERR_MSG_MOD(extack, "rif_entry not exist");
+ NL_SET_ERR_MSG_MOD(extack, "Can't find RIF");
return -EEXIST;
}
prestera_rif_entry_destroy(port->sw, re);
@@ -75,11 +75,11 @@ static int __prestera_inetaddr_event(struct prestera_switch *sw,
unsigned long event,
struct netlink_ext_ack *extack)
{
- if (prestera_netdev_check(dev) && !netif_is_bridge_port(dev) &&
- !netif_is_lag_port(dev) && !netif_is_ovs_port(dev))
- return __prestera_inetaddr_port_event(dev, event, extack);
+ if (!prestera_netdev_check(dev) || netif_is_bridge_port(dev) ||
+ netif_is_lag_port(dev) || netif_is_ovs_port(dev))
+ return 0;
- return 0;
+ return __prestera_inetaddr_port_event(dev, event, extack);
}
static int __prestera_inetaddr_cb(struct notifier_block *nb,
@@ -126,6 +126,8 @@ static int __prestera_inetaddr_valid_cb(struct notifier_block *nb,
goto out;
if (ipv4_is_multicast(ivi->ivi_addr)) {
+ NL_SET_ERR_MSG_MOD(ivi->extack,
+ "Multicast addr on RIF is not supported");
err = -EINVAL;
goto out;
}
@@ -166,7 +168,7 @@ int prestera_router_init(struct prestera_switch *sw)
err_register_inetaddr_notifier:
unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
err_register_inetaddr_validator_notifier:
- /* prestera_router_hw_fini */
+ prestera_router_hw_fini(sw);
err_router_lib_init:
kfree(sw->router);
return err;
@@ -176,7 +178,7 @@ void prestera_router_fini(struct prestera_switch *sw)
{
unregister_inetaddr_notifier(&sw->router->inetaddr_nb);
unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb);
- /* router_hw_fini */
+ prestera_router_hw_fini(sw);
kfree(sw->router);
sw->router = NULL;
}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
index 5866a4be50f5..e5592b69ad37 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
@@ -29,6 +29,12 @@ int prestera_router_hw_init(struct prestera_switch *sw)
return 0;
}
+void prestera_router_hw_fini(struct prestera_switch *sw)
+{
+ WARN_ON(!list_empty(&sw->router->vr_list));
+ WARN_ON(!list_empty(&sw->router->rif_entry_list));
+}
+
static struct prestera_vr *__prestera_vr_find(struct prestera_switch *sw,
u32 tb_id)
{
@@ -47,13 +53,8 @@ static struct prestera_vr *__prestera_vr_create(struct prestera_switch *sw,
struct netlink_ext_ack *extack)
{
struct prestera_vr *vr;
- u16 hw_vr_id;
int err;
- err = prestera_hw_vr_create(sw, &hw_vr_id);
- if (err)
- return ERR_PTR(-ENOMEM);
-
vr = kzalloc(sizeof(*vr), GFP_KERNEL);
if (!vr) {
err = -ENOMEM;
@@ -61,23 +62,26 @@ static struct prestera_vr *__prestera_vr_create(struct prestera_switch *sw,
}
vr->tb_id = tb_id;
- vr->hw_vr_id = hw_vr_id;
+
+ err = prestera_hw_vr_create(sw, &vr->hw_vr_id);
+ if (err)
+ goto err_hw_create;
list_add(&vr->router_node, &sw->router->vr_list);
return vr;
-err_alloc_vr:
- prestera_hw_vr_delete(sw, hw_vr_id);
+err_hw_create:
kfree(vr);
+err_alloc_vr:
return ERR_PTR(err);
}
static void __prestera_vr_destroy(struct prestera_switch *sw,
struct prestera_vr *vr)
{
- prestera_hw_vr_delete(sw, vr->hw_vr_id);
list_del(&vr->router_node);
+ prestera_hw_vr_delete(sw, vr->hw_vr_id);
kfree(vr);
}
@@ -87,17 +91,22 @@ static struct prestera_vr *prestera_vr_get(struct prestera_switch *sw, u32 tb_id
struct prestera_vr *vr;
vr = __prestera_vr_find(sw, tb_id);
- if (!vr)
+ if (vr) {
+ refcount_inc(&vr->refcount);
+ } else {
vr = __prestera_vr_create(sw, tb_id, extack);
- if (IS_ERR(vr))
- return ERR_CAST(vr);
+ if (IS_ERR(vr))
+ return ERR_CAST(vr);
+
+ refcount_set(&vr->refcount, 1);
+ }
return vr;
}
static void prestera_vr_put(struct prestera_switch *sw, struct prestera_vr *vr)
{
- if (!vr->ref_cnt)
+ if (refcount_dec_and_test(&vr->refcount))
__prestera_vr_destroy(sw, vr);
}
@@ -120,7 +129,7 @@ __prestera_rif_entry_key_copy(const struct prestera_rif_entry_key *in,
out->iface.vlan_id = in->iface.vlan_id;
break;
default:
- pr_err("Unsupported iface type");
+ WARN(1, "Unsupported iface type");
return -EINVAL;
}
@@ -158,7 +167,6 @@ void prestera_rif_entry_destroy(struct prestera_switch *sw,
iface.vr_id = e->vr->hw_vr_id;
prestera_hw_rif_delete(sw, e->hw_id, &iface);
- e->vr->ref_cnt--;
prestera_vr_put(sw, e->vr);
kfree(e);
}
@@ -183,7 +191,6 @@ prestera_rif_entry_create(struct prestera_switch *sw,
if (IS_ERR(e->vr))
goto err_vr_get;
- e->vr->ref_cnt++;
memcpy(&e->addr, addr, sizeof(e->addr));
/* HW */
@@ -198,7 +205,6 @@ prestera_rif_entry_create(struct prestera_switch *sw,
return e;
err_hw_create:
- e->vr->ref_cnt--;
prestera_vr_put(sw, e->vr);
err_vr_get:
err_key_copy:
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h
index fed53595f7bb..b6b028551868 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h
@@ -6,7 +6,7 @@
struct prestera_vr {
struct list_head router_node;
- unsigned int ref_cnt;
+ refcount_t refcount;
u32 tb_id; /* key (kernel fib table id) */
u16 hw_vr_id; /* virtual router ID */
u8 __pad[2];
@@ -32,5 +32,6 @@ prestera_rif_entry_create(struct prestera_switch *sw,
struct prestera_rif_entry_key *k,
u32 tb_id, const unsigned char *addr);
int prestera_router_hw_init(struct prestera_switch *sw);
+void prestera_router_hw_fini(struct prestera_switch *sw);
#endif /* _PRESTERA_ROUTER_HW_H_ */
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b67b4323cff0..f02d07ec5ccb 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -267,7 +267,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
phylink_config);
struct mtk_eth *eth = mac->hw;
u32 mcr_cur, mcr_new, sid, i;
- int val, ge_mode, err;
+ int val, ge_mode, err = 0;
/* MT76x8 has no hardware settings between for the MAC */
if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 33815246fead..378fc8e3bd97 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */
+#include <net/inet_ecn.h>
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
@@ -235,7 +236,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
@@ -350,7 +351,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index b1311b656e17..455293aa6343 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -771,7 +771,10 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
- ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
+ /* Don't attempt to send PAUSE frames on the NPI port, it's broken */
+ if (port != ocelot->npi)
+ ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA,
+ tx_pause);
/* Undo the effects of ocelot_phylink_mac_link_down:
* enable MAC module
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index beb9379424c0..949858891973 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -559,13 +559,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
return -EOPNOTSUPP;
}
- if (filter->block_id == VCAP_IS1 &&
- !is_zero_ether_addr(match.mask->dst)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Key type S1_NORMAL cannot match on destination MAC");
- return -EOPNOTSUPP;
- }
-
/* The hw support mac matches only for MAC_ETYPE key,
* therefore if other matches(port, tcp flags, etc) are added
* then just bail out
@@ -580,6 +573,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
return -EOPNOTSUPP;
flow_rule_match_eth_addrs(rule, &match);
+
+ if (filter->block_id == VCAP_IS1 &&
+ !is_zero_ether_addr(match.mask->dst)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Key type S1_NORMAL cannot match on destination MAC");
+ return -EOPNOTSUPP;
+ }
+
filter->key_type = OCELOT_VCAP_KEY_ETYPE;
ether_addr_copy(filter->key.etype.dmac.value,
match.key->dst);
@@ -805,13 +806,34 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port,
struct netlink_ext_ack *extack = f->common.extack;
struct ocelot_vcap_filter *filter;
int chain = f->common.chain_index;
- int ret;
+ int block_id, ret;
if (chain && !ocelot_find_vcap_filter_that_points_at(ocelot, chain)) {
NL_SET_ERR_MSG_MOD(extack, "No default GOTO action points to this chain");
return -EOPNOTSUPP;
}
+ block_id = ocelot_chain_to_block(chain, ingress);
+ if (block_id < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot offload to this chain");
+ return -EOPNOTSUPP;
+ }
+
+ filter = ocelot_vcap_block_find_filter_by_id(&ocelot->block[block_id],
+ f->cookie, true);
+ if (filter) {
+ /* Filter already exists on other ports */
+ if (!ingress) {
+ NL_SET_ERR_MSG_MOD(extack, "VCAP ES0 does not support shared filters");
+ return -EOPNOTSUPP;
+ }
+
+ filter->ingress_port_mask |= BIT(port);
+
+ return ocelot_vcap_filter_replace(ocelot, filter);
+ }
+
+ /* Filter didn't exist, create it now */
filter = ocelot_vcap_filter_create(ocelot, port, ingress, f);
if (!filter)
return -ENOMEM;
@@ -874,6 +896,12 @@ int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port,
if (filter->type == OCELOT_VCAP_FILTER_DUMMY)
return ocelot_vcap_dummy_filter_del(ocelot, filter);
+ if (ingress) {
+ filter->ingress_port_mask &= ~BIT(port);
+ if (filter->ingress_port_mask)
+ return ocelot_vcap_filter_replace(ocelot, filter);
+ }
+
return ocelot_vcap_filter_del(ocelot, filter);
}
EXPORT_SYMBOL_GPL(ocelot_cls_flower_destroy);
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 8115c3db252e..e271b6225b72 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -1187,7 +1187,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
ocelot_port_bridge_join(ocelot, port, bridge);
err = switchdev_bridge_port_offload(brport_dev, dev, priv,
- &ocelot_netdevice_nb,
+ &ocelot_switchdev_nb,
&ocelot_switchdev_blocking_nb,
false, extack);
if (err)
@@ -1201,7 +1201,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
err_switchdev_sync:
switchdev_bridge_port_unoffload(brport_dev, priv,
- &ocelot_netdevice_nb,
+ &ocelot_switchdev_nb,
&ocelot_switchdev_blocking_nb);
err_switchdev_offload:
ocelot_port_bridge_leave(ocelot, port, bridge);
@@ -1214,7 +1214,7 @@ static void ocelot_netdevice_pre_bridge_leave(struct net_device *dev,
struct ocelot_port_private *priv = netdev_priv(dev);
switchdev_bridge_port_unoffload(brport_dev, priv,
- &ocelot_netdevice_nb,
+ &ocelot_switchdev_nb,
&ocelot_switchdev_blocking_nb);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
index adfeb8d3293d..62a69a91ab22 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
@@ -12,6 +12,7 @@
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/mfd/syscon.h>
@@ -48,46 +49,60 @@
#define DWMAC_RX_VARDELAY(d) ((d) << DWMAC_RX_VARDELAY_SHIFT)
#define DWMAC_RXN_VARDELAY(d) ((d) << DWMAC_RXN_VARDELAY_SHIFT)
+struct oxnas_dwmac;
+
+struct oxnas_dwmac_data {
+ int (*setup)(struct oxnas_dwmac *dwmac);
+};
+
struct oxnas_dwmac {
struct device *dev;
struct clk *clk;
struct regmap *regmap;
+ const struct oxnas_dwmac_data *data;
};
-static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
+static int oxnas_dwmac_setup_ox810se(struct oxnas_dwmac *dwmac)
{
- struct oxnas_dwmac *dwmac = priv;
unsigned int value;
int ret;
- /* Reset HW here before changing the glue configuration */
- ret = device_reset(dwmac->dev);
- if (ret)
+ ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value);
+ if (ret < 0)
return ret;
- ret = clk_prepare_enable(dwmac->clk);
- if (ret)
- return ret;
+ /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */
+ value |= BIT(DWMAC_CKEN_GTX) |
+ /* Use simple mux for 25/125 Mhz clock switching */
+ BIT(DWMAC_SIMPLE_MUX);
+
+ regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value);
+
+ return 0;
+}
+
+static int oxnas_dwmac_setup_ox820(struct oxnas_dwmac *dwmac)
+{
+ unsigned int value;
+ int ret;
ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value);
- if (ret < 0) {
- clk_disable_unprepare(dwmac->clk);
+ if (ret < 0)
return ret;
- }
/* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */
value |= BIT(DWMAC_CKEN_GTX) |
/* Use simple mux for 25/125 Mhz clock switching */
- BIT(DWMAC_SIMPLE_MUX) |
- /* set auto switch tx clock source */
- BIT(DWMAC_AUTO_TX_SOURCE) |
- /* enable tx & rx vardelay */
- BIT(DWMAC_CKEN_TX_OUT) |
- BIT(DWMAC_CKEN_TXN_OUT) |
- BIT(DWMAC_CKEN_TX_IN) |
- BIT(DWMAC_CKEN_RX_OUT) |
- BIT(DWMAC_CKEN_RXN_OUT) |
- BIT(DWMAC_CKEN_RX_IN);
+ BIT(DWMAC_SIMPLE_MUX) |
+ /* set auto switch tx clock source */
+ BIT(DWMAC_AUTO_TX_SOURCE) |
+ /* enable tx & rx vardelay */
+ BIT(DWMAC_CKEN_TX_OUT) |
+ BIT(DWMAC_CKEN_TXN_OUT) |
+ BIT(DWMAC_CKEN_TX_IN) |
+ BIT(DWMAC_CKEN_RX_OUT) |
+ BIT(DWMAC_CKEN_RXN_OUT) |
+ BIT(DWMAC_CKEN_RX_IN);
regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value);
/* set tx & rx vardelay */
@@ -100,6 +115,27 @@ static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
return 0;
}
+static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
+{
+ struct oxnas_dwmac *dwmac = priv;
+ int ret;
+
+ /* Reset HW here before changing the glue configuration */
+ ret = device_reset(dwmac->dev);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare_enable(dwmac->clk);
+ if (ret)
+ return ret;
+
+ ret = dwmac->data->setup(dwmac);
+ if (ret)
+ clk_disable_unprepare(dwmac->clk);
+
+ return ret;
+}
+
static void oxnas_dwmac_exit(struct platform_device *pdev, void *priv)
{
struct oxnas_dwmac *dwmac = priv;
@@ -128,6 +164,12 @@ static int oxnas_dwmac_probe(struct platform_device *pdev)
goto err_remove_config_dt;
}
+ dwmac->data = (const struct oxnas_dwmac_data *)of_device_get_match_data(&pdev->dev);
+ if (!dwmac->data) {
+ ret = -EINVAL;
+ goto err_remove_config_dt;
+ }
+
dwmac->dev = &pdev->dev;
plat_dat->bsp_priv = dwmac;
plat_dat->init = oxnas_dwmac_init;
@@ -166,8 +208,23 @@ err_remove_config_dt:
return ret;
}
+static const struct oxnas_dwmac_data ox810se_dwmac_data = {
+ .setup = oxnas_dwmac_setup_ox810se,
+};
+
+static const struct oxnas_dwmac_data ox820_dwmac_data = {
+ .setup = oxnas_dwmac_setup_ox820,
+};
+
static const struct of_device_id oxnas_dwmac_match[] = {
- { .compatible = "oxsemi,ox820-dwmac" },
+ {
+ .compatible = "oxsemi,ox810se-dwmac",
+ .data = &ox810se_dwmac_data,
+ },
+ {
+ .compatible = "oxsemi,ox820-dwmac",
+ .data = &ox820_dwmac_data,
+ },
{ }
};
MODULE_DEVICE_TABLE(of, oxnas_dwmac_match);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 63ff2dad8c85..6708ca2aa4f7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7159,7 +7159,8 @@ int stmmac_dvr_probe(struct device *device,
pm_runtime_get_noresume(device);
pm_runtime_set_active(device);
- pm_runtime_enable(device);
+ if (!pm_runtime_enabled(device))
+ pm_runtime_enable(device);
if (priv->hw->pcs != STMMAC_PCS_TBI &&
priv->hw->pcs != STMMAC_PCS_RTBI) {
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 33142d505fc8..03575c017500 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -349,7 +349,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
struct cpsw_common *cpsw = ndev_to_cpsw(xmeta->ndev);
int pkt_size = cpsw->rx_packet_max;
int ret = 0, port, ch = xmeta->ch;
- int headroom = CPSW_HEADROOM;
+ int headroom = CPSW_HEADROOM_NA;
struct net_device *ndev = xmeta->ndev;
struct cpsw_priv *priv;
struct page_pool *pool;
@@ -392,7 +392,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
}
if (priv->xdp_prog) {
- int headroom = CPSW_HEADROOM, size = len;
+ int size = len;
xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]);
if (status & CPDMA_RX_VLAN_ENCAP) {
@@ -442,7 +442,7 @@ requeue:
xmeta->ndev = ndev;
xmeta->ch = ch;
- dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
+ dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA;
ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
pkt_size, 0);
if (ret < 0) {
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 279e261e4720..bd4b1528cf99 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -283,7 +283,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
{
struct page *new_page, *page = token;
void *pa = page_address(page);
- int headroom = CPSW_HEADROOM;
+ int headroom = CPSW_HEADROOM_NA;
struct cpsw_meta_xdp *xmeta;
struct cpsw_common *cpsw;
struct net_device *ndev;
@@ -336,7 +336,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
}
if (priv->xdp_prog) {
- int headroom = CPSW_HEADROOM, size = len;
+ int size = len;
xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]);
if (status & CPDMA_RX_VLAN_ENCAP) {
@@ -386,7 +386,7 @@ requeue:
xmeta->ndev = ndev;
xmeta->ch = ch;
- dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
+ dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA;
ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
pkt_size, 0);
if (ret < 0) {
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index 3537502e5e8b..ba220593e6db 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -1122,7 +1122,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv)
xmeta->ndev = priv->ndev;
xmeta->ch = ch;
- dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM;
+ dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM_NA;
ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch,
page, dma,
cpsw->rx_packet_max,
diff --git a/drivers/net/ethernet/vertexcom/Kconfig b/drivers/net/ethernet/vertexcom/Kconfig
index 6e2cf062ddba..4184a635fe01 100644
--- a/drivers/net/ethernet/vertexcom/Kconfig
+++ b/drivers/net/ethernet/vertexcom/Kconfig
@@ -5,7 +5,7 @@
config NET_VENDOR_VERTEXCOM
bool "Vertexcom devices"
- default n
+ default y
help
If you have a network (Ethernet) card belonging to this class, say Y.
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 23ac353b35fe..377c94ec2486 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -41,8 +41,9 @@
#include "xilinx_axienet.h"
/* Descriptors defines for Tx and Rx DMA */
-#define TX_BD_NUM_DEFAULT 64
+#define TX_BD_NUM_DEFAULT 128
#define RX_BD_NUM_DEFAULT 1024
+#define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1)
#define TX_BD_NUM_MAX 4096
#define RX_BD_NUM_MAX 4096
@@ -496,7 +497,8 @@ static void axienet_setoptions(struct net_device *ndev, u32 options)
static int __axienet_device_reset(struct axienet_local *lp)
{
- u32 timeout;
+ u32 value;
+ int ret;
/* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset
* process of Axi DMA takes a while to complete as all pending
@@ -506,15 +508,23 @@ static int __axienet_device_reset(struct axienet_local *lp)
* they both reset the entire DMA core, so only one needs to be used.
*/
axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK);
- timeout = DELAY_OF_ONE_MILLISEC;
- while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) &
- XAXIDMA_CR_RESET_MASK) {
- udelay(1);
- if (--timeout == 0) {
- netdev_err(lp->ndev, "%s: DMA reset timeout!\n",
- __func__);
- return -ETIMEDOUT;
- }
+ ret = read_poll_timeout(axienet_dma_in32, value,
+ !(value & XAXIDMA_CR_RESET_MASK),
+ DELAY_OF_ONE_MILLISEC, 50000, false, lp,
+ XAXIDMA_TX_CR_OFFSET);
+ if (ret) {
+ dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__);
+ return ret;
+ }
+
+ /* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */
+ ret = read_poll_timeout(axienet_ior, value,
+ value & XAE_INT_PHYRSTCMPLT_MASK,
+ DELAY_OF_ONE_MILLISEC, 50000, false, lp,
+ XAE_IS_OFFSET);
+ if (ret) {
+ dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__);
+ return ret;
}
return 0;
@@ -623,6 +633,8 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
if (nr_bds == -1 && !(status & XAXIDMA_BD_STS_COMPLETE_MASK))
break;
+ /* Ensure we see complete descriptor update */
+ dma_rmb();
phys = desc_get_phys_addr(lp, cur_p);
dma_unmap_single(ndev->dev.parent, phys,
(cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
@@ -631,13 +643,15 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK))
dev_consume_skb_irq(cur_p->skb);
- cur_p->cntrl = 0;
cur_p->app0 = 0;
cur_p->app1 = 0;
cur_p->app2 = 0;
cur_p->app4 = 0;
- cur_p->status = 0;
cur_p->skb = NULL;
+ /* ensure our transmit path and device don't prematurely see status cleared */
+ wmb();
+ cur_p->cntrl = 0;
+ cur_p->status = 0;
if (sizep)
*sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
@@ -647,6 +661,32 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
}
/**
+ * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
+ * @lp: Pointer to the axienet_local structure
+ * @num_frag: The number of BDs to check for
+ *
+ * Return: 0, on success
+ * NETDEV_TX_BUSY, if any of the descriptors are not free
+ *
+ * This function is invoked before BDs are allocated and transmission starts.
+ * This function returns 0 if a BD or group of BDs can be allocated for
+ * transmission. If the BD or any of the BDs are not free the function
+ * returns a busy status. This is invoked from axienet_start_xmit.
+ */
+static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
+ int num_frag)
+{
+ struct axidma_bd *cur_p;
+
+ /* Ensure we see all descriptor updates from device or TX IRQ path */
+ rmb();
+ cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
+ if (cur_p->cntrl)
+ return NETDEV_TX_BUSY;
+ return 0;
+}
+
+/**
* axienet_start_xmit_done - Invoked once a transmit is completed by the
* Axi DMA Tx channel.
* @ndev: Pointer to the net_device structure
@@ -675,30 +715,8 @@ static void axienet_start_xmit_done(struct net_device *ndev)
/* Matches barrier in axienet_start_xmit */
smp_mb();
- netif_wake_queue(ndev);
-}
-
-/**
- * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
- * @lp: Pointer to the axienet_local structure
- * @num_frag: The number of BDs to check for
- *
- * Return: 0, on success
- * NETDEV_TX_BUSY, if any of the descriptors are not free
- *
- * This function is invoked before BDs are allocated and transmission starts.
- * This function returns 0 if a BD or group of BDs can be allocated for
- * transmission. If the BD or any of the BDs are not free the function
- * returns a busy status. This is invoked from axienet_start_xmit.
- */
-static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
- int num_frag)
-{
- struct axidma_bd *cur_p;
- cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
- if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK)
- return NETDEV_TX_BUSY;
- return 0;
+ if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
+ netif_wake_queue(ndev);
}
/**
@@ -730,20 +748,15 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
num_frag = skb_shinfo(skb)->nr_frags;
cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
- if (axienet_check_tx_bd_space(lp, num_frag)) {
- if (netif_queue_stopped(ndev))
- return NETDEV_TX_BUSY;
-
+ if (axienet_check_tx_bd_space(lp, num_frag + 1)) {
+ /* Should not happen as last start_xmit call should have
+ * checked for sufficient space and queue should only be
+ * woken when sufficient space is available.
+ */
netif_stop_queue(ndev);
-
- /* Matches barrier in axienet_start_xmit_done */
- smp_mb();
-
- /* Space might have just been freed - check again */
- if (axienet_check_tx_bd_space(lp, num_frag))
- return NETDEV_TX_BUSY;
-
- netif_wake_queue(ndev);
+ if (net_ratelimit())
+ netdev_warn(ndev, "TX ring unexpectedly full\n");
+ return NETDEV_TX_BUSY;
}
if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -804,6 +817,18 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
if (++lp->tx_bd_tail >= lp->tx_bd_num)
lp->tx_bd_tail = 0;
+ /* Stop queue if next transmit may not have space */
+ if (axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
+ netif_stop_queue(ndev);
+
+ /* Matches barrier in axienet_start_xmit_done */
+ smp_mb();
+
+ /* Space might have just been freed - check again */
+ if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
+ netif_wake_queue(ndev);
+ }
+
return NETDEV_TX_OK;
}
@@ -834,6 +859,8 @@ static void axienet_recv(struct net_device *ndev)
tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
+ /* Ensure we see complete descriptor update */
+ dma_rmb();
phys = desc_get_phys_addr(lp, cur_p);
dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size,
DMA_FROM_DEVICE);
@@ -1352,7 +1379,8 @@ axienet_ethtools_set_ringparam(struct net_device *ndev,
if (ering->rx_pending > RX_BD_NUM_MAX ||
ering->rx_mini_pending ||
ering->rx_jumbo_pending ||
- ering->rx_pending > TX_BD_NUM_MAX)
+ ering->tx_pending < TX_BD_NUM_MIN ||
+ ering->tx_pending > TX_BD_NUM_MAX)
return -EINVAL;
if (netif_running(ndev))
@@ -2027,6 +2055,11 @@ static int axienet_probe(struct platform_device *pdev)
lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
+ /* Reset core now that clocks are enabled, prior to accessing MDIO */
+ ret = __axienet_device_reset(lp);
+ if (ret)
+ goto cleanup_clk;
+
lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
if (lp->phy_node) {
ret = axienet_mdio_setup(lp);
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 315278a7cf88..cf69da0e296c 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -164,6 +164,7 @@ struct hv_netvsc_packet {
u32 total_bytes;
u32 send_buf_index;
u32 total_data_buflen;
+ struct hv_dma_range *dma_range;
};
#define NETVSC_HASH_KEYLEN 40
@@ -1074,6 +1075,7 @@ struct netvsc_device {
/* Receive buffer allocated by us but manages by NetVSP */
void *recv_buf;
+ void *recv_original_buf;
u32 recv_buf_size; /* allocated bytes */
struct vmbus_gpadl recv_buf_gpadl_handle;
u32 recv_section_cnt;
@@ -1082,6 +1084,7 @@ struct netvsc_device {
/* Send buffer allocated by us */
void *send_buf;
+ void *send_original_buf;
u32 send_buf_size;
struct vmbus_gpadl send_buf_gpadl_handle;
u32 send_section_cnt;
@@ -1731,4 +1734,6 @@ struct rndis_message {
#define RETRY_US_HI 10000
#define RETRY_MAX 2000 /* >10 sec */
+void netvsc_dma_unmap(struct hv_device *hv_dev,
+ struct hv_netvsc_packet *packet);
#endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 5086cd07d1ed..afa81a9480cc 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -153,8 +153,21 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
kfree(nvdev->extension);
- vfree(nvdev->recv_buf);
- vfree(nvdev->send_buf);
+
+ if (nvdev->recv_original_buf) {
+ hv_unmap_memory(nvdev->recv_buf);
+ vfree(nvdev->recv_original_buf);
+ } else {
+ vfree(nvdev->recv_buf);
+ }
+
+ if (nvdev->send_original_buf) {
+ hv_unmap_memory(nvdev->send_buf);
+ vfree(nvdev->send_original_buf);
+ } else {
+ vfree(nvdev->send_buf);
+ }
+
bitmap_free(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
@@ -337,6 +350,7 @@ static int netvsc_init_buf(struct hv_device *device,
struct nvsp_message *init_packet;
unsigned int buf_size;
int i, ret = 0;
+ void *vaddr;
/* Get receive buffer area. */
buf_size = device_info->recv_sections * device_info->recv_section_size;
@@ -372,6 +386,17 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}
+ if (hv_isolation_type_snp()) {
+ vaddr = hv_map_memory(net_device->recv_buf, buf_size);
+ if (!vaddr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ net_device->recv_original_buf = net_device->recv_buf;
+ net_device->recv_buf = vaddr;
+ }
+
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -475,6 +500,17 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}
+ if (hv_isolation_type_snp()) {
+ vaddr = hv_map_memory(net_device->send_buf, buf_size);
+ if (!vaddr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ net_device->send_original_buf = net_device->send_buf;
+ net_device->send_buf = vaddr;
+ }
+
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -764,7 +800,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
/* Notify the layer above us */
if (likely(skb)) {
- const struct hv_netvsc_packet *packet
+ struct hv_netvsc_packet *packet
= (struct hv_netvsc_packet *)skb->cb;
u32 send_index = packet->send_buf_index;
struct netvsc_stats *tx_stats;
@@ -780,6 +816,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
tx_stats->bytes += packet->total_bytes;
u64_stats_update_end(&tx_stats->syncp);
+ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
napi_consume_skb(skb, budget);
}
@@ -944,6 +981,88 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
memset(dest, 0, padding);
}
+void netvsc_dma_unmap(struct hv_device *hv_dev,
+ struct hv_netvsc_packet *packet)
+{
+ u32 page_count = packet->cp_partial ?
+ packet->page_buf_cnt - packet->rmsg_pgcnt :
+ packet->page_buf_cnt;
+ int i;
+
+ if (!hv_is_isolation_supported())
+ return;
+
+ if (!packet->dma_range)
+ return;
+
+ for (i = 0; i < page_count; i++)
+ dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
+ packet->dma_range[i].mapping_size,
+ DMA_TO_DEVICE);
+
+ kfree(packet->dma_range);
+}
+
+/* netvsc_dma_map - Map swiotlb bounce buffer with data page of
+ * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation
+ * VM.
+ *
+ * In isolation VM, netvsc send buffer has been marked visible to
+ * host and so the data copied to send buffer doesn't need to use
+ * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
+ * may not be copied to send buffer and so these pages need to be
+ * mapped with swiotlb bounce buffer. netvsc_dma_map() is to do
+ * that. The pfns in the struct hv_page_buffer need to be converted
+ * to bounce buffer's pfn. The loop here is necessary because the
+ * entries in the page buffer array are not necessarily full
+ * pages of data. Each entry in the array has a separate offset and
+ * len that may be non-zero, even for entries in the middle of the
+ * array. And the entries are not physically contiguous. So each
+ * entry must be individually mapped rather than as a contiguous unit.
+ * So not use dma_map_sg() here.
+ */
+static int netvsc_dma_map(struct hv_device *hv_dev,
+ struct hv_netvsc_packet *packet,
+ struct hv_page_buffer *pb)
+{
+ u32 page_count = packet->cp_partial ?
+ packet->page_buf_cnt - packet->rmsg_pgcnt :
+ packet->page_buf_cnt;
+ dma_addr_t dma;
+ int i;
+
+ if (!hv_is_isolation_supported())
+ return 0;
+
+ packet->dma_range = kcalloc(page_count,
+ sizeof(*packet->dma_range),
+ GFP_KERNEL);
+ if (!packet->dma_range)
+ return -ENOMEM;
+
+ for (i = 0; i < page_count; i++) {
+ char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
+ + pb[i].offset);
+ u32 len = pb[i].len;
+
+ dma = dma_map_single(&hv_dev->device, src, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&hv_dev->device, dma)) {
+ kfree(packet->dma_range);
+ return -ENOMEM;
+ }
+
+ /* pb[].offset and pb[].len are not changed during dma mapping
+ * and so not reassign.
+ */
+ packet->dma_range[i].dma = dma;
+ packet->dma_range[i].mapping_size = len;
+ pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
+ }
+
+ return 0;
+}
+
static inline int netvsc_send_pkt(
struct hv_device *device,
struct hv_netvsc_packet *packet,
@@ -984,14 +1103,24 @@ static inline int netvsc_send_pkt(
trace_nvsp_send_pkt(ndev, out_channel, rpkt);
+ packet->dma_range = NULL;
if (packet->page_buf_cnt) {
if (packet->cp_partial)
pb += packet->rmsg_pgcnt;
+ ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
+ if (ret) {
+ ret = -EAGAIN;
+ goto exit;
+ }
+
ret = vmbus_sendpacket_pagebuffer(out_channel,
pb, packet->page_buf_cnt,
&nvmsg, sizeof(nvmsg),
req_id);
+
+ if (ret)
+ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
} else {
ret = vmbus_sendpacket(out_channel,
&nvmsg, sizeof(nvmsg),
@@ -999,6 +1128,7 @@ static inline int netvsc_send_pkt(
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}
+exit:
if (ret == 0) {
atomic_inc_return(&nvchan->queue_sends);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index efa963b7af54..3646469433b1 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2516,6 +2516,7 @@ static int netvsc_probe(struct hv_device *dev,
net->netdev_ops = &device_ops;
net->ethtool_ops = &ethtool_ops;
SET_NETDEV_DEV(net, &dev->device);
+ dma_set_min_align_mask(&dev->device, HV_HYP_PAGE_SIZE - 1);
/* We always need headroom for rndis header */
net->needed_headroom = RNDIS_AND_PPI_SIZE;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index f6c9c2a670f9..448fcc325ed7 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -361,6 +361,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
}
}
+ netvsc_dma_unmap(((struct net_device_context *)
+ netdev_priv(ndev))->device_ctx, &request->pkt);
complete(&request->wait_event);
} else {
netdev_err(ndev,
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 49d9a077d037..68291a3efd04 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1080,27 +1080,38 @@ static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, bool add_one)
{
struct gsi *gsi;
u32 backlog;
+ int delta;
- if (!endpoint->replenish_enabled) {
+ if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) {
if (add_one)
atomic_inc(&endpoint->replenish_saved);
return;
}
+ /* If already active, just update the backlog */
+ if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) {
+ if (add_one)
+ atomic_inc(&endpoint->replenish_backlog);
+ return;
+ }
+
while (atomic_dec_not_zero(&endpoint->replenish_backlog))
if (ipa_endpoint_replenish_one(endpoint))
goto try_again_later;
+
+ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
+
if (add_one)
atomic_inc(&endpoint->replenish_backlog);
return;
try_again_later:
- /* The last one didn't succeed, so fix the backlog */
- backlog = atomic_inc_return(&endpoint->replenish_backlog);
+ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
- if (add_one)
- atomic_inc(&endpoint->replenish_backlog);
+ /* The last one didn't succeed, so fix the backlog */
+ delta = add_one ? 2 : 1;
+ backlog = atomic_add_return(delta, &endpoint->replenish_backlog);
/* Whenever a receive buffer transaction completes we'll try to
* replenish again. It's unlikely, but if we fail to supply even
@@ -1120,7 +1131,7 @@ static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint)
u32 max_backlog;
u32 saved;
- endpoint->replenish_enabled = true;
+ set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
while ((saved = atomic_xchg(&endpoint->replenish_saved, 0)))
atomic_add(saved, &endpoint->replenish_backlog);
@@ -1134,7 +1145,7 @@ static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint)
{
u32 backlog;
- endpoint->replenish_enabled = false;
+ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0)))
atomic_add(backlog, &endpoint->replenish_saved);
}
@@ -1691,7 +1702,8 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint)
/* RX transactions require a single TRE, so the maximum
* backlog is the same as the maximum outstanding TREs.
*/
- endpoint->replenish_enabled = false;
+ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
atomic_set(&endpoint->replenish_saved,
gsi_channel_tre_max(gsi, endpoint->channel_id));
atomic_set(&endpoint->replenish_backlog, 0);
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index 0a859d10312d..0313cdc607de 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -41,6 +41,19 @@ enum ipa_endpoint_name {
#define IPA_ENDPOINT_MAX 32 /* Max supported by driver */
/**
+ * enum ipa_replenish_flag: RX buffer replenish flags
+ *
+ * @IPA_REPLENISH_ENABLED: Whether receive buffer replenishing is enabled
+ * @IPA_REPLENISH_ACTIVE: Whether replenishing is underway
+ * @IPA_REPLENISH_COUNT: Number of defined replenish flags
+ */
+enum ipa_replenish_flag {
+ IPA_REPLENISH_ENABLED,
+ IPA_REPLENISH_ACTIVE,
+ IPA_REPLENISH_COUNT, /* Number of flags (must be last) */
+};
+
+/**
* struct ipa_endpoint - IPA endpoint information
* @ipa: IPA pointer
* @ee_id: Execution environmnent endpoint is associated with
@@ -51,7 +64,7 @@ enum ipa_endpoint_name {
* @trans_tre_max: Maximum number of TRE descriptors per transaction
* @evt_ring_id: GSI event ring used by the endpoint
* @netdev: Network device pointer, if endpoint uses one
- * @replenish_enabled: Whether receive buffer replenishing is enabled
+ * @replenish_flags: Replenishing state flags
* @replenish_ready: Number of replenish transactions without doorbell
* @replenish_saved: Replenish requests held while disabled
* @replenish_backlog: Number of buffers needed to fill hardware queue
@@ -72,7 +85,7 @@ struct ipa_endpoint {
struct net_device *netdev;
/* Receive buffer replenishing for RX endpoints */
- bool replenish_enabled;
+ DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT);
u32 replenish_ready;
atomic_t replenish_saved;
atomic_t replenish_backlog;
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index dae95d9a07e8..5b6c0d120e09 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -421,7 +421,7 @@ static int at803x_set_wol(struct phy_device *phydev,
const u8 *mac;
int ret, irq_enabled;
unsigned int i;
- const unsigned int offsets[] = {
+ static const unsigned int offsets[] = {
AT803X_LOC_MAC_ADDR_32_47_OFFSET,
AT803X_LOC_MAC_ADDR_16_31_OFFSET,
AT803X_LOC_MAC_ADDR_0_15_OFFSET,
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 739859c0dfb1..fa71fb7a66b5 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -189,6 +189,8 @@
#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII 0x4
#define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */
+#define MII_88E1510_MSCR_2 0x15
+
#define MII_VCT5_TX_RX_MDI0_COUPLING 0x10
#define MII_VCT5_TX_RX_MDI1_COUPLING 0x11
#define MII_VCT5_TX_RX_MDI2_COUPLING 0x12
@@ -1932,6 +1934,58 @@ static void marvell_get_stats(struct phy_device *phydev,
data[i] = marvell_get_stat(phydev, i);
}
+static int m88e1510_loopback(struct phy_device *phydev, bool enable)
+{
+ int err;
+
+ if (enable) {
+ u16 bmcr_ctl = 0, mscr2_ctl = 0;
+
+ if (phydev->speed == SPEED_1000)
+ bmcr_ctl = BMCR_SPEED1000;
+ else if (phydev->speed == SPEED_100)
+ bmcr_ctl = BMCR_SPEED100;
+
+ if (phydev->duplex == DUPLEX_FULL)
+ bmcr_ctl |= BMCR_FULLDPLX;
+
+ err = phy_write(phydev, MII_BMCR, bmcr_ctl);
+ if (err < 0)
+ return err;
+
+ if (phydev->speed == SPEED_1000)
+ mscr2_ctl = BMCR_SPEED1000;
+ else if (phydev->speed == SPEED_100)
+ mscr2_ctl = BMCR_SPEED100;
+
+ err = phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE,
+ MII_88E1510_MSCR_2, BMCR_SPEED1000 |
+ BMCR_SPEED100, mscr2_ctl);
+ if (err < 0)
+ return err;
+
+ /* Need soft reset to have speed configuration takes effect */
+ err = genphy_soft_reset(phydev);
+ if (err < 0)
+ return err;
+
+ /* FIXME: Based on trial and error test, it seem 1G need to have
+ * delay between soft reset and loopback enablement.
+ */
+ if (phydev->speed == SPEED_1000)
+ msleep(1000);
+
+ return phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK,
+ BMCR_LOOPBACK);
+ } else {
+ err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0);
+ if (err < 0)
+ return err;
+
+ return phy_config_aneg(phydev);
+ }
+}
+
static int marvell_vct5_wait_complete(struct phy_device *phydev)
{
int i;
@@ -3078,7 +3132,7 @@ static struct phy_driver marvell_drivers[] = {
.get_sset_count = marvell_get_sset_count,
.get_strings = marvell_get_strings,
.get_stats = marvell_get_stats,
- .set_loopback = genphy_loopback,
+ .set_loopback = m88e1510_loopback,
.get_tunable = m88e1011_get_tunable,
.set_tunable = m88e1011_set_tunable,
.cable_test_start = marvell_vct7_cable_test_start,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 4570cb9535b7..a7ebcdab415b 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1726,8 +1726,8 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init,
.config_intr = kszphy_config_intr,
.handle_interrupt = kszphy_handle_interrupt,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8021,
.phy_id_mask = 0x00ffffff,
@@ -1741,8 +1741,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8031,
.phy_id_mask = 0x00ffffff,
@@ -1756,8 +1756,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8041,
.phy_id_mask = MICREL_PHY_ID_MASK,
@@ -1788,8 +1788,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.name = "Micrel KSZ8051",
/* PHY_BASIC_FEATURES */
@@ -1802,8 +1802,8 @@ static struct phy_driver ksphy_driver[] = {
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.match_phy_device = ksz8051_match_phy_device,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8001,
.name = "Micrel KSZ8001 or KS8721",
@@ -1817,8 +1817,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8081,
.name = "Micrel KSZ8081 or KSZ8091",
@@ -1848,8 +1848,8 @@ static struct phy_driver ksphy_driver[] = {
.config_init = ksz8061_config_init,
.config_intr = kszphy_config_intr,
.handle_interrupt = kszphy_handle_interrupt,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ9021,
.phy_id_mask = 0x000ffffe,
@@ -1864,8 +1864,8 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
.read_mmd = genphy_read_mmd_unsupported,
.write_mmd = genphy_write_mmd_unsupported,
}, {
@@ -1883,7 +1883,7 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
+ .suspend = kszphy_suspend,
.resume = kszphy_resume,
}, {
.phy_id = PHY_ID_LAN8814,
@@ -1928,7 +1928,7 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
+ .suspend = kszphy_suspend,
.resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8873MLL,
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index ab77a9f439ef..4720b24ca51b 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1641,17 +1641,20 @@ static int sfp_sm_probe_for_phy(struct sfp *sfp)
static int sfp_module_parse_power(struct sfp *sfp)
{
u32 power_mW = 1000;
+ bool supports_a2;
if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
power_mW = 1500;
if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
power_mW = 2000;
+ supports_a2 = sfp->id.ext.sff8472_compliance !=
+ SFP_SFF8472_COMPLIANCE_NONE ||
+ sfp->id.ext.diagmon & SFP_DIAGMON_DDM;
+
if (power_mW > sfp->max_power_mW) {
/* Module power specification exceeds the allowed maximum. */
- if (sfp->id.ext.sff8472_compliance ==
- SFP_SFF8472_COMPLIANCE_NONE &&
- !(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) {
+ if (!supports_a2) {
/* The module appears not to implement bus address
* 0xa2, so assume that the module powers up in the
* indicated mode.
@@ -1668,11 +1671,25 @@ static int sfp_module_parse_power(struct sfp *sfp)
}
}
+ if (power_mW <= 1000) {
+ /* Modules below 1W do not require a power change sequence */
+ sfp->module_power_mW = power_mW;
+ return 0;
+ }
+
+ if (!supports_a2) {
+ /* The module power level is below the host maximum and the
+ * module appears not to implement bus address 0xa2, so assume
+ * that the module powers up in the indicated mode.
+ */
+ return 0;
+ }
+
/* If the module requires a higher power mode, but also requires
* an address change sequence, warn the user that the module may
* not be functional.
*/
- if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE && power_mW > 1000) {
+ if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) {
dev_warn(sfp->dev,
"Address Change Sequence not supported but module requires %u.%uW, module may not be functional\n",
power_mW / 1000, (power_mW / 100) % 10);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index f510e8219470..37e5f3495362 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1316,6 +1316,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */
{QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */
{QMI_FIXED_INTF(0x19d2, 0x1432, 3)}, /* ZTE ME3620 */
+ {QMI_FIXED_INTF(0x19d2, 0x1485, 5)}, /* ZTE MF286D */
{QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */
{QMI_FIXED_INTF(0x2001, 0x7e16, 3)}, /* D-Link DWM-221 */
{QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */
@@ -1401,6 +1402,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/
{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */
+ {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */
{QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
{QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index abe0149ed917..bc1e3dd67c04 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1962,7 +1962,8 @@ static const struct driver_info smsc95xx_info = {
.bind = smsc95xx_bind,
.unbind = smsc95xx_unbind,
.link_reset = smsc95xx_link_reset,
- .reset = smsc95xx_start_phy,
+ .reset = smsc95xx_reset,
+ .check_connect = smsc95xx_start_phy,
.stop = smsc95xx_stop,
.rx_fixup = smsc95xx_rx_fixup,
.tx_fixup = smsc95xx_tx_fixup,
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 569eecfbc2cd..a801ea40908f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2101,7 +2101,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
stragglers = num_cpu >= vi->curr_queue_pairs ?
num_cpu % vi->curr_queue_pairs :
0;
- cpu = cpumask_next(-1, cpu_online_mask);
+ cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < vi->curr_queue_pairs; i++) {
group_size = stride + (i < stragglers ? 1 : 0);
@@ -3312,7 +3312,7 @@ static int virtnet_probe(struct virtio_device *vdev)
return 0;
free_unregister_netdev:
- vi->vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
unregister_netdev(dev);
free_failover:
@@ -3328,7 +3328,7 @@ free:
static void remove_vq_common(struct virtnet_info *vi)
{
- vi->vdev->config->reset(vi->vdev);
+ virtio_reset_device(vi->vdev);
/* Free unused buffers in both send and recv, if any. */
free_unused_bufs(vi);
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index c0cfd9b36c0b..720952b92e78 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -302,6 +302,41 @@ void wg_noise_set_static_identity_private_key(
static_identity->static_public, private_key);
}
+static void hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, const size_t keylen)
+{
+ struct blake2s_state state;
+ u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
+ u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
+ int i;
+
+ if (keylen > BLAKE2S_BLOCK_SIZE) {
+ blake2s_init(&state, BLAKE2S_HASH_SIZE);
+ blake2s_update(&state, key, keylen);
+ blake2s_final(&state, x_key);
+ } else
+ memcpy(x_key, key, keylen);
+
+ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
+ x_key[i] ^= 0x36;
+
+ blake2s_init(&state, BLAKE2S_HASH_SIZE);
+ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
+ blake2s_update(&state, in, inlen);
+ blake2s_final(&state, i_hash);
+
+ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
+ x_key[i] ^= 0x5c ^ 0x36;
+
+ blake2s_init(&state, BLAKE2S_HASH_SIZE);
+ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
+ blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
+ blake2s_final(&state, i_hash);
+
+ memcpy(out, i_hash, BLAKE2S_HASH_SIZE);
+ memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
+ memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
+}
+
/* This is Hugo Krawczyk's HKDF:
* - https://eprint.iacr.org/2010/264.pdf
* - https://tools.ietf.org/html/rfc5869
@@ -322,14 +357,14 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
((third_len || third_dst) && (!second_len || !second_dst))));
/* Extract entropy from data into secret */
- blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN);
+ hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN);
if (!first_dst || !first_len)
goto out;
/* Expand first key: key = secret, data = 0x1 */
output[0] = 1;
- blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE);
+ hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE);
memcpy(first_dst, output, first_len);
if (!second_dst || !second_len)
@@ -337,8 +372,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
/* Expand second key: key = secret, data = first-key || 0x2 */
output[BLAKE2S_HASH_SIZE] = 2;
- blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
- BLAKE2S_HASH_SIZE);
+ hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
memcpy(second_dst, output, second_len);
if (!third_dst || !third_len)
@@ -346,8 +380,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
/* Expand third key: key = secret, data = second-key || 0x3 */
output[BLAKE2S_HASH_SIZE] = 3;
- blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
- BLAKE2S_HASH_SIZE);
+ hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
memcpy(third_dst, output, third_len);
out:
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
index 2f3c451148db..2f8908074303 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
@@ -4,6 +4,8 @@
*/
#include <asm/unaligned.h>
+
+#include <linux/math.h>
#include <linux/string.h>
#include <linux/bug.h>
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 45594f003ef7..452d08545d31 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -4672,7 +4672,7 @@ static ssize_t proc_write(struct file *file,
static int proc_status_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *apriv = dev->ml_priv;
CapabilityRid cap_rid;
StatusRid status_rid;
@@ -4756,7 +4756,7 @@ static int proc_stats_rid_open(struct inode *inode,
u16 rid)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *apriv = dev->ml_priv;
StatsRid stats;
int i, j;
@@ -4819,7 +4819,7 @@ static inline int sniffing_mode(struct airo_info *ai)
static void proc_config_on_close(struct inode *inode, struct file *file)
{
struct proc_data *data = file->private_data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
char *line;
@@ -5030,7 +5030,7 @@ static const char *get_rmode(__le16 mode)
static int proc_config_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
int i;
__le16 mode;
@@ -5120,7 +5120,7 @@ static int proc_config_open(struct inode *inode, struct file *file)
static void proc_SSID_on_close(struct inode *inode, struct file *file)
{
struct proc_data *data = file->private_data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
SsidRid SSID_rid;
int i;
@@ -5156,7 +5156,7 @@ static void proc_SSID_on_close(struct inode *inode, struct file *file)
static void proc_APList_on_close(struct inode *inode, struct file *file)
{
struct proc_data *data = file->private_data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
APListRid *APList_rid = &ai->APList;
int i;
@@ -5280,7 +5280,7 @@ static int set_wep_tx_idx(struct airo_info *ai, u16 index, int perm, int lock)
static void proc_wepkey_on_close(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
int i, rc;
char key[16];
@@ -5331,7 +5331,7 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file)
static int proc_wepkey_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
char *ptr;
WepKeyRid wkr;
@@ -5379,7 +5379,7 @@ static int proc_wepkey_open(struct inode *inode, struct file *file)
static int proc_SSID_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
int i;
char *ptr;
@@ -5423,7 +5423,7 @@ static int proc_SSID_open(struct inode *inode, struct file *file)
static int proc_APList_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
int i;
char *ptr;
@@ -5462,7 +5462,7 @@ static int proc_APList_open(struct inode *inode, struct file *file)
static int proc_BSSList_open(struct inode *inode, struct file *file)
{
struct proc_data *data;
- struct net_device *dev = PDE_DATA(inode);
+ struct net_device *dev = pde_data(inode);
struct airo_info *ai = dev->ml_priv;
char *ptr;
BSSListRid BSSList_rid;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c
index 8bcc1cdcb75b..462ccc7d7d1a 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
@@ -69,7 +69,7 @@ static void prism2_send_mgmt(struct net_device *dev,
#if !defined(PRISM2_NO_PROCFS_DEBUG) && defined(CONFIG_PROC_FS)
static int ap_debug_proc_show(struct seq_file *m, void *v)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
seq_printf(m, "BridgedUnicastFrames=%u\n", ap->bridged_unicast);
seq_printf(m, "BridgedMulticastFrames=%u\n", ap->bridged_multicast);
@@ -320,7 +320,7 @@ void hostap_deauth_all_stas(struct net_device *dev, struct ap_data *ap,
static int ap_control_proc_show(struct seq_file *m, void *v)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
char *policy_txt;
struct mac_entry *entry;
@@ -352,20 +352,20 @@ static int ap_control_proc_show(struct seq_file *m, void *v)
static void *ap_control_proc_start(struct seq_file *m, loff_t *_pos)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
spin_lock_bh(&ap->mac_restrictions.lock);
return seq_list_start_head(&ap->mac_restrictions.mac_list, *_pos);
}
static void *ap_control_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
return seq_list_next(v, &ap->mac_restrictions.mac_list, _pos);
}
static void ap_control_proc_stop(struct seq_file *m, void *v)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
spin_unlock_bh(&ap->mac_restrictions.lock);
}
@@ -554,20 +554,20 @@ static int prism2_ap_proc_show(struct seq_file *m, void *v)
static void *prism2_ap_proc_start(struct seq_file *m, loff_t *_pos)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
spin_lock_bh(&ap->sta_table_lock);
return seq_list_start_head(&ap->sta_list, *_pos);
}
static void *prism2_ap_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
return seq_list_next(v, &ap->sta_list, _pos);
}
static void prism2_ap_proc_stop(struct seq_file *m, void *v)
{
- struct ap_data *ap = PDE_DATA(file_inode(m->file));
+ struct ap_data *ap = pde_data(file_inode(m->file));
spin_unlock_bh(&ap->sta_table_lock);
}
diff --git a/drivers/net/wireless/intersil/hostap/hostap_download.c b/drivers/net/wireless/intersil/hostap/hostap_download.c
index 7c6a5a6d1d45..3672291ced5c 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_download.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_download.c
@@ -227,7 +227,7 @@ static int prism2_download_aux_dump_proc_open(struct inode *inode, struct file *
sizeof(struct prism2_download_aux_dump));
if (ret == 0) {
struct seq_file *m = file->private_data;
- m->private = PDE_DATA(inode);
+ m->private = pde_data(inode);
}
return ret;
}
diff --git a/drivers/net/wireless/intersil/hostap/hostap_proc.c b/drivers/net/wireless/intersil/hostap/hostap_proc.c
index 51c847d98755..61f68786056f 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_proc.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_proc.c
@@ -97,20 +97,20 @@ static int prism2_wds_proc_show(struct seq_file *m, void *v)
static void *prism2_wds_proc_start(struct seq_file *m, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
read_lock_bh(&local->iface_lock);
return seq_list_start(&local->hostap_interfaces, *_pos);
}
static void *prism2_wds_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
return seq_list_next(v, &local->hostap_interfaces, _pos);
}
static void prism2_wds_proc_stop(struct seq_file *m, void *v)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
read_unlock_bh(&local->iface_lock);
}
@@ -123,7 +123,7 @@ static const struct seq_operations prism2_wds_proc_seqops = {
static int prism2_bss_list_proc_show(struct seq_file *m, void *v)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
struct list_head *ptr = v;
struct hostap_bss_info *bss;
@@ -151,21 +151,21 @@ static int prism2_bss_list_proc_show(struct seq_file *m, void *v)
static void *prism2_bss_list_proc_start(struct seq_file *m, loff_t *_pos)
__acquires(&local->lock)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
spin_lock_bh(&local->lock);
return seq_list_start_head(&local->bss_list, *_pos);
}
static void *prism2_bss_list_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
return seq_list_next(v, &local->bss_list, _pos);
}
static void prism2_bss_list_proc_stop(struct seq_file *m, void *v)
__releases(&local->lock)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
spin_unlock_bh(&local->lock);
}
@@ -198,7 +198,7 @@ static int prism2_crypt_proc_show(struct seq_file *m, void *v)
static ssize_t prism2_pda_proc_read(struct file *file, char __user *buf,
size_t count, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(file));
+ local_info_t *local = pde_data(file_inode(file));
size_t off;
if (local->pda == NULL || *_pos >= PRISM2_PDA_SIZE)
@@ -272,7 +272,7 @@ static int prism2_io_debug_proc_read(char *page, char **start, off_t off,
#ifndef PRISM2_NO_STATION_MODES
static int prism2_scan_results_proc_show(struct seq_file *m, void *v)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
unsigned long entry;
int i, len;
struct hfa384x_hostscan_result *scanres;
@@ -322,7 +322,7 @@ static int prism2_scan_results_proc_show(struct seq_file *m, void *v)
static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
spin_lock_bh(&local->lock);
/* We have a header (pos 0) + N results to show (pos 1...N) */
@@ -333,7 +333,7 @@ static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos)
static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
++*_pos;
if (*_pos > local->last_scan_results_count)
@@ -343,7 +343,7 @@ static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t *
static void prism2_scan_results_proc_stop(struct seq_file *m, void *v)
{
- local_info_t *local = PDE_DATA(file_inode(m->file));
+ local_info_t *local = pde_data(file_inode(m->file));
spin_unlock_bh(&local->lock);
}
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 0307a6677907..8d54f9face2f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -4498,7 +4498,7 @@ static void remove_vqs(struct virtio_device *vdev)
{
int i;
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
for (i = 0; i < ARRAY_SIZE(hwsim_vqs); i++) {
struct virtqueue *vq = hwsim_vqs[i];
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index e3a3dc3e45b4..2987ad9271f6 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2746,7 +2746,7 @@ static ssize_t int_proc_write(struct file *file, const char __user *buffer,
nr = nr * 10 + c;
p++;
} while (--len);
- *(int *)PDE_DATA(file_inode(file)) = nr;
+ *(int *)pde_data(file_inode(file)) = nr;
return count;
}
diff --git a/drivers/net/wireless/rsi/rsi_91x_coex.c b/drivers/net/wireless/rsi/rsi_91x_coex.c
index a0c5d02ae88c..8a3d86897ea8 100644
--- a/drivers/net/wireless/rsi/rsi_91x_coex.c
+++ b/drivers/net/wireless/rsi/rsi_91x_coex.c
@@ -63,7 +63,7 @@ static void rsi_coex_scheduler_thread(struct rsi_common *common)
rsi_coex_sched_tx_pkts(coex_cb);
} while (atomic_read(&coex_cb->coex_tx_thread.thread_done) == 0);
- complete_and_exit(&coex_cb->coex_tx_thread.completion, 0);
+ kthread_complete_and_exit(&coex_cb->coex_tx_thread.completion, 0);
}
int rsi_coex_recv_pkt(struct rsi_common *common, u8 *msg)
diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
index 5d1490fc32db..f9f004446b07 100644
--- a/drivers/net/wireless/rsi/rsi_91x_main.c
+++ b/drivers/net/wireless/rsi/rsi_91x_main.c
@@ -264,7 +264,7 @@ static void rsi_tx_scheduler_thread(struct rsi_common *common)
if (common->init_done)
rsi_core_qos_processor(common);
} while (atomic_read(&common->tx_thread.thread_done) == 0);
- complete_and_exit(&common->tx_thread.completion, 0);
+ kthread_complete_and_exit(&common->tx_thread.completion, 0);
}
#ifdef CONFIG_RSI_COEX
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
index 8ace1874e5cb..b2b47a0abcbf 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
@@ -75,7 +75,7 @@ void rsi_sdio_rx_thread(struct rsi_common *common)
rsi_dbg(INFO_ZONE, "%s: Terminated SDIO RX thread\n", __func__);
atomic_inc(&sdev->rx_thread.thread_done);
- complete_and_exit(&sdev->rx_thread.completion, 0);
+ kthread_complete_and_exit(&sdev->rx_thread.completion, 0);
}
/**
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
index 4ffcdde1acb1..5130b0e72adc 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
@@ -56,6 +56,6 @@ void rsi_usb_rx_thread(struct rsi_common *common)
out:
rsi_dbg(INFO_ZONE, "%s: Terminated thread\n", __func__);
skb_queue_purge(&dev->rx_q);
- complete_and_exit(&dev->rx_thread.completion, 0);
+ kthread_complete_and_exit(&dev->rx_thread.completion, 0);
}
diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
index 71bf9b4f769f..6872782e8dd8 100644
--- a/drivers/net/wwan/mhi_wwan_mbim.c
+++ b/drivers/net/wwan/mhi_wwan_mbim.c
@@ -385,13 +385,13 @@ static void mhi_net_rx_refill_work(struct work_struct *work)
int err;
while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
- struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL);
+ struct sk_buff *skb = alloc_skb(mbim->mru, GFP_KERNEL);
if (unlikely(!skb))
break;
err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb,
- MHI_DEFAULT_MRU, MHI_EOT);
+ mbim->mru, MHI_EOT);
if (unlikely(err)) {
kfree_skb(skb);
break;
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index 37d26f01986b..62a0f1a010cb 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -188,7 +188,7 @@ do { \
static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy)
{
int polarity, retry, ret;
- char rset_cmd[] = { 0x05, 0xF9, 0x04, 0x00, 0xC3, 0xE5 };
+ static const char rset_cmd[] = { 0x05, 0xF9, 0x04, 0x00, 0xC3, 0xE5 };
int count = sizeof(rset_cmd);
nfc_info(&phy->i2c_dev->dev, "Detecting nfc_en polarity\n");
diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c
index a43fc4117fa5..c922f10d0d7b 100644
--- a/drivers/nfc/st21nfca/se.c
+++ b/drivers/nfc/st21nfca/se.c
@@ -316,6 +316,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host,
return -ENOMEM;
transaction->aid_len = skb->data[1];
+
+ /* Checking if the length of the AID is valid */
+ if (transaction->aid_len > sizeof(transaction->aid))
+ return -EINVAL;
+
memcpy(transaction->aid, &skb->data[2],
transaction->aid_len);
@@ -325,6 +330,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host,
return -EPROTO;
transaction->params_len = skb->data[transaction->aid_len + 3];
+
+ /* Total size is allocated (skb->len - 2) minus fixed array members */
+ if (transaction->params_len > ((skb->len - 2) - sizeof(struct nfc_evt_transaction)))
+ return -EINVAL;
+
memcpy(transaction->params, skb->data +
transaction->aid_len + 4, transaction->params_len);
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index 87847c380051..04550b1f984c 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -1321,6 +1321,8 @@ static const struct ntb_dev_data dev_data[] = {
static const struct pci_device_id amd_ntb_pci_tbl[] = {
{ PCI_VDEVICE(AMD, 0x145b), (kernel_ulong_t)&dev_data[0] },
{ PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] },
+ { PCI_VDEVICE(AMD, 0x14c0), (kernel_ulong_t)&dev_data[1] },
+ { PCI_VDEVICE(AMD, 0x14c3), (kernel_ulong_t)&dev_data[1] },
{ PCI_VDEVICE(HYGON, 0x145b), (kernel_ulong_t)&dev_data[0] },
{ 0, }
};
diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index 4c6eb61a6ac6..88ae18b0efa8 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -297,7 +297,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
* (see CMA_CONFIG_ALIGNMENT)
*/
dev_err(&sndev->stdev->dev,
- "ERROR: Memory window address is not aligned to it's size!\n");
+ "ERROR: Memory window address is not aligned to its size!\n");
return -EINVAL;
}
@@ -419,8 +419,10 @@ static void switchtec_ntb_part_link_speed(struct switchtec_ntb *sndev,
enum ntb_width *width)
{
struct switchtec_dev *stdev = sndev->stdev;
+ struct part_cfg_regs __iomem *part_cfg =
+ &stdev->mmio_part_cfg_all[partition];
- u32 pff = ioread32(&stdev->mmio_part_cfg[partition].vep_pff_inst_id);
+ u32 pff = ioread32(&part_cfg->vep_pff_inst_id) & 0xFF;
u32 linksta = ioread32(&stdev->mmio_pff_csr[pff].pci_cap_region[13]);
if (speed)
@@ -840,7 +842,6 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
u64 tpart_vec;
int self;
u64 part_map;
- int bit;
sndev->ntb.pdev = sndev->stdev->pdev;
sndev->ntb.topo = NTB_TOPO_SWITCH;
@@ -859,31 +860,31 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
tpart_vec |= ioread32(&sndev->mmio_ntb->ntp_info[self].target_part_low);
part_map = ioread64(&sndev->mmio_ntb->ep_map);
+ tpart_vec &= part_map;
part_map &= ~(1 << sndev->self_partition);
- if (!ffs(tpart_vec)) {
+ if (!tpart_vec) {
if (sndev->stdev->partition_count != 2) {
dev_err(&sndev->stdev->dev,
"ntb target partition not defined\n");
return -ENODEV;
}
- bit = ffs(part_map);
- if (!bit) {
+ if (!part_map) {
dev_err(&sndev->stdev->dev,
"peer partition is not NT partition\n");
return -ENODEV;
}
- sndev->peer_partition = bit - 1;
+ sndev->peer_partition = __ffs64(part_map);
} else {
- if (ffs(tpart_vec) != fls(tpart_vec)) {
+ if (__ffs64(tpart_vec) != (fls64(tpart_vec) - 1)) {
dev_err(&sndev->stdev->dev,
"ntb driver only supports 1 pair of 1-1 ntb mapping\n");
return -ENODEV;
}
- sndev->peer_partition = ffs(tpart_vec) - 1;
+ sndev->peer_partition = __ffs64(tpart_vec);
if (!(part_map & (1ULL << sndev->peer_partition))) {
dev_err(&sndev->stdev->dev,
"ntb target partition is not NT partition\n");
@@ -954,7 +955,7 @@ static int config_req_id_table(struct switchtec_ntb *sndev,
u32 error;
u32 proxy_id;
- if (ioread32(&mmio_ctrl->req_id_table_size) < count) {
+ if (ioread16(&mmio_ctrl->req_id_table_size) < count) {
dev_err(&sndev->stdev->dev,
"Not enough requester IDs available.\n");
return -EFAULT;
@@ -966,9 +967,6 @@ static int config_req_id_table(struct switchtec_ntb *sndev,
if (rc)
return rc;
- iowrite32(NTB_PART_CTRL_ID_PROT_DIS,
- &mmio_ctrl->partition_ctrl);
-
for (i = 0; i < count; i++) {
iowrite32(req_ids[i] << 16 | NTB_CTRL_REQ_ID_EN,
&mmio_ctrl->req_id_table[i]);
@@ -1090,7 +1088,7 @@ static int crosslink_enum_partition(struct switchtec_ntb *sndev,
{
struct part_cfg_regs __iomem *part_cfg =
&sndev->stdev->mmio_part_cfg_all[sndev->peer_partition];
- u32 pff = ioread32(&part_cfg->vep_pff_inst_id);
+ u32 pff = ioread32(&part_cfg->vep_pff_inst_id) & 0xFF;
struct pff_csr_regs __iomem *mmio_pff =
&sndev->stdev->mmio_pff_csr[pff];
const u64 bar_space = 0x1000000000LL;
diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c
index 2818bfcf4031..dd683cb58d09 100644
--- a/drivers/ntb/msi.c
+++ b/drivers/ntb/msi.c
@@ -262,8 +262,9 @@ static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry,
* @handler: Function to be called when the IRQ occurs
* @thread_fn: Function to be called in a threaded interrupt context. NULL
* for clients which handle everything in @handler
- * @devname: An ascii name for the claiming device, dev_name(dev) if NULL
+ * @name: An ascii name for the claiming device, dev_name(dev) if NULL
* @dev_id: A cookie passed back to the handler function
+ * @msi_desc: MSI descriptor data which triggers the interrupt
*
* This function assigns an interrupt handler to an unused
* MSI interrupt and returns the descriptor used to trigger
diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c
index 88e1f9a0faaf..1fd667852271 100644
--- a/drivers/nubus/proc.c
+++ b/drivers/nubus/proc.c
@@ -93,30 +93,30 @@ struct nubus_proc_pde_data {
static struct nubus_proc_pde_data *
nubus_proc_alloc_pde_data(unsigned char *ptr, unsigned int size)
{
- struct nubus_proc_pde_data *pde_data;
+ struct nubus_proc_pde_data *pded;
- pde_data = kmalloc(sizeof(*pde_data), GFP_KERNEL);
- if (!pde_data)
+ pded = kmalloc(sizeof(*pded), GFP_KERNEL);
+ if (!pded)
return NULL;
- pde_data->res_ptr = ptr;
- pde_data->res_size = size;
- return pde_data;
+ pded->res_ptr = ptr;
+ pded->res_size = size;
+ return pded;
}
static int nubus_proc_rsrc_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct nubus_proc_pde_data *pde_data;
+ struct nubus_proc_pde_data *pded;
- pde_data = PDE_DATA(inode);
- if (!pde_data)
+ pded = pde_data(inode);
+ if (!pded)
return 0;
- if (pde_data->res_size > m->size)
+ if (pded->res_size > m->size)
return -EFBIG;
- if (pde_data->res_size) {
+ if (pded->res_size) {
int lanes = (int)proc_get_parent_data(inode);
struct nubus_dirent ent;
@@ -124,11 +124,11 @@ static int nubus_proc_rsrc_show(struct seq_file *m, void *v)
return 0;
ent.mask = lanes;
- ent.base = pde_data->res_ptr;
+ ent.base = pded->res_ptr;
ent.data = 0;
- nubus_seq_write_rsrc_mem(m, &ent, pde_data->res_size);
+ nubus_seq_write_rsrc_mem(m, &ent, pded->res_size);
} else {
- unsigned int data = (unsigned int)pde_data->res_ptr;
+ unsigned int data = (unsigned int)pded->res_ptr;
seq_putc(m, data >> 16);
seq_putc(m, data >> 8);
@@ -142,18 +142,18 @@ void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir,
unsigned int size)
{
char name[9];
- struct nubus_proc_pde_data *pde_data;
+ struct nubus_proc_pde_data *pded;
if (!procdir)
return;
snprintf(name, sizeof(name), "%x", ent->type);
if (size)
- pde_data = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size);
+ pded = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size);
else
- pde_data = NULL;
+ pded = NULL;
proc_create_single_data(name, S_IFREG | 0444, procdir,
- nubus_proc_rsrc_show, pde_data);
+ nubus_proc_rsrc_show, pded);
}
void nubus_proc_add_rsrc(struct proc_dir_entry *procdir,
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 726c7354d465..995b6cdc67ed 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -105,7 +105,7 @@ static void virtio_pmem_remove(struct virtio_device *vdev)
nvdimm_bus_unregister(nvdimm_bus);
vdev->config->del_vqs(vdev);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
}
static struct virtio_driver virtio_pmem_driver = {
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index e765d3d0542e..23a38dcf0fc4 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -312,6 +312,8 @@ static umode_t nvmem_bin_attr_is_visible(struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct nvmem_device *nvmem = to_nvmem_device(dev);
+ attr->size = nvmem->size;
+
return nvmem_bin_attr_get_umode(nvmem);
}
diff --git a/drivers/nvmem/mtk-efuse.c b/drivers/nvmem/mtk-efuse.c
index 6a537d959f14..e9a375dd84af 100644
--- a/drivers/nvmem/mtk-efuse.c
+++ b/drivers/nvmem/mtk-efuse.c
@@ -19,11 +19,12 @@ static int mtk_reg_read(void *context,
unsigned int reg, void *_val, size_t bytes)
{
struct mtk_efuse_priv *priv = context;
- u32 *val = _val;
- int i = 0, words = bytes / 4;
+ void __iomem *addr = priv->base + reg;
+ u8 *val = _val;
+ int i;
- while (words--)
- *val++ = readl(priv->base + reg + (i++ * 4));
+ for (i = 0; i < bytes; i++, val++)
+ *val = readb(addr + i);
return 0;
}
@@ -45,8 +46,8 @@ static int mtk_efuse_probe(struct platform_device *pdev)
if (IS_ERR(priv->base))
return PTR_ERR(priv->base);
- econfig.stride = 4;
- econfig.word_size = 4;
+ econfig.stride = 1;
+ econfig.word_size = 1;
econfig.reg_read = mtk_reg_read;
econfig.size = resource_size(res);
econfig.priv = priv;
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 8a24d37153b4..e7d92b67cb8a 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1420,15 +1420,18 @@ int of_phandle_iterator_args(struct of_phandle_iterator *it,
return count;
}
-static int __of_parse_phandle_with_args(const struct device_node *np,
- const char *list_name,
- const char *cells_name,
- int cell_count, int index,
- struct of_phandle_args *out_args)
+int __of_parse_phandle_with_args(const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int cell_count, int index,
+ struct of_phandle_args *out_args)
{
struct of_phandle_iterator it;
int rc, cur_index = 0;
+ if (index < 0)
+ return -EINVAL;
+
/* Loop over the phandles until all the requested entry is found */
of_for_each_phandle(&it, rc, np, list_name, cells_name, cell_count) {
/*
@@ -1471,82 +1474,7 @@ static int __of_parse_phandle_with_args(const struct device_node *np,
of_node_put(it.node);
return rc;
}
-
-/**
- * of_parse_phandle - Resolve a phandle property to a device_node pointer
- * @np: Pointer to device node holding phandle property
- * @phandle_name: Name of property holding a phandle value
- * @index: For properties holding a table of phandles, this is the index into
- * the table
- *
- * Return: The device_node pointer with refcount incremented. Use
- * of_node_put() on it when done.
- */
-struct device_node *of_parse_phandle(const struct device_node *np,
- const char *phandle_name, int index)
-{
- struct of_phandle_args args;
-
- if (index < 0)
- return NULL;
-
- if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0,
- index, &args))
- return NULL;
-
- return args.np;
-}
-EXPORT_SYMBOL(of_parse_phandle);
-
-/**
- * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
- * @np: pointer to a device tree node containing a list
- * @list_name: property name that contains a list
- * @cells_name: property name that specifies phandles' arguments count
- * @index: index of a phandle to parse out
- * @out_args: optional pointer to output arguments structure (will be filled)
- *
- * This function is useful to parse lists of phandles and their arguments.
- * Returns 0 on success and fills out_args, on error returns appropriate
- * errno value.
- *
- * Caller is responsible to call of_node_put() on the returned out_args->np
- * pointer.
- *
- * Example::
- *
- * phandle1: node1 {
- * #list-cells = <2>;
- * };
- *
- * phandle2: node2 {
- * #list-cells = <1>;
- * };
- *
- * node3 {
- * list = <&phandle1 1 2 &phandle2 3>;
- * };
- *
- * To get a device_node of the ``node2`` node you may call this:
- * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
- */
-int of_parse_phandle_with_args(const struct device_node *np, const char *list_name,
- const char *cells_name, int index,
- struct of_phandle_args *out_args)
-{
- int cell_count = -1;
-
- if (index < 0)
- return -EINVAL;
-
- /* If cells_name is NULL we assume a cell count of 0 */
- if (!cells_name)
- cell_count = 0;
-
- return __of_parse_phandle_with_args(np, list_name, cells_name,
- cell_count, index, out_args);
-}
-EXPORT_SYMBOL(of_parse_phandle_with_args);
+EXPORT_SYMBOL(__of_parse_phandle_with_args);
/**
* of_parse_phandle_with_args_map() - Find a node pointed by phandle in a list and remap it
@@ -1733,47 +1661,6 @@ free:
EXPORT_SYMBOL(of_parse_phandle_with_args_map);
/**
- * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list
- * @np: pointer to a device tree node containing a list
- * @list_name: property name that contains a list
- * @cell_count: number of argument cells following the phandle
- * @index: index of a phandle to parse out
- * @out_args: optional pointer to output arguments structure (will be filled)
- *
- * This function is useful to parse lists of phandles and their arguments.
- * Returns 0 on success and fills out_args, on error returns appropriate
- * errno value.
- *
- * Caller is responsible to call of_node_put() on the returned out_args->np
- * pointer.
- *
- * Example::
- *
- * phandle1: node1 {
- * };
- *
- * phandle2: node2 {
- * };
- *
- * node3 {
- * list = <&phandle1 0 2 &phandle2 2 3>;
- * };
- *
- * To get a device_node of the ``node2`` node you may call this:
- * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args);
- */
-int of_parse_phandle_with_fixed_args(const struct device_node *np,
- const char *list_name, int cell_count,
- int index, struct of_phandle_args *out_args)
-{
- if (index < 0)
- return -EINVAL;
- return __of_parse_phandle_with_args(np, list_name, NULL, cell_count,
- index, out_args);
-}
-EXPORT_SYMBOL(of_parse_phandle_with_fixed_args);
-
-/**
* of_count_phandle_with_args() - Find the number of phandles references in a property
* @np: pointer to a device tree node containing a list
* @list_name: property name that contains a list
diff --git a/drivers/of/device.c b/drivers/of/device.c
index b0800c260f64..874f031442dc 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -28,7 +28,7 @@
const struct of_device_id *of_match_device(const struct of_device_id *matches,
const struct device *dev)
{
- if ((!matches) || (!dev->of_node))
+ if (!matches || !dev->of_node || dev->of_node_reused)
return NULL;
return of_match_node(matches, dev->of_node);
}
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index ca2cfb3012a4..ad85ff6474ff 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -26,6 +26,7 @@
#include <linux/serial_core.h>
#include <linux/sysfs.h>
#include <linux/random.h>
+#include <linux/kmemleak.h>
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
#include <asm/page.h>
@@ -524,9 +525,12 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
size = dt_mem_next_cell(dt_root_size_cells, &prop);
if (size &&
- early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
+ early_init_dt_reserve_memory_arch(base, size, nomap) == 0) {
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
+ if (!nomap)
+ kmemleak_alloc_phys(base, size, 0, 0);
+ }
else
pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index cf91cb024be3..1e4a5663d011 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -168,14 +168,14 @@ static int led_proc_show(struct seq_file *m, void *v)
static int led_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, led_proc_show, PDE_DATA(inode));
+ return single_open(file, led_proc_show, pde_data(inode));
}
static ssize_t led_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- void *data = PDE_DATA(file_inode(file));
+ void *data = pde_data(file_inode(file));
char *cur, lbuf[32];
int d;
diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c
index 9513c39719d1..d9e51036a4fa 100644
--- a/drivers/parisc/pdc_stable.c
+++ b/drivers/parisc/pdc_stable.c
@@ -980,8 +980,10 @@ pdcs_register_pathentries(void)
entry->kobj.kset = paths_kset;
err = kobject_init_and_add(&entry->kobj, &ktype_pdcspath, NULL,
"%s", entry->name);
- if (err)
+ if (err) {
+ kobject_put(&entry->kobj);
return err;
+ }
/* kobject is now registered */
write_lock(&entry->rw_lock);
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 43e615aa12ff..d98fafdd0f99 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -184,7 +184,7 @@ config PCI_LABEL
config PCI_HYPERV
tristate "Hyper-V PCI Frontend"
- depends on X86_64 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && SYSFS
+ depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && SYSFS
select PCI_HYPERV_INTERFACE
help
The PCI device frontend driver allows the kernel to import arbitrary
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 46935695cfb9..0d9f6b21babb 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -42,7 +42,10 @@ int noinline pci_bus_read_config_##size \
if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
pci_lock_config(flags); \
res = bus->ops->read(bus, devfn, pos, len, &data); \
- *value = (type)data; \
+ if (res) \
+ PCI_SET_ERROR_RESPONSE(value); \
+ else \
+ *value = (type)data; \
pci_unlock_config(flags); \
return res; \
}
@@ -80,10 +83,8 @@ int pci_generic_config_read(struct pci_bus *bus, unsigned int devfn,
void __iomem *addr;
addr = bus->ops->map_bus(bus, devfn, where);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
if (size == 1)
*val = readb(addr);
@@ -122,10 +123,8 @@ int pci_generic_config_read32(struct pci_bus *bus, unsigned int devfn,
void __iomem *addr;
addr = bus->ops->map_bus(bus, devfn, where & ~0x3);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
*val = readl(addr);
@@ -228,7 +227,10 @@ int pci_user_read_config_##size \
ret = dev->bus->ops->read(dev->bus, dev->devfn, \
pos, sizeof(type), &data); \
raw_spin_unlock_irq(&pci_lock); \
- *val = (type)data; \
+ if (ret) \
+ PCI_SET_ERROR_RESPONSE(val); \
+ else \
+ *val = (type)data; \
return pcibios_err_to_errno(ret); \
} \
EXPORT_SYMBOL_GPL(pci_user_read_config_##size);
@@ -410,9 +412,9 @@ int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val)
if (pcie_capability_reg_implemented(dev, pos)) {
ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val);
/*
- * Reset *val to 0 if pci_read_config_word() fails, it may
- * have been written as 0xFFFF if hardware error happens
- * during pci_read_config_word().
+ * Reset *val to 0 if pci_read_config_word() fails; it may
+ * have been written as 0xFFFF (PCI_ERROR_RESPONSE) if the
+ * config read failed on PCI.
*/
if (ret)
*val = 0;
@@ -445,9 +447,9 @@ int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val)
if (pcie_capability_reg_implemented(dev, pos)) {
ret = pci_read_config_dword(dev, pci_pcie_cap(dev) + pos, val);
/*
- * Reset *val to 0 if pci_read_config_dword() fails, it may
- * have been written as 0xFFFFFFFF if hardware error happens
- * during pci_read_config_dword().
+ * Reset *val to 0 if pci_read_config_dword() fails; it may
+ * have been written as 0xFFFFFFFF (PCI_ERROR_RESPONSE) if
+ * the config read failed on PCI.
*/
if (ret)
*val = 0;
@@ -523,7 +525,7 @@ EXPORT_SYMBOL(pcie_capability_clear_and_set_dword);
int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val)
{
if (pci_dev_is_disconnected(dev)) {
- *val = ~0;
+ PCI_SET_ERROR_RESPONSE(val);
return PCIBIOS_DEVICE_NOT_FOUND;
}
return pci_bus_read_config_byte(dev->bus, dev->devfn, where, val);
@@ -533,7 +535,7 @@ EXPORT_SYMBOL(pci_read_config_byte);
int pci_read_config_word(const struct pci_dev *dev, int where, u16 *val)
{
if (pci_dev_is_disconnected(dev)) {
- *val = ~0;
+ PCI_SET_ERROR_RESPONSE(val);
return PCIBIOS_DEVICE_NOT_FOUND;
}
return pci_bus_read_config_word(dev->bus, dev->devfn, where, val);
@@ -544,7 +546,7 @@ int pci_read_config_dword(const struct pci_dev *dev, int where,
u32 *val)
{
if (pci_dev_is_disconnected(dev)) {
- *val = ~0;
+ PCI_SET_ERROR_RESPONSE(val);
return PCIBIOS_DEVICE_NOT_FOUND;
}
return pci_bus_read_config_dword(dev->bus, dev->devfn, where, val);
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index aec8c9a3488b..601f2531ee91 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -4,7 +4,7 @@ menu "PCI controller drivers"
depends on PCI
config PCI_MVEBU
- bool "Marvell EBU PCIe controller"
+ tristate "Marvell EBU PCIe controller"
depends on ARCH_MVEBU || ARCH_DOVE || COMPILE_TEST
depends on MVEBU_MBUS
depends on ARM
@@ -281,7 +281,7 @@ config PCIE_BRCMSTB
config PCI_HYPERV_INTERFACE
tristate "Hyper-V PCI Interface"
- depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+ depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN
help
The Hyper-V PCI Interface is a helper driver allows other drivers to
have a common interface with the Hyper-V PCI frontend driver.
@@ -332,8 +332,8 @@ config PCIE_APPLE
If unsure, say Y if you have an Apple Silicon system.
config PCIE_MT7621
- bool "MediaTek MT7621 PCIe Controller"
- depends on SOC_MT7621 || (MIPS && COMPILE_TEST)
+ tristate "MediaTek MT7621 PCIe Controller"
+ depends on SOC_MT7621 || COMPILE_TEST
select PHY_MT7621_PCI
default SOC_MT7621
help
diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c
index 918e11082e6a..489586a4cdc7 100644
--- a/drivers/pci/controller/cadence/pci-j721e.c
+++ b/drivers/pci/controller/cadence/pci-j721e.c
@@ -51,11 +51,10 @@ enum link_status {
#define MAX_LANES 2
struct j721e_pcie {
- struct device *dev;
+ struct cdns_pcie *cdns_pcie;
struct clk *refclk;
u32 mode;
u32 num_lanes;
- struct cdns_pcie *cdns_pcie;
void __iomem *user_cfg_base;
void __iomem *intd_cfg_base;
u32 linkdown_irq_regfield;
@@ -99,7 +98,7 @@ static inline void j721e_pcie_intd_writel(struct j721e_pcie *pcie, u32 offset,
static irqreturn_t j721e_pcie_link_irq_handler(int irq, void *priv)
{
struct j721e_pcie *pcie = priv;
- struct device *dev = pcie->dev;
+ struct device *dev = pcie->cdns_pcie->dev;
u32 reg;
reg = j721e_pcie_intd_readl(pcie, STATUS_REG_SYS_2);
@@ -165,7 +164,7 @@ static const struct cdns_pcie_ops j721e_pcie_ops = {
static int j721e_pcie_set_mode(struct j721e_pcie *pcie, struct regmap *syscon,
unsigned int offset)
{
- struct device *dev = pcie->dev;
+ struct device *dev = pcie->cdns_pcie->dev;
u32 mask = J721E_MODE_RC;
u32 mode = pcie->mode;
u32 val = 0;
@@ -184,7 +183,7 @@ static int j721e_pcie_set_mode(struct j721e_pcie *pcie, struct regmap *syscon,
static int j721e_pcie_set_link_speed(struct j721e_pcie *pcie,
struct regmap *syscon, unsigned int offset)
{
- struct device *dev = pcie->dev;
+ struct device *dev = pcie->cdns_pcie->dev;
struct device_node *np = dev->of_node;
int link_speed;
u32 val = 0;
@@ -205,7 +204,7 @@ static int j721e_pcie_set_link_speed(struct j721e_pcie *pcie,
static int j721e_pcie_set_lane_count(struct j721e_pcie *pcie,
struct regmap *syscon, unsigned int offset)
{
- struct device *dev = pcie->dev;
+ struct device *dev = pcie->cdns_pcie->dev;
u32 lanes = pcie->num_lanes;
u32 val = 0;
int ret;
@@ -220,7 +219,7 @@ static int j721e_pcie_set_lane_count(struct j721e_pcie *pcie,
static int j721e_pcie_ctrl_init(struct j721e_pcie *pcie)
{
- struct device *dev = pcie->dev;
+ struct device *dev = pcie->cdns_pcie->dev;
struct device_node *node = dev->of_node;
struct of_phandle_args args;
unsigned int offset = 0;
@@ -354,7 +353,7 @@ static int j721e_pcie_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct device_node *node = dev->of_node;
struct pci_host_bridge *bridge;
- struct j721e_pcie_data *data;
+ const struct j721e_pcie_data *data;
struct cdns_pcie *cdns_pcie;
struct j721e_pcie *pcie;
struct cdns_pcie_rc *rc;
@@ -367,7 +366,7 @@ static int j721e_pcie_probe(struct platform_device *pdev)
int ret;
int irq;
- data = (struct j721e_pcie_data *)of_device_get_match_data(dev);
+ data = of_device_get_match_data(dev);
if (!data)
return -EINVAL;
@@ -377,7 +376,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
if (!pcie)
return -ENOMEM;
- pcie->dev = dev;
pcie->mode = mode;
pcie->linkdown_irq_regfield = data->linkdown_irq_regfield;
diff --git a/drivers/pci/controller/cadence/pcie-cadence-plat.c b/drivers/pci/controller/cadence/pcie-cadence-plat.c
index a224afadbcc0..bac0541317c1 100644
--- a/drivers/pci/controller/cadence/pcie-cadence-plat.c
+++ b/drivers/pci/controller/cadence/pcie-cadence-plat.c
@@ -45,7 +45,6 @@ static int cdns_plat_pcie_probe(struct platform_device *pdev)
{
const struct cdns_plat_pcie_of_data *data;
struct cdns_plat_pcie *cdns_plat_pcie;
- const struct of_device_id *match;
struct device *dev = &pdev->dev;
struct pci_host_bridge *bridge;
struct cdns_pcie_ep *ep;
@@ -54,11 +53,10 @@ static int cdns_plat_pcie_probe(struct platform_device *pdev)
bool is_rc;
int ret;
- match = of_match_device(cdns_plat_pcie_of_match, dev);
- if (!match)
+ data = of_device_get_match_data(dev);
+ if (!data)
return -EINVAL;
- data = (struct cdns_plat_pcie_of_data *)match->data;
is_rc = data->is_rc;
pr_debug(" Started %s with is_rc: %d\n", __func__, is_rc);
diff --git a/drivers/pci/controller/cadence/pcie-cadence.h b/drivers/pci/controller/cadence/pcie-cadence.h
index 262421e5d917..c8a27b6290ce 100644
--- a/drivers/pci/controller/cadence/pcie-cadence.h
+++ b/drivers/pci/controller/cadence/pcie-cadence.h
@@ -310,7 +310,7 @@ struct cdns_pcie {
* single function at a time
* @vendor_id: PCI vendor ID
* @device_id: PCI device ID
- * @avail_ib_bar: Satus of RP_BAR0, RP_BAR1 and RP_NO_BAR if it's free or
+ * @avail_ib_bar: Status of RP_BAR0, RP_BAR1 and RP_NO_BAR if it's free or
* available
* @quirk_retrain_flag: Retrain link as quirk for PCIe Gen2
* @quirk_detect_quiet_flag: LTSSM Detect Quiet min delay set as quirk
diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
index a4221f6f3629..dfcdeb432dc8 100644
--- a/drivers/pci/controller/dwc/pci-dra7xx.c
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -213,7 +213,7 @@ static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index)
if (!val)
return 0;
- pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, 0);
+ pos = find_first_bit(&val, MAX_MSI_IRQS_PER_CTRL);
while (pos != MAX_MSI_IRQS_PER_CTRL) {
generic_handle_domain_irq(pp->irq_domain,
(index * MAX_MSI_IRQS_PER_CTRL) + pos);
@@ -697,16 +697,14 @@ static int dra7xx_pcie_probe(struct platform_device *pdev)
struct device_node *np = dev->of_node;
char name[10];
struct gpio_desc *reset;
- const struct of_device_id *match;
const struct dra7xx_pcie_of_data *data;
enum dw_pcie_device_mode mode;
u32 b1co_mode_sel_mask;
- match = of_match_device(of_match_ptr(of_dra7xx_pcie_match), dev);
- if (!match)
+ data = of_device_get_match_data(dev);
+ if (!data)
return -EINVAL;
- data = (struct dra7xx_pcie_of_data *)match->data;
mode = (enum dw_pcie_device_mode)data->mode;
b1co_mode_sel_mask = data->b1co_mode_sel_mask;
diff --git a/drivers/pci/controller/dwc/pci-exynos.c b/drivers/pci/controller/dwc/pci-exynos.c
index 722dacdd5a17..467c8d1cd7e4 100644
--- a/drivers/pci/controller/dwc/pci-exynos.c
+++ b/drivers/pci/controller/dwc/pci-exynos.c
@@ -217,10 +217,8 @@ static int exynos_pcie_rd_own_conf(struct pci_bus *bus, unsigned int devfn,
{
struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
- if (PCI_SLOT(devfn)) {
- *val = ~0;
+ if (PCI_SLOT(devfn))
return PCIBIOS_DEVICE_NOT_FOUND;
- }
*val = dw_pcie_read_dbi(pci, where, size);
return PCIBIOS_SUCCESSFUL;
diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
index 26f49f797b0f..6974bd5aa116 100644
--- a/drivers/pci/controller/dwc/pci-imx6.c
+++ b/drivers/pci/controller/dwc/pci-imx6.c
@@ -29,6 +29,7 @@
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/reset.h>
+#include <linux/phy/phy.h>
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
@@ -49,6 +50,7 @@ enum imx6_pcie_variants {
IMX6QP,
IMX7D,
IMX8MQ,
+ IMX8MM,
};
#define IMX6_PCIE_FLAG_IMX6_PHY BIT(0)
@@ -88,6 +90,7 @@ struct imx6_pcie {
struct device *pd_pcie;
/* power domain for pcie phy */
struct device *pd_pcie_phy;
+ struct phy *phy;
const struct imx6_pcie_drvdata *drvdata;
};
@@ -372,6 +375,8 @@ static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie)
case IMX7D:
case IMX8MQ:
reset_control_assert(imx6_pcie->pciephy_reset);
+ fallthrough;
+ case IMX8MM:
reset_control_assert(imx6_pcie->apps_reset);
break;
case IMX6SX:
@@ -407,7 +412,8 @@ static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie)
static unsigned int imx6_pcie_grp_offset(const struct imx6_pcie *imx6_pcie)
{
- WARN_ON(imx6_pcie->drvdata->variant != IMX8MQ);
+ WARN_ON(imx6_pcie->drvdata->variant != IMX8MQ &&
+ imx6_pcie->drvdata->variant != IMX8MM);
return imx6_pcie->controller_id == 1 ? IOMUXC_GPR16 : IOMUXC_GPR14;
}
@@ -446,6 +452,11 @@ static int imx6_pcie_enable_ref_clk(struct imx6_pcie *imx6_pcie)
break;
case IMX7D:
break;
+ case IMX8MM:
+ ret = clk_prepare_enable(imx6_pcie->pcie_aux);
+ if (ret)
+ dev_err(dev, "unable to enable pcie_aux clock\n");
+ break;
case IMX8MQ:
ret = clk_prepare_enable(imx6_pcie->pcie_aux);
if (ret) {
@@ -522,6 +533,14 @@ static void imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie)
goto err_ref_clk;
}
+ switch (imx6_pcie->drvdata->variant) {
+ case IMX8MM:
+ if (phy_power_on(imx6_pcie->phy))
+ dev_err(dev, "unable to power on PHY\n");
+ break;
+ default:
+ break;
+ }
/* allow the clocks to stabilize */
usleep_range(200, 500);
@@ -538,6 +557,10 @@ static void imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie)
case IMX8MQ:
reset_control_deassert(imx6_pcie->pciephy_reset);
break;
+ case IMX8MM:
+ if (phy_init(imx6_pcie->phy))
+ dev_err(dev, "waiting for phy ready timeout!\n");
+ break;
case IMX7D:
reset_control_deassert(imx6_pcie->pciephy_reset);
@@ -614,6 +637,12 @@ static void imx6_pcie_configure_type(struct imx6_pcie *imx6_pcie)
static void imx6_pcie_init_phy(struct imx6_pcie *imx6_pcie)
{
switch (imx6_pcie->drvdata->variant) {
+ case IMX8MM:
+ /*
+ * The PHY initialization had been done in the PHY
+ * driver, break here directly.
+ */
+ break;
case IMX8MQ:
/*
* TODO: Currently this code assumes external
@@ -753,6 +782,7 @@ static void imx6_pcie_ltssm_enable(struct device *dev)
break;
case IMX7D:
case IMX8MQ:
+ case IMX8MM:
reset_control_deassert(imx6_pcie->apps_reset);
break;
}
@@ -871,6 +901,7 @@ static void imx6_pcie_ltssm_disable(struct device *dev)
IMX6Q_GPR12_PCIE_CTL_2, 0);
break;
case IMX7D:
+ case IMX8MM:
reset_control_assert(imx6_pcie->apps_reset);
break;
default:
@@ -930,6 +961,7 @@ static void imx6_pcie_clk_disable(struct imx6_pcie *imx6_pcie)
IMX7D_GPR12_PCIE_PHY_REFCLK_SEL);
break;
case IMX8MQ:
+ case IMX8MM:
clk_disable_unprepare(imx6_pcie->pcie_aux);
break;
default:
@@ -945,8 +977,16 @@ static int imx6_pcie_suspend_noirq(struct device *dev)
return 0;
imx6_pcie_pm_turnoff(imx6_pcie);
- imx6_pcie_clk_disable(imx6_pcie);
imx6_pcie_ltssm_disable(dev);
+ imx6_pcie_clk_disable(imx6_pcie);
+ switch (imx6_pcie->drvdata->variant) {
+ case IMX8MM:
+ if (phy_power_off(imx6_pcie->phy))
+ dev_err(dev, "unable to power off PHY\n");
+ break;
+ default:
+ break;
+ }
return 0;
}
@@ -1043,11 +1083,6 @@ static int imx6_pcie_probe(struct platform_device *pdev)
}
/* Fetch clocks */
- imx6_pcie->pcie_phy = devm_clk_get(dev, "pcie_phy");
- if (IS_ERR(imx6_pcie->pcie_phy))
- return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_phy),
- "pcie_phy clock source missing or invalid\n");
-
imx6_pcie->pcie_bus = devm_clk_get(dev, "pcie_bus");
if (IS_ERR(imx6_pcie->pcie_bus))
return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_bus),
@@ -1090,9 +1125,34 @@ static int imx6_pcie_probe(struct platform_device *pdev)
return PTR_ERR(imx6_pcie->apps_reset);
}
break;
+ case IMX8MM:
+ imx6_pcie->pcie_aux = devm_clk_get(dev, "pcie_aux");
+ if (IS_ERR(imx6_pcie->pcie_aux))
+ return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_aux),
+ "pcie_aux clock source missing or invalid\n");
+ imx6_pcie->apps_reset = devm_reset_control_get_exclusive(dev,
+ "apps");
+ if (IS_ERR(imx6_pcie->apps_reset))
+ return dev_err_probe(dev, PTR_ERR(imx6_pcie->apps_reset),
+ "failed to get pcie apps reset control\n");
+
+ imx6_pcie->phy = devm_phy_get(dev, "pcie-phy");
+ if (IS_ERR(imx6_pcie->phy))
+ return dev_err_probe(dev, PTR_ERR(imx6_pcie->phy),
+ "failed to get pcie phy\n");
+
+ break;
default:
break;
}
+ /* Don't fetch the pcie_phy clock, if it has abstract PHY driver */
+ if (imx6_pcie->phy == NULL) {
+ imx6_pcie->pcie_phy = devm_clk_get(dev, "pcie_phy");
+ if (IS_ERR(imx6_pcie->pcie_phy))
+ return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_phy),
+ "pcie_phy clock source missing or invalid\n");
+ }
+
/* Grab turnoff reset */
imx6_pcie->turnoff_reset = devm_reset_control_get_optional_exclusive(dev, "turnoff");
@@ -1202,6 +1262,10 @@ static const struct imx6_pcie_drvdata drvdata[] = {
[IMX8MQ] = {
.variant = IMX8MQ,
},
+ [IMX8MM] = {
+ .variant = IMX8MM,
+ .flags = IMX6_PCIE_FLAG_SUPPORTS_SUSPEND,
+ },
};
static const struct of_device_id imx6_pcie_of_match[] = {
@@ -1209,7 +1273,8 @@ static const struct of_device_id imx6_pcie_of_match[] = {
{ .compatible = "fsl,imx6sx-pcie", .data = &drvdata[IMX6SX], },
{ .compatible = "fsl,imx6qp-pcie", .data = &drvdata[IMX6QP], },
{ .compatible = "fsl,imx7d-pcie", .data = &drvdata[IMX7D], },
- { .compatible = "fsl,imx8mq-pcie", .data = &drvdata[IMX8MQ], } ,
+ { .compatible = "fsl,imx8mq-pcie", .data = &drvdata[IMX8MQ], },
+ { .compatible = "fsl,imx8mm-pcie", .data = &drvdata[IMX8MM], },
{},
};
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
index 865258d8c53c..1c2ee4e13f1c 100644
--- a/drivers/pci/controller/dwc/pci-keystone.c
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -747,9 +747,9 @@ err:
#ifdef CONFIG_ARM
/*
- * When a PCI device does not exist during config cycles, keystone host gets a
- * bus error instead of returning 0xffffffff. This handler always returns 0
- * for this kind of faults.
+ * When a PCI device does not exist during config cycles, keystone host
+ * gets a bus error instead of returning 0xffffffff (PCI_ERROR_RESPONSE).
+ * This handler always returns 0 for this kind of fault.
*/
static int ks_pcie_fault(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
@@ -775,12 +775,19 @@ static int __init ks_pcie_init_id(struct keystone_pcie *ks_pcie)
struct dw_pcie *pci = ks_pcie->pci;
struct device *dev = pci->dev;
struct device_node *np = dev->of_node;
+ struct of_phandle_args args;
+ unsigned int offset = 0;
devctrl_regs = syscon_regmap_lookup_by_phandle(np, "ti,syscon-pcie-id");
if (IS_ERR(devctrl_regs))
return PTR_ERR(devctrl_regs);
- ret = regmap_read(devctrl_regs, 0, &id);
+ /* Do not error out to maintain old DT compatibility */
+ ret = of_parse_phandle_with_fixed_args(np, "ti,syscon-pcie-id", 1, 0, &args);
+ if (!ret)
+ offset = args.args[0];
+
+ ret = regmap_read(devctrl_regs, offset, &id);
if (ret)
return ret;
@@ -989,6 +996,8 @@ err_phy:
static int ks_pcie_set_mode(struct device *dev)
{
struct device_node *np = dev->of_node;
+ struct of_phandle_args args;
+ unsigned int offset = 0;
struct regmap *syscon;
u32 val;
u32 mask;
@@ -998,10 +1007,15 @@ static int ks_pcie_set_mode(struct device *dev)
if (IS_ERR(syscon))
return 0;
+ /* Do not error out to maintain old DT compatibility */
+ ret = of_parse_phandle_with_fixed_args(np, "ti,syscon-pcie-mode", 1, 0, &args);
+ if (!ret)
+ offset = args.args[0];
+
mask = KS_PCIE_DEV_TYPE_MASK | KS_PCIE_SYSCLOCKOUTEN;
val = KS_PCIE_DEV_TYPE(RC) | KS_PCIE_SYSCLOCKOUTEN;
- ret = regmap_update_bits(syscon, 0, mask, val);
+ ret = regmap_update_bits(syscon, offset, mask, val);
if (ret) {
dev_err(dev, "failed to set pcie mode\n");
return ret;
@@ -1014,6 +1028,8 @@ static int ks_pcie_am654_set_mode(struct device *dev,
enum dw_pcie_device_mode mode)
{
struct device_node *np = dev->of_node;
+ struct of_phandle_args args;
+ unsigned int offset = 0;
struct regmap *syscon;
u32 val;
u32 mask;
@@ -1023,6 +1039,11 @@ static int ks_pcie_am654_set_mode(struct device *dev,
if (IS_ERR(syscon))
return 0;
+ /* Do not error out to maintain old DT compatibility */
+ ret = of_parse_phandle_with_fixed_args(np, "ti,syscon-pcie-mode", 1, 0, &args);
+ if (!ret)
+ offset = args.args[0];
+
mask = AM654_PCIE_DEV_TYPE_MASK;
switch (mode) {
@@ -1037,7 +1058,7 @@ static int ks_pcie_am654_set_mode(struct device *dev,
return -EINVAL;
}
- ret = regmap_update_bits(syscon, 0, mask, val);
+ ret = regmap_update_bits(syscon, offset, mask, val);
if (ret) {
dev_err(dev, "failed to set pcie mode\n");
return ret;
@@ -1087,7 +1108,6 @@ static int __init ks_pcie_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct device_node *np = dev->of_node;
const struct ks_pcie_of_data *data;
- const struct of_device_id *match;
enum dw_pcie_device_mode mode;
struct dw_pcie *pci;
struct keystone_pcie *ks_pcie;
@@ -1104,8 +1124,7 @@ static int __init ks_pcie_probe(struct platform_device *pdev)
int irq;
int i;
- match = of_match_device(of_match_ptr(ks_pcie_of_match), dev);
- data = (struct ks_pcie_of_data *)match->data;
+ data = of_device_get_match_data(dev);
if (!data)
return -EINVAL;
diff --git a/drivers/pci/controller/dwc/pci-layerscape.c b/drivers/pci/controller/dwc/pci-layerscape.c
index 5b9c625df7b8..6a4f0619bb1c 100644
--- a/drivers/pci/controller/dwc/pci-layerscape.c
+++ b/drivers/pci/controller/dwc/pci-layerscape.c
@@ -3,6 +3,7 @@
* PCIe host controller driver for Freescale Layerscape SoCs
*
* Copyright (C) 2014 Freescale Semiconductor.
+ * Copyright 2021 NXP
*
* Author: Minghuan Lian <Minghuan.Lian@freescale.com>
*/
@@ -22,12 +23,6 @@
#include "pcie-designware.h"
-/* PEX1/2 Misc Ports Status Register */
-#define SCFG_PEXMSCPORTSR(pex_idx) (0x94 + (pex_idx) * 4)
-#define LTSSM_STATE_SHIFT 20
-#define LTSSM_STATE_MASK 0x3f
-#define LTSSM_PCIE_L0 0x11 /* L0 state */
-
/* PEX Internal Configuration Registers */
#define PCIE_STRFMR1 0x71c /* Symbol Timer & Filter Mask Register1 */
#define PCIE_ABSERR 0x8d0 /* Bridge Slave Error Response Register */
@@ -35,20 +30,8 @@
#define PCIE_IATU_NUM 6
-struct ls_pcie_drvdata {
- u32 lut_offset;
- u32 ltssm_shift;
- u32 lut_dbg;
- const struct dw_pcie_host_ops *ops;
- const struct dw_pcie_ops *dw_pcie_ops;
-};
-
struct ls_pcie {
struct dw_pcie *pci;
- void __iomem *lut;
- struct regmap *scfg;
- const struct ls_pcie_drvdata *drvdata;
- int index;
};
#define to_ls_pcie(x) dev_get_drvdata((x)->dev)
@@ -83,38 +66,6 @@ static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie)
iowrite32(val, pci->dbi_base + PCIE_STRFMR1);
}
-static int ls1021_pcie_link_up(struct dw_pcie *pci)
-{
- u32 state;
- struct ls_pcie *pcie = to_ls_pcie(pci);
-
- if (!pcie->scfg)
- return 0;
-
- regmap_read(pcie->scfg, SCFG_PEXMSCPORTSR(pcie->index), &state);
- state = (state >> LTSSM_STATE_SHIFT) & LTSSM_STATE_MASK;
-
- if (state < LTSSM_PCIE_L0)
- return 0;
-
- return 1;
-}
-
-static int ls_pcie_link_up(struct dw_pcie *pci)
-{
- struct ls_pcie *pcie = to_ls_pcie(pci);
- u32 state;
-
- state = (ioread32(pcie->lut + pcie->drvdata->lut_dbg) >>
- pcie->drvdata->ltssm_shift) &
- LTSSM_STATE_MASK;
-
- if (state < LTSSM_PCIE_L0)
- return 0;
-
- return 1;
-}
-
/* Forward error response of outbound non-posted requests */
static void ls_pcie_fix_error_response(struct ls_pcie *pcie)
{
@@ -139,96 +90,20 @@ static int ls_pcie_host_init(struct pcie_port *pp)
return 0;
}
-static int ls1021_pcie_host_init(struct pcie_port *pp)
-{
- struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct ls_pcie *pcie = to_ls_pcie(pci);
- struct device *dev = pci->dev;
- u32 index[2];
- int ret;
-
- pcie->scfg = syscon_regmap_lookup_by_phandle(dev->of_node,
- "fsl,pcie-scfg");
- if (IS_ERR(pcie->scfg)) {
- ret = PTR_ERR(pcie->scfg);
- dev_err(dev, "No syscfg phandle specified\n");
- pcie->scfg = NULL;
- return ret;
- }
-
- if (of_property_read_u32_array(dev->of_node,
- "fsl,pcie-scfg", index, 2)) {
- pcie->scfg = NULL;
- return -EINVAL;
- }
- pcie->index = index[1];
-
- return ls_pcie_host_init(pp);
-}
-
-static const struct dw_pcie_host_ops ls1021_pcie_host_ops = {
- .host_init = ls1021_pcie_host_init,
-};
-
static const struct dw_pcie_host_ops ls_pcie_host_ops = {
.host_init = ls_pcie_host_init,
};
-static const struct dw_pcie_ops dw_ls1021_pcie_ops = {
- .link_up = ls1021_pcie_link_up,
-};
-
-static const struct dw_pcie_ops dw_ls_pcie_ops = {
- .link_up = ls_pcie_link_up,
-};
-
-static const struct ls_pcie_drvdata ls1021_drvdata = {
- .ops = &ls1021_pcie_host_ops,
- .dw_pcie_ops = &dw_ls1021_pcie_ops,
-};
-
-static const struct ls_pcie_drvdata ls1043_drvdata = {
- .lut_offset = 0x10000,
- .ltssm_shift = 24,
- .lut_dbg = 0x7fc,
- .ops = &ls_pcie_host_ops,
- .dw_pcie_ops = &dw_ls_pcie_ops,
-};
-
-static const struct ls_pcie_drvdata ls1046_drvdata = {
- .lut_offset = 0x80000,
- .ltssm_shift = 24,
- .lut_dbg = 0x407fc,
- .ops = &ls_pcie_host_ops,
- .dw_pcie_ops = &dw_ls_pcie_ops,
-};
-
-static const struct ls_pcie_drvdata ls2080_drvdata = {
- .lut_offset = 0x80000,
- .ltssm_shift = 0,
- .lut_dbg = 0x7fc,
- .ops = &ls_pcie_host_ops,
- .dw_pcie_ops = &dw_ls_pcie_ops,
-};
-
-static const struct ls_pcie_drvdata ls2088_drvdata = {
- .lut_offset = 0x80000,
- .ltssm_shift = 0,
- .lut_dbg = 0x407fc,
- .ops = &ls_pcie_host_ops,
- .dw_pcie_ops = &dw_ls_pcie_ops,
-};
-
static const struct of_device_id ls_pcie_of_match[] = {
- { .compatible = "fsl,ls1012a-pcie", .data = &ls1046_drvdata },
- { .compatible = "fsl,ls1021a-pcie", .data = &ls1021_drvdata },
- { .compatible = "fsl,ls1028a-pcie", .data = &ls2088_drvdata },
- { .compatible = "fsl,ls1043a-pcie", .data = &ls1043_drvdata },
- { .compatible = "fsl,ls1046a-pcie", .data = &ls1046_drvdata },
- { .compatible = "fsl,ls2080a-pcie", .data = &ls2080_drvdata },
- { .compatible = "fsl,ls2085a-pcie", .data = &ls2080_drvdata },
- { .compatible = "fsl,ls2088a-pcie", .data = &ls2088_drvdata },
- { .compatible = "fsl,ls1088a-pcie", .data = &ls2088_drvdata },
+ { .compatible = "fsl,ls1012a-pcie", },
+ { .compatible = "fsl,ls1021a-pcie", },
+ { .compatible = "fsl,ls1028a-pcie", },
+ { .compatible = "fsl,ls1043a-pcie", },
+ { .compatible = "fsl,ls1046a-pcie", },
+ { .compatible = "fsl,ls2080a-pcie", },
+ { .compatible = "fsl,ls2085a-pcie", },
+ { .compatible = "fsl,ls2088a-pcie", },
+ { .compatible = "fsl,ls1088a-pcie", },
{ },
};
@@ -247,11 +122,8 @@ static int ls_pcie_probe(struct platform_device *pdev)
if (!pci)
return -ENOMEM;
- pcie->drvdata = of_device_get_match_data(dev);
-
pci->dev = dev;
- pci->ops = pcie->drvdata->dw_pcie_ops;
- pci->pp.ops = pcie->drvdata->ops;
+ pci->pp.ops = &ls_pcie_host_ops;
pcie->pci = pci;
@@ -260,8 +132,6 @@ static int ls_pcie_probe(struct platform_device *pdev)
if (IS_ERR(pci->dbi_base))
return PTR_ERR(pci->dbi_base);
- pcie->lut = pci->dbi_base + pcie->drvdata->lut_offset;
-
if (!ls_pcie_is_bridge(pcie))
return -ENODEV;
diff --git a/drivers/pci/controller/dwc/pcie-artpec6.c b/drivers/pci/controller/dwc/pcie-artpec6.c
index c91fc1954432..2f15441770e1 100644
--- a/drivers/pci/controller/dwc/pcie-artpec6.c
+++ b/drivers/pci/controller/dwc/pcie-artpec6.c
@@ -380,17 +380,15 @@ static int artpec6_pcie_probe(struct platform_device *pdev)
struct dw_pcie *pci;
struct artpec6_pcie *artpec6_pcie;
int ret;
- const struct of_device_id *match;
const struct artpec_pcie_of_data *data;
enum artpec_pcie_variants variant;
enum dw_pcie_device_mode mode;
u32 val;
- match = of_match_device(artpec6_pcie_of_match, dev);
- if (!match)
+ data = of_device_get_match_data(dev);
+ if (!data)
return -EINVAL;
- data = (struct artpec_pcie_of_data *)match->data;
variant = (enum artpec_pcie_variants)data->variant;
mode = (enum dw_pcie_device_mode)data->mode;
diff --git a/drivers/pci/controller/dwc/pcie-designware-plat.c b/drivers/pci/controller/dwc/pcie-designware-plat.c
index 8851eb161a0e..0c5de87d3cc6 100644
--- a/drivers/pci/controller/dwc/pcie-designware-plat.c
+++ b/drivers/pci/controller/dwc/pcie-designware-plat.c
@@ -122,15 +122,13 @@ static int dw_plat_pcie_probe(struct platform_device *pdev)
struct dw_plat_pcie *dw_plat_pcie;
struct dw_pcie *pci;
int ret;
- const struct of_device_id *match;
const struct dw_plat_pcie_of_data *data;
enum dw_pcie_device_mode mode;
- match = of_match_device(dw_plat_pcie_of_match, dev);
- if (!match)
+ data = of_device_get_match_data(dev);
+ if (!data)
return -EINVAL;
- data = (struct dw_plat_pcie_of_data *)match->data;
mode = (enum dw_pcie_device_mode)data->mode;
dw_plat_pcie = devm_kzalloc(dev, sizeof(*dw_plat_pcie), GFP_KERNEL);
diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index 850b4533f4ef..d92c8a25094f 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -672,10 +672,11 @@ void dw_pcie_iatu_detect(struct dw_pcie *pci)
if (!pci->atu_base) {
struct resource *res =
platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu");
- if (res)
+ if (res) {
pci->atu_size = resource_size(res);
- pci->atu_base = devm_ioremap_resource(dev, res);
- if (IS_ERR(pci->atu_base))
+ pci->atu_base = devm_ioremap_resource(dev, res);
+ }
+ if (!pci->atu_base || IS_ERR(pci->atu_base))
pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET;
}
diff --git a/drivers/pci/controller/dwc/pcie-hisi.c b/drivers/pci/controller/dwc/pcie-hisi.c
index 8fc5960faf28..8904b5b85ee5 100644
--- a/drivers/pci/controller/dwc/pcie-hisi.c
+++ b/drivers/pci/controller/dwc/pcie-hisi.c
@@ -18,6 +18,10 @@
#if defined(CONFIG_PCI_HISI) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
+struct hisi_pcie {
+ void __iomem *reg_base;
+};
+
static int hisi_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
int size, u32 *val)
{
@@ -58,10 +62,10 @@ static void __iomem *hisi_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
int where)
{
struct pci_config_window *cfg = bus->sysdata;
- void __iomem *reg_base = cfg->priv;
+ struct hisi_pcie *pcie = cfg->priv;
if (bus->number == cfg->busr.start)
- return reg_base + where;
+ return pcie->reg_base + where;
else
return pci_ecam_map_bus(bus, devfn, where);
}
@@ -71,12 +75,16 @@ static void __iomem *hisi_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
static int hisi_pcie_init(struct pci_config_window *cfg)
{
struct device *dev = cfg->parent;
+ struct hisi_pcie *pcie;
struct acpi_device *adev = to_acpi_device(dev);
struct acpi_pci_root *root = acpi_driver_data(adev);
struct resource *res;
- void __iomem *reg_base;
int ret;
+ pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+ if (!pcie)
+ return -ENOMEM;
+
/*
* Retrieve RC base and size from a HISI0081 device with _UID
* matching our segment.
@@ -91,11 +99,11 @@ static int hisi_pcie_init(struct pci_config_window *cfg)
return -ENOMEM;
}
- reg_base = devm_pci_remap_cfgspace(dev, res->start, resource_size(res));
- if (!reg_base)
+ pcie->reg_base = devm_pci_remap_cfgspace(dev, res->start, resource_size(res));
+ if (!pcie->reg_base)
return -ENOMEM;
- cfg->priv = reg_base;
+ cfg->priv = pcie;
return 0;
}
@@ -115,9 +123,13 @@ const struct pci_ecam_ops hisi_pcie_ops = {
static int hisi_pcie_platform_init(struct pci_config_window *cfg)
{
struct device *dev = cfg->parent;
+ struct hisi_pcie *pcie;
struct platform_device *pdev = to_platform_device(dev);
struct resource *res;
- void __iomem *reg_base;
+
+ pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+ if (!pcie)
+ return -ENOMEM;
res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
if (!res) {
@@ -125,11 +137,11 @@ static int hisi_pcie_platform_init(struct pci_config_window *cfg)
return -EINVAL;
}
- reg_base = devm_pci_remap_cfgspace(dev, res->start, resource_size(res));
- if (!reg_base)
+ pcie->reg_base = devm_pci_remap_cfgspace(dev, res->start, resource_size(res));
+ if (!pcie->reg_base)
return -ENOMEM;
- cfg->priv = reg_base;
+ cfg->priv = pcie;
return 0;
}
diff --git a/drivers/pci/controller/dwc/pcie-histb.c b/drivers/pci/controller/dwc/pcie-histb.c
index 86f9d16c50d7..410555dccb6d 100644
--- a/drivers/pci/controller/dwc/pcie-histb.c
+++ b/drivers/pci/controller/dwc/pcie-histb.c
@@ -127,10 +127,8 @@ static int histb_pcie_rd_own_conf(struct pci_bus *bus, unsigned int devfn,
{
struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
- if (PCI_SLOT(devfn)) {
- *val = ~0;
+ if (PCI_SLOT(devfn))
return PCIBIOS_DEVICE_NOT_FOUND;
- }
*val = dw_pcie_read_dbi(pci, where, size);
return PCIBIOS_SUCCESSFUL;
diff --git a/drivers/pci/controller/dwc/pcie-intel-gw.c b/drivers/pci/controller/dwc/pcie-intel-gw.c
index d15cf35fa7f2..5ba144924ff8 100644
--- a/drivers/pci/controller/dwc/pcie-intel-gw.c
+++ b/drivers/pci/controller/dwc/pcie-intel-gw.c
@@ -62,7 +62,7 @@ struct intel_pcie_soc {
unsigned int pcie_ver;
};
-struct intel_pcie_port {
+struct intel_pcie {
struct dw_pcie pci;
void __iomem *app_base;
struct gpio_desc *reset_gpio;
@@ -83,53 +83,53 @@ static void pcie_update_bits(void __iomem *base, u32 ofs, u32 mask, u32 val)
writel(val, base + ofs);
}
-static inline void pcie_app_wr(struct intel_pcie_port *lpp, u32 ofs, u32 val)
+static inline void pcie_app_wr(struct intel_pcie *pcie, u32 ofs, u32 val)
{
- writel(val, lpp->app_base + ofs);
+ writel(val, pcie->app_base + ofs);
}
-static void pcie_app_wr_mask(struct intel_pcie_port *lpp, u32 ofs,
+static void pcie_app_wr_mask(struct intel_pcie *pcie, u32 ofs,
u32 mask, u32 val)
{
- pcie_update_bits(lpp->app_base, ofs, mask, val);
+ pcie_update_bits(pcie->app_base, ofs, mask, val);
}
-static inline u32 pcie_rc_cfg_rd(struct intel_pcie_port *lpp, u32 ofs)
+static inline u32 pcie_rc_cfg_rd(struct intel_pcie *pcie, u32 ofs)
{
- return dw_pcie_readl_dbi(&lpp->pci, ofs);
+ return dw_pcie_readl_dbi(&pcie->pci, ofs);
}
-static inline void pcie_rc_cfg_wr(struct intel_pcie_port *lpp, u32 ofs, u32 val)
+static inline void pcie_rc_cfg_wr(struct intel_pcie *pcie, u32 ofs, u32 val)
{
- dw_pcie_writel_dbi(&lpp->pci, ofs, val);
+ dw_pcie_writel_dbi(&pcie->pci, ofs, val);
}
-static void pcie_rc_cfg_wr_mask(struct intel_pcie_port *lpp, u32 ofs,
+static void pcie_rc_cfg_wr_mask(struct intel_pcie *pcie, u32 ofs,
u32 mask, u32 val)
{
- pcie_update_bits(lpp->pci.dbi_base, ofs, mask, val);
+ pcie_update_bits(pcie->pci.dbi_base, ofs, mask, val);
}
-static void intel_pcie_ltssm_enable(struct intel_pcie_port *lpp)
+static void intel_pcie_ltssm_enable(struct intel_pcie *pcie)
{
- pcie_app_wr_mask(lpp, PCIE_APP_CCR, PCIE_APP_CCR_LTSSM_ENABLE,
+ pcie_app_wr_mask(pcie, PCIE_APP_CCR, PCIE_APP_CCR_LTSSM_ENABLE,
PCIE_APP_CCR_LTSSM_ENABLE);
}
-static void intel_pcie_ltssm_disable(struct intel_pcie_port *lpp)
+static void intel_pcie_ltssm_disable(struct intel_pcie *pcie)
{
- pcie_app_wr_mask(lpp, PCIE_APP_CCR, PCIE_APP_CCR_LTSSM_ENABLE, 0);
+ pcie_app_wr_mask(pcie, PCIE_APP_CCR, PCIE_APP_CCR_LTSSM_ENABLE, 0);
}
-static void intel_pcie_link_setup(struct intel_pcie_port *lpp)
+static void intel_pcie_link_setup(struct intel_pcie *pcie)
{
u32 val;
- u8 offset = dw_pcie_find_capability(&lpp->pci, PCI_CAP_ID_EXP);
+ u8 offset = dw_pcie_find_capability(&pcie->pci, PCI_CAP_ID_EXP);
- val = pcie_rc_cfg_rd(lpp, offset + PCI_EXP_LNKCTL);
+ val = pcie_rc_cfg_rd(pcie, offset + PCI_EXP_LNKCTL);
val &= ~(PCI_EXP_LNKCTL_LD | PCI_EXP_LNKCTL_ASPMC);
- pcie_rc_cfg_wr(lpp, offset + PCI_EXP_LNKCTL, val);
+ pcie_rc_cfg_wr(pcie, offset + PCI_EXP_LNKCTL, val);
}
static void intel_pcie_init_n_fts(struct dw_pcie *pci)
@@ -148,14 +148,14 @@ static void intel_pcie_init_n_fts(struct dw_pcie *pci)
pci->n_fts[0] = PORT_AFR_N_FTS_GEN12_DFT;
}
-static int intel_pcie_ep_rst_init(struct intel_pcie_port *lpp)
+static int intel_pcie_ep_rst_init(struct intel_pcie *pcie)
{
- struct device *dev = lpp->pci.dev;
+ struct device *dev = pcie->pci.dev;
int ret;
- lpp->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
- if (IS_ERR(lpp->reset_gpio)) {
- ret = PTR_ERR(lpp->reset_gpio);
+ pcie->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(pcie->reset_gpio)) {
+ ret = PTR_ERR(pcie->reset_gpio);
if (ret != -EPROBE_DEFER)
dev_err(dev, "Failed to request PCIe GPIO: %d\n", ret);
return ret;
@@ -167,19 +167,19 @@ static int intel_pcie_ep_rst_init(struct intel_pcie_port *lpp)
return 0;
}
-static void intel_pcie_core_rst_assert(struct intel_pcie_port *lpp)
+static void intel_pcie_core_rst_assert(struct intel_pcie *pcie)
{
- reset_control_assert(lpp->core_rst);
+ reset_control_assert(pcie->core_rst);
}
-static void intel_pcie_core_rst_deassert(struct intel_pcie_port *lpp)
+static void intel_pcie_core_rst_deassert(struct intel_pcie *pcie)
{
/*
* One micro-second delay to make sure the reset pulse
* wide enough so that core reset is clean.
*/
udelay(1);
- reset_control_deassert(lpp->core_rst);
+ reset_control_deassert(pcie->core_rst);
/*
* Some SoC core reset also reset PHY, more delay needed
@@ -188,58 +188,58 @@ static void intel_pcie_core_rst_deassert(struct intel_pcie_port *lpp)
usleep_range(1000, 2000);
}
-static void intel_pcie_device_rst_assert(struct intel_pcie_port *lpp)
+static void intel_pcie_device_rst_assert(struct intel_pcie *pcie)
{
- gpiod_set_value_cansleep(lpp->reset_gpio, 1);
+ gpiod_set_value_cansleep(pcie->reset_gpio, 1);
}
-static void intel_pcie_device_rst_deassert(struct intel_pcie_port *lpp)
+static void intel_pcie_device_rst_deassert(struct intel_pcie *pcie)
{
- msleep(lpp->rst_intrvl);
- gpiod_set_value_cansleep(lpp->reset_gpio, 0);
+ msleep(pcie->rst_intrvl);
+ gpiod_set_value_cansleep(pcie->reset_gpio, 0);
}
-static void intel_pcie_core_irq_disable(struct intel_pcie_port *lpp)
+static void intel_pcie_core_irq_disable(struct intel_pcie *pcie)
{
- pcie_app_wr(lpp, PCIE_APP_IRNEN, 0);
- pcie_app_wr(lpp, PCIE_APP_IRNCR, PCIE_APP_IRN_INT);
+ pcie_app_wr(pcie, PCIE_APP_IRNEN, 0);
+ pcie_app_wr(pcie, PCIE_APP_IRNCR, PCIE_APP_IRN_INT);
}
static int intel_pcie_get_resources(struct platform_device *pdev)
{
- struct intel_pcie_port *lpp = platform_get_drvdata(pdev);
- struct dw_pcie *pci = &lpp->pci;
+ struct intel_pcie *pcie = platform_get_drvdata(pdev);
+ struct dw_pcie *pci = &pcie->pci;
struct device *dev = pci->dev;
int ret;
- lpp->core_clk = devm_clk_get(dev, NULL);
- if (IS_ERR(lpp->core_clk)) {
- ret = PTR_ERR(lpp->core_clk);
+ pcie->core_clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(pcie->core_clk)) {
+ ret = PTR_ERR(pcie->core_clk);
if (ret != -EPROBE_DEFER)
dev_err(dev, "Failed to get clks: %d\n", ret);
return ret;
}
- lpp->core_rst = devm_reset_control_get(dev, NULL);
- if (IS_ERR(lpp->core_rst)) {
- ret = PTR_ERR(lpp->core_rst);
+ pcie->core_rst = devm_reset_control_get(dev, NULL);
+ if (IS_ERR(pcie->core_rst)) {
+ ret = PTR_ERR(pcie->core_rst);
if (ret != -EPROBE_DEFER)
dev_err(dev, "Failed to get resets: %d\n", ret);
return ret;
}
ret = device_property_read_u32(dev, "reset-assert-ms",
- &lpp->rst_intrvl);
+ &pcie->rst_intrvl);
if (ret)
- lpp->rst_intrvl = RESET_INTERVAL_MS;
+ pcie->rst_intrvl = RESET_INTERVAL_MS;
- lpp->app_base = devm_platform_ioremap_resource_byname(pdev, "app");
- if (IS_ERR(lpp->app_base))
- return PTR_ERR(lpp->app_base);
+ pcie->app_base = devm_platform_ioremap_resource_byname(pdev, "app");
+ if (IS_ERR(pcie->app_base))
+ return PTR_ERR(pcie->app_base);
- lpp->phy = devm_phy_get(dev, "pcie");
- if (IS_ERR(lpp->phy)) {
- ret = PTR_ERR(lpp->phy);
+ pcie->phy = devm_phy_get(dev, "pcie");
+ if (IS_ERR(pcie->phy)) {
+ ret = PTR_ERR(pcie->phy);
if (ret != -EPROBE_DEFER)
dev_err(dev, "Couldn't get pcie-phy: %d\n", ret);
return ret;
@@ -248,137 +248,137 @@ static int intel_pcie_get_resources(struct platform_device *pdev)
return 0;
}
-static int intel_pcie_wait_l2(struct intel_pcie_port *lpp)
+static int intel_pcie_wait_l2(struct intel_pcie *pcie)
{
u32 value;
int ret;
- struct dw_pcie *pci = &lpp->pci;
+ struct dw_pcie *pci = &pcie->pci;
if (pci->link_gen < 3)
return 0;
/* Send PME_TURN_OFF message */
- pcie_app_wr_mask(lpp, PCIE_APP_MSG_CR, PCIE_APP_MSG_XMT_PM_TURNOFF,
+ pcie_app_wr_mask(pcie, PCIE_APP_MSG_CR, PCIE_APP_MSG_XMT_PM_TURNOFF,
PCIE_APP_MSG_XMT_PM_TURNOFF);
/* Read PMC status and wait for falling into L2 link state */
- ret = readl_poll_timeout(lpp->app_base + PCIE_APP_PMC, value,
+ ret = readl_poll_timeout(pcie->app_base + PCIE_APP_PMC, value,
value & PCIE_APP_PMC_IN_L2, 20,
jiffies_to_usecs(5 * HZ));
if (ret)
- dev_err(lpp->pci.dev, "PCIe link enter L2 timeout!\n");
+ dev_err(pcie->pci.dev, "PCIe link enter L2 timeout!\n");
return ret;
}
-static void intel_pcie_turn_off(struct intel_pcie_port *lpp)
+static void intel_pcie_turn_off(struct intel_pcie *pcie)
{
- if (dw_pcie_link_up(&lpp->pci))
- intel_pcie_wait_l2(lpp);
+ if (dw_pcie_link_up(&pcie->pci))
+ intel_pcie_wait_l2(pcie);
/* Put endpoint device in reset state */
- intel_pcie_device_rst_assert(lpp);
- pcie_rc_cfg_wr_mask(lpp, PCI_COMMAND, PCI_COMMAND_MEMORY, 0);
+ intel_pcie_device_rst_assert(pcie);
+ pcie_rc_cfg_wr_mask(pcie, PCI_COMMAND, PCI_COMMAND_MEMORY, 0);
}
-static int intel_pcie_host_setup(struct intel_pcie_port *lpp)
+static int intel_pcie_host_setup(struct intel_pcie *pcie)
{
int ret;
- struct dw_pcie *pci = &lpp->pci;
+ struct dw_pcie *pci = &pcie->pci;
- intel_pcie_core_rst_assert(lpp);
- intel_pcie_device_rst_assert(lpp);
+ intel_pcie_core_rst_assert(pcie);
+ intel_pcie_device_rst_assert(pcie);
- ret = phy_init(lpp->phy);
+ ret = phy_init(pcie->phy);
if (ret)
return ret;
- intel_pcie_core_rst_deassert(lpp);
+ intel_pcie_core_rst_deassert(pcie);
- ret = clk_prepare_enable(lpp->core_clk);
+ ret = clk_prepare_enable(pcie->core_clk);
if (ret) {
- dev_err(lpp->pci.dev, "Core clock enable failed: %d\n", ret);
+ dev_err(pcie->pci.dev, "Core clock enable failed: %d\n", ret);
goto clk_err;
}
pci->atu_base = pci->dbi_base + 0xC0000;
- intel_pcie_ltssm_disable(lpp);
- intel_pcie_link_setup(lpp);
+ intel_pcie_ltssm_disable(pcie);
+ intel_pcie_link_setup(pcie);
intel_pcie_init_n_fts(pci);
dw_pcie_setup_rc(&pci->pp);
dw_pcie_upconfig_setup(pci);
- intel_pcie_device_rst_deassert(lpp);
- intel_pcie_ltssm_enable(lpp);
+ intel_pcie_device_rst_deassert(pcie);
+ intel_pcie_ltssm_enable(pcie);
ret = dw_pcie_wait_for_link(pci);
if (ret)
goto app_init_err;
/* Enable integrated interrupts */
- pcie_app_wr_mask(lpp, PCIE_APP_IRNEN, PCIE_APP_IRN_INT,
+ pcie_app_wr_mask(pcie, PCIE_APP_IRNEN, PCIE_APP_IRN_INT,
PCIE_APP_IRN_INT);
return 0;
app_init_err:
- clk_disable_unprepare(lpp->core_clk);
+ clk_disable_unprepare(pcie->core_clk);
clk_err:
- intel_pcie_core_rst_assert(lpp);
- phy_exit(lpp->phy);
+ intel_pcie_core_rst_assert(pcie);
+ phy_exit(pcie->phy);
return ret;
}
-static void __intel_pcie_remove(struct intel_pcie_port *lpp)
+static void __intel_pcie_remove(struct intel_pcie *pcie)
{
- intel_pcie_core_irq_disable(lpp);
- intel_pcie_turn_off(lpp);
- clk_disable_unprepare(lpp->core_clk);
- intel_pcie_core_rst_assert(lpp);
- phy_exit(lpp->phy);
+ intel_pcie_core_irq_disable(pcie);
+ intel_pcie_turn_off(pcie);
+ clk_disable_unprepare(pcie->core_clk);
+ intel_pcie_core_rst_assert(pcie);
+ phy_exit(pcie->phy);
}
static int intel_pcie_remove(struct platform_device *pdev)
{
- struct intel_pcie_port *lpp = platform_get_drvdata(pdev);
- struct pcie_port *pp = &lpp->pci.pp;
+ struct intel_pcie *pcie = platform_get_drvdata(pdev);
+ struct pcie_port *pp = &pcie->pci.pp;
dw_pcie_host_deinit(pp);
- __intel_pcie_remove(lpp);
+ __intel_pcie_remove(pcie);
return 0;
}
static int __maybe_unused intel_pcie_suspend_noirq(struct device *dev)
{
- struct intel_pcie_port *lpp = dev_get_drvdata(dev);
+ struct intel_pcie *pcie = dev_get_drvdata(dev);
int ret;
- intel_pcie_core_irq_disable(lpp);
- ret = intel_pcie_wait_l2(lpp);
+ intel_pcie_core_irq_disable(pcie);
+ ret = intel_pcie_wait_l2(pcie);
if (ret)
return ret;
- phy_exit(lpp->phy);
- clk_disable_unprepare(lpp->core_clk);
+ phy_exit(pcie->phy);
+ clk_disable_unprepare(pcie->core_clk);
return ret;
}
static int __maybe_unused intel_pcie_resume_noirq(struct device *dev)
{
- struct intel_pcie_port *lpp = dev_get_drvdata(dev);
+ struct intel_pcie *pcie = dev_get_drvdata(dev);
- return intel_pcie_host_setup(lpp);
+ return intel_pcie_host_setup(pcie);
}
static int intel_pcie_rc_init(struct pcie_port *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct intel_pcie_port *lpp = dev_get_drvdata(pci->dev);
+ struct intel_pcie *pcie = dev_get_drvdata(pci->dev);
- return intel_pcie_host_setup(lpp);
+ return intel_pcie_host_setup(pcie);
}
static u64 intel_pcie_cpu_addr(struct dw_pcie *pcie, u64 cpu_addr)
@@ -402,17 +402,17 @@ static int intel_pcie_probe(struct platform_device *pdev)
{
const struct intel_pcie_soc *data;
struct device *dev = &pdev->dev;
- struct intel_pcie_port *lpp;
+ struct intel_pcie *pcie;
struct pcie_port *pp;
struct dw_pcie *pci;
int ret;
- lpp = devm_kzalloc(dev, sizeof(*lpp), GFP_KERNEL);
- if (!lpp)
+ pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+ if (!pcie)
return -ENOMEM;
- platform_set_drvdata(pdev, lpp);
- pci = &lpp->pci;
+ platform_set_drvdata(pdev, pcie);
+ pci = &pcie->pci;
pci->dev = dev;
pp = &pci->pp;
@@ -420,7 +420,7 @@ static int intel_pcie_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = intel_pcie_ep_rst_init(lpp);
+ ret = intel_pcie_ep_rst_init(pcie);
if (ret)
return ret;
diff --git a/drivers/pci/controller/dwc/pcie-kirin.c b/drivers/pci/controller/dwc/pcie-kirin.c
index 095afbccf9c1..fa6886d66488 100644
--- a/drivers/pci/controller/dwc/pcie-kirin.c
+++ b/drivers/pci/controller/dwc/pcie-kirin.c
@@ -530,10 +530,8 @@ static int kirin_pcie_rd_own_conf(struct pci_bus *bus, unsigned int devfn,
{
struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
- if (PCI_SLOT(devfn)) {
- *val = ~0;
+ if (PCI_SLOT(devfn))
return PCIBIOS_DEVICE_NOT_FOUND;
- }
*val = dw_pcie_read_dbi(pci, where, size);
return PCIBIOS_SUCCESSFUL;
@@ -773,7 +771,6 @@ static const struct of_device_id kirin_pcie_match[] = {
static int kirin_pcie_probe(struct platform_device *pdev)
{
enum pcie_kirin_phy_type phy_type;
- const struct of_device_id *of_id;
struct device *dev = &pdev->dev;
struct kirin_pcie *kirin_pcie;
struct dw_pcie *pci;
@@ -784,13 +781,12 @@ static int kirin_pcie_probe(struct platform_device *pdev)
return -EINVAL;
}
- of_id = of_match_device(kirin_pcie_match, dev);
- if (!of_id) {
+ phy_type = (long)of_device_get_match_data(dev);
+ if (!phy_type) {
dev_err(dev, "OF data missing\n");
return -EINVAL;
}
- phy_type = (long)of_id->data;
kirin_pcie = devm_kzalloc(dev, sizeof(struct kirin_pcie), GFP_KERNEL);
if (!kirin_pcie)
diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c
index cfe66bf04c1d..6ce8eddf3a37 100644
--- a/drivers/pci/controller/dwc/pcie-qcom-ep.c
+++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c
@@ -553,10 +553,8 @@ static int qcom_pcie_ep_enable_irq_resources(struct platform_device *pdev,
int irq, ret;
irq = platform_get_irq_byname(pdev, "global");
- if (irq < 0) {
- dev_err(&pdev->dev, "Failed to get Global IRQ\n");
+ if (irq < 0)
return irq;
- }
ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
qcom_pcie_ep_global_irq_thread,
@@ -620,7 +618,7 @@ static void qcom_pcie_ep_init(struct dw_pcie_ep *ep)
dw_pcie_ep_reset_bar(pci, bar);
}
-static struct dw_pcie_ep_ops pci_ep_ops = {
+static const struct dw_pcie_ep_ops pci_ep_ops = {
.ep_init = qcom_pcie_ep_init,
.raise_irq = qcom_pcie_ep_raise_irq,
.get_features = qcom_pcie_epc_get_features,
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 1c3d1116bb60..c19cd506ed3f 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -1343,7 +1343,7 @@ static int qcom_pcie_config_sid_sm8250(struct qcom_pcie *pcie)
/* Look for an available entry to hold the mapping */
for (i = 0; i < nr_map; i++) {
- u16 bdf_be = cpu_to_be16(map[i].bdf);
+ __be16 bdf_be = cpu_to_be16(map[i].bdf);
u32 val;
u8 hash;
@@ -1534,6 +1534,12 @@ static int qcom_pcie_probe(struct platform_device *pdev)
const struct qcom_pcie_cfg *pcie_cfg;
int ret;
+ pcie_cfg = of_device_get_match_data(dev);
+ if (!pcie_cfg || !pcie_cfg->ops) {
+ dev_err(dev, "Invalid platform data\n");
+ return -EINVAL;
+ }
+
pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
if (!pcie)
return -ENOMEM;
@@ -1553,12 +1559,6 @@ static int qcom_pcie_probe(struct platform_device *pdev)
pcie->pci = pci;
- pcie_cfg = of_device_get_match_data(dev);
- if (!pcie_cfg || !pcie_cfg->ops) {
- dev_err(dev, "Invalid platform data\n");
- return -EINVAL;
- }
-
pcie->ops = pcie_cfg->ops;
pcie->pipe_clk_need_muxing = pcie_cfg->pipe_clk_need_muxing;
diff --git a/drivers/pci/controller/dwc/pcie-spear13xx.c b/drivers/pci/controller/dwc/pcie-spear13xx.c
index 1a9e353bef55..1569e82b5568 100644
--- a/drivers/pci/controller/dwc/pcie-spear13xx.c
+++ b/drivers/pci/controller/dwc/pcie-spear13xx.c
@@ -69,7 +69,7 @@ struct pcie_app_reg {
static int spear13xx_pcie_start_link(struct dw_pcie *pci)
{
struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pci);
- struct pcie_app_reg *app_reg = spear13xx_pcie->app_base;
+ struct pcie_app_reg __iomem *app_reg = spear13xx_pcie->app_base;
/* enable ltssm */
writel(DEVICE_TYPE_RC | (1 << MISCTRL_EN_ID)
@@ -83,7 +83,7 @@ static int spear13xx_pcie_start_link(struct dw_pcie *pci)
static irqreturn_t spear13xx_pcie_irq_handler(int irq, void *arg)
{
struct spear13xx_pcie *spear13xx_pcie = arg;
- struct pcie_app_reg *app_reg = spear13xx_pcie->app_base;
+ struct pcie_app_reg __iomem *app_reg = spear13xx_pcie->app_base;
struct dw_pcie *pci = spear13xx_pcie->pci;
struct pcie_port *pp = &pci->pp;
unsigned int status;
@@ -102,7 +102,7 @@ static irqreturn_t spear13xx_pcie_irq_handler(int irq, void *arg)
static void spear13xx_pcie_enable_interrupts(struct spear13xx_pcie *spear13xx_pcie)
{
- struct pcie_app_reg *app_reg = spear13xx_pcie->app_base;
+ struct pcie_app_reg __iomem *app_reg = spear13xx_pcie->app_base;
/* Enable MSI interrupt */
if (IS_ENABLED(CONFIG_PCI_MSI))
@@ -113,7 +113,7 @@ static void spear13xx_pcie_enable_interrupts(struct spear13xx_pcie *spear13xx_pc
static int spear13xx_pcie_link_up(struct dw_pcie *pci)
{
struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pci);
- struct pcie_app_reg *app_reg = spear13xx_pcie->app_base;
+ struct pcie_app_reg __iomem *app_reg = spear13xx_pcie->app_base;
if (readl(&app_reg->app_status_1) & XMLH_LINK_UP)
return 1;
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
index 904976913081..b1b5f836a806 100644
--- a/drivers/pci/controller/dwc/pcie-tegra194.c
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -245,7 +245,7 @@ static const unsigned int pcie_gen_freq[] = {
GEN4_CORE_CLK_FREQ
};
-struct tegra_pcie_dw {
+struct tegra194_pcie {
struct device *dev;
struct resource *appl_res;
struct resource *dbi_res;
@@ -289,22 +289,22 @@ struct tegra_pcie_dw {
int ep_state;
};
-struct tegra_pcie_dw_of_data {
+struct tegra194_pcie_of_data {
enum dw_pcie_device_mode mode;
};
-static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci)
+static inline struct tegra194_pcie *to_tegra_pcie(struct dw_pcie *pci)
{
- return container_of(pci, struct tegra_pcie_dw, pci);
+ return container_of(pci, struct tegra194_pcie, pci);
}
-static inline void appl_writel(struct tegra_pcie_dw *pcie, const u32 value,
+static inline void appl_writel(struct tegra194_pcie *pcie, const u32 value,
const u32 reg)
{
writel_relaxed(value, pcie->appl_base + reg);
}
-static inline u32 appl_readl(struct tegra_pcie_dw *pcie, const u32 reg)
+static inline u32 appl_readl(struct tegra194_pcie *pcie, const u32 reg)
{
return readl_relaxed(pcie->appl_base + reg);
}
@@ -316,7 +316,7 @@ struct tegra_pcie_soc {
static void apply_bad_link_workaround(struct pcie_port *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
u32 current_link_width;
u16 val;
@@ -349,7 +349,7 @@ static void apply_bad_link_workaround(struct pcie_port *pp)
static irqreturn_t tegra_pcie_rp_irq_handler(int irq, void *arg)
{
- struct tegra_pcie_dw *pcie = arg;
+ struct tegra194_pcie *pcie = arg;
struct dw_pcie *pci = &pcie->pci;
struct pcie_port *pp = &pci->pp;
u32 val, tmp;
@@ -420,7 +420,7 @@ static irqreturn_t tegra_pcie_rp_irq_handler(int irq, void *arg)
return IRQ_HANDLED;
}
-static void pex_ep_event_hot_rst_done(struct tegra_pcie_dw *pcie)
+static void pex_ep_event_hot_rst_done(struct tegra194_pcie *pcie)
{
u32 val;
@@ -448,7 +448,7 @@ static void pex_ep_event_hot_rst_done(struct tegra_pcie_dw *pcie)
static irqreturn_t tegra_pcie_ep_irq_thread(int irq, void *arg)
{
- struct tegra_pcie_dw *pcie = arg;
+ struct tegra194_pcie *pcie = arg;
struct dw_pcie *pci = &pcie->pci;
u32 val, speed;
@@ -494,7 +494,7 @@ static irqreturn_t tegra_pcie_ep_irq_thread(int irq, void *arg)
static irqreturn_t tegra_pcie_ep_hard_irq(int irq, void *arg)
{
- struct tegra_pcie_dw *pcie = arg;
+ struct tegra194_pcie *pcie = arg;
struct dw_pcie_ep *ep = &pcie->pci.ep;
int spurious = 1;
u32 status_l0, status_l1, link_status;
@@ -537,7 +537,7 @@ static irqreturn_t tegra_pcie_ep_hard_irq(int irq, void *arg)
return IRQ_HANDLED;
}
-static int tegra_pcie_dw_rd_own_conf(struct pci_bus *bus, u32 devfn, int where,
+static int tegra194_pcie_rd_own_conf(struct pci_bus *bus, u32 devfn, int where,
int size, u32 *val)
{
/*
@@ -554,7 +554,7 @@ static int tegra_pcie_dw_rd_own_conf(struct pci_bus *bus, u32 devfn, int where,
return pci_generic_config_read(bus, devfn, where, size, val);
}
-static int tegra_pcie_dw_wr_own_conf(struct pci_bus *bus, u32 devfn, int where,
+static int tegra194_pcie_wr_own_conf(struct pci_bus *bus, u32 devfn, int where,
int size, u32 val)
{
/*
@@ -571,8 +571,8 @@ static int tegra_pcie_dw_wr_own_conf(struct pci_bus *bus, u32 devfn, int where,
static struct pci_ops tegra_pci_ops = {
.map_bus = dw_pcie_own_conf_map_bus,
- .read = tegra_pcie_dw_rd_own_conf,
- .write = tegra_pcie_dw_wr_own_conf,
+ .read = tegra194_pcie_rd_own_conf,
+ .write = tegra194_pcie_wr_own_conf,
};
#if defined(CONFIG_PCIEASPM)
@@ -594,7 +594,7 @@ static const u32 event_cntr_data_offset[] = {
0x1dc
};
-static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
+static void disable_aspm_l11(struct tegra194_pcie *pcie)
{
u32 val;
@@ -603,7 +603,7 @@ static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
dw_pcie_writel_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub, val);
}
-static void disable_aspm_l12(struct tegra_pcie_dw *pcie)
+static void disable_aspm_l12(struct tegra194_pcie *pcie)
{
u32 val;
@@ -612,7 +612,7 @@ static void disable_aspm_l12(struct tegra_pcie_dw *pcie)
dw_pcie_writel_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub, val);
}
-static inline u32 event_counter_prog(struct tegra_pcie_dw *pcie, u32 event)
+static inline u32 event_counter_prog(struct tegra194_pcie *pcie, u32 event)
{
u32 val;
@@ -629,7 +629,7 @@ static inline u32 event_counter_prog(struct tegra_pcie_dw *pcie, u32 event)
static int aspm_state_cnt(struct seq_file *s, void *data)
{
- struct tegra_pcie_dw *pcie = (struct tegra_pcie_dw *)
+ struct tegra194_pcie *pcie = (struct tegra194_pcie *)
dev_get_drvdata(s->private);
u32 val;
@@ -660,7 +660,7 @@ static int aspm_state_cnt(struct seq_file *s, void *data)
return 0;
}
-static void init_host_aspm(struct tegra_pcie_dw *pcie)
+static void init_host_aspm(struct tegra194_pcie *pcie)
{
struct dw_pcie *pci = &pcie->pci;
u32 val;
@@ -688,22 +688,22 @@ static void init_host_aspm(struct tegra_pcie_dw *pcie)
dw_pcie_writel_dbi(pci, PCIE_PORT_AFR, val);
}
-static void init_debugfs(struct tegra_pcie_dw *pcie)
+static void init_debugfs(struct tegra194_pcie *pcie)
{
debugfs_create_devm_seqfile(pcie->dev, "aspm_state_cnt", pcie->debugfs,
aspm_state_cnt);
}
#else
-static inline void disable_aspm_l12(struct tegra_pcie_dw *pcie) { return; }
-static inline void disable_aspm_l11(struct tegra_pcie_dw *pcie) { return; }
-static inline void init_host_aspm(struct tegra_pcie_dw *pcie) { return; }
-static inline void init_debugfs(struct tegra_pcie_dw *pcie) { return; }
+static inline void disable_aspm_l12(struct tegra194_pcie *pcie) { return; }
+static inline void disable_aspm_l11(struct tegra194_pcie *pcie) { return; }
+static inline void init_host_aspm(struct tegra194_pcie *pcie) { return; }
+static inline void init_debugfs(struct tegra194_pcie *pcie) { return; }
#endif
static void tegra_pcie_enable_system_interrupts(struct pcie_port *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
u32 val;
u16 val_w;
@@ -741,7 +741,7 @@ static void tegra_pcie_enable_system_interrupts(struct pcie_port *pp)
static void tegra_pcie_enable_legacy_interrupts(struct pcie_port *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
u32 val;
/* Enable legacy interrupt generation */
@@ -762,7 +762,7 @@ static void tegra_pcie_enable_legacy_interrupts(struct pcie_port *pp)
static void tegra_pcie_enable_msi_interrupts(struct pcie_port *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
u32 val;
/* Enable MSI interrupt generation */
@@ -775,7 +775,7 @@ static void tegra_pcie_enable_msi_interrupts(struct pcie_port *pp)
static void tegra_pcie_enable_interrupts(struct pcie_port *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
/* Clear interrupt statuses before enabling interrupts */
appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L0);
@@ -800,7 +800,7 @@ static void tegra_pcie_enable_interrupts(struct pcie_port *pp)
tegra_pcie_enable_msi_interrupts(pp);
}
-static void config_gen3_gen4_eq_presets(struct tegra_pcie_dw *pcie)
+static void config_gen3_gen4_eq_presets(struct tegra194_pcie *pcie)
{
struct dw_pcie *pci = &pcie->pci;
u32 val, offset, i;
@@ -853,10 +853,10 @@ static void config_gen3_gen4_eq_presets(struct tegra_pcie_dw *pcie)
dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
}
-static int tegra_pcie_dw_host_init(struct pcie_port *pp)
+static int tegra194_pcie_host_init(struct pcie_port *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
u32 val;
pp->bridge->ops = &tegra_pci_ops;
@@ -914,10 +914,10 @@ static int tegra_pcie_dw_host_init(struct pcie_port *pp)
return 0;
}
-static int tegra_pcie_dw_start_link(struct dw_pcie *pci)
+static int tegra194_pcie_start_link(struct dw_pcie *pci)
{
u32 val, offset, speed, tmp;
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
struct pcie_port *pp = &pci->pp;
bool retry = true;
@@ -982,7 +982,7 @@ retry_link:
val &= ~PCI_DLF_EXCHANGE_ENABLE;
dw_pcie_writel_dbi(pci, offset, val);
- tegra_pcie_dw_host_init(pp);
+ tegra194_pcie_host_init(pp);
dw_pcie_setup_rc(pp);
retry = false;
@@ -998,32 +998,32 @@ retry_link:
return 0;
}
-static int tegra_pcie_dw_link_up(struct dw_pcie *pci)
+static int tegra194_pcie_link_up(struct dw_pcie *pci)
{
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
u32 val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA);
return !!(val & PCI_EXP_LNKSTA_DLLLA);
}
-static void tegra_pcie_dw_stop_link(struct dw_pcie *pci)
+static void tegra194_pcie_stop_link(struct dw_pcie *pci)
{
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
disable_irq(pcie->pex_rst_irq);
}
static const struct dw_pcie_ops tegra_dw_pcie_ops = {
- .link_up = tegra_pcie_dw_link_up,
- .start_link = tegra_pcie_dw_start_link,
- .stop_link = tegra_pcie_dw_stop_link,
+ .link_up = tegra194_pcie_link_up,
+ .start_link = tegra194_pcie_start_link,
+ .stop_link = tegra194_pcie_stop_link,
};
-static const struct dw_pcie_host_ops tegra_pcie_dw_host_ops = {
- .host_init = tegra_pcie_dw_host_init,
+static const struct dw_pcie_host_ops tegra194_pcie_host_ops = {
+ .host_init = tegra194_pcie_host_init,
};
-static void tegra_pcie_disable_phy(struct tegra_pcie_dw *pcie)
+static void tegra_pcie_disable_phy(struct tegra194_pcie *pcie)
{
unsigned int phy_count = pcie->phy_count;
@@ -1033,7 +1033,7 @@ static void tegra_pcie_disable_phy(struct tegra_pcie_dw *pcie)
}
}
-static int tegra_pcie_enable_phy(struct tegra_pcie_dw *pcie)
+static int tegra_pcie_enable_phy(struct tegra194_pcie *pcie)
{
unsigned int i;
int ret;
@@ -1060,7 +1060,7 @@ phy_exit:
return ret;
}
-static int tegra_pcie_dw_parse_dt(struct tegra_pcie_dw *pcie)
+static int tegra194_pcie_parse_dt(struct tegra194_pcie *pcie)
{
struct platform_device *pdev = to_platform_device(pcie->dev);
struct device_node *np = pcie->dev->of_node;
@@ -1156,7 +1156,7 @@ static int tegra_pcie_dw_parse_dt(struct tegra_pcie_dw *pcie)
return 0;
}
-static int tegra_pcie_bpmp_set_ctrl_state(struct tegra_pcie_dw *pcie,
+static int tegra_pcie_bpmp_set_ctrl_state(struct tegra194_pcie *pcie,
bool enable)
{
struct mrq_uphy_response resp;
@@ -1184,7 +1184,7 @@ static int tegra_pcie_bpmp_set_ctrl_state(struct tegra_pcie_dw *pcie,
return tegra_bpmp_transfer(pcie->bpmp, &msg);
}
-static int tegra_pcie_bpmp_set_pll_state(struct tegra_pcie_dw *pcie,
+static int tegra_pcie_bpmp_set_pll_state(struct tegra194_pcie *pcie,
bool enable)
{
struct mrq_uphy_response resp;
@@ -1212,7 +1212,7 @@ static int tegra_pcie_bpmp_set_pll_state(struct tegra_pcie_dw *pcie,
return tegra_bpmp_transfer(pcie->bpmp, &msg);
}
-static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
+static void tegra_pcie_downstream_dev_to_D0(struct tegra194_pcie *pcie)
{
struct pcie_port *pp = &pcie->pci.pp;
struct pci_bus *child, *root_bus = NULL;
@@ -1250,7 +1250,7 @@ static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
}
}
-static int tegra_pcie_get_slot_regulators(struct tegra_pcie_dw *pcie)
+static int tegra_pcie_get_slot_regulators(struct tegra194_pcie *pcie)
{
pcie->slot_ctl_3v3 = devm_regulator_get_optional(pcie->dev, "vpcie3v3");
if (IS_ERR(pcie->slot_ctl_3v3)) {
@@ -1271,7 +1271,7 @@ static int tegra_pcie_get_slot_regulators(struct tegra_pcie_dw *pcie)
return 0;
}
-static int tegra_pcie_enable_slot_regulators(struct tegra_pcie_dw *pcie)
+static int tegra_pcie_enable_slot_regulators(struct tegra194_pcie *pcie)
{
int ret;
@@ -1309,7 +1309,7 @@ fail_12v_enable:
return ret;
}
-static void tegra_pcie_disable_slot_regulators(struct tegra_pcie_dw *pcie)
+static void tegra_pcie_disable_slot_regulators(struct tegra194_pcie *pcie)
{
if (pcie->slot_ctl_12v)
regulator_disable(pcie->slot_ctl_12v);
@@ -1317,7 +1317,7 @@ static void tegra_pcie_disable_slot_regulators(struct tegra_pcie_dw *pcie)
regulator_disable(pcie->slot_ctl_3v3);
}
-static int tegra_pcie_config_controller(struct tegra_pcie_dw *pcie,
+static int tegra_pcie_config_controller(struct tegra194_pcie *pcie,
bool en_hw_hot_rst)
{
int ret;
@@ -1414,7 +1414,7 @@ fail_slot_reg_en:
return ret;
}
-static void tegra_pcie_unconfig_controller(struct tegra_pcie_dw *pcie)
+static void tegra_pcie_unconfig_controller(struct tegra194_pcie *pcie)
{
int ret;
@@ -1442,7 +1442,7 @@ static void tegra_pcie_unconfig_controller(struct tegra_pcie_dw *pcie)
pcie->cid, ret);
}
-static int tegra_pcie_init_controller(struct tegra_pcie_dw *pcie)
+static int tegra_pcie_init_controller(struct tegra194_pcie *pcie)
{
struct dw_pcie *pci = &pcie->pci;
struct pcie_port *pp = &pci->pp;
@@ -1452,7 +1452,7 @@ static int tegra_pcie_init_controller(struct tegra_pcie_dw *pcie)
if (ret < 0)
return ret;
- pp->ops = &tegra_pcie_dw_host_ops;
+ pp->ops = &tegra194_pcie_host_ops;
ret = dw_pcie_host_init(pp);
if (ret < 0) {
@@ -1467,11 +1467,11 @@ fail_host_init:
return ret;
}
-static int tegra_pcie_try_link_l2(struct tegra_pcie_dw *pcie)
+static int tegra_pcie_try_link_l2(struct tegra194_pcie *pcie)
{
u32 val;
- if (!tegra_pcie_dw_link_up(&pcie->pci))
+ if (!tegra194_pcie_link_up(&pcie->pci))
return 0;
val = appl_readl(pcie, APPL_RADM_STATUS);
@@ -1483,12 +1483,12 @@ static int tegra_pcie_try_link_l2(struct tegra_pcie_dw *pcie)
1, PME_ACK_TIMEOUT);
}
-static void tegra_pcie_dw_pme_turnoff(struct tegra_pcie_dw *pcie)
+static void tegra194_pcie_pme_turnoff(struct tegra194_pcie *pcie)
{
u32 data;
int err;
- if (!tegra_pcie_dw_link_up(&pcie->pci)) {
+ if (!tegra194_pcie_link_up(&pcie->pci)) {
dev_dbg(pcie->dev, "PCIe link is not up...!\n");
return;
}
@@ -1545,15 +1545,15 @@ static void tegra_pcie_dw_pme_turnoff(struct tegra_pcie_dw *pcie)
appl_writel(pcie, data, APPL_PINMUX);
}
-static void tegra_pcie_deinit_controller(struct tegra_pcie_dw *pcie)
+static void tegra_pcie_deinit_controller(struct tegra194_pcie *pcie)
{
tegra_pcie_downstream_dev_to_D0(pcie);
dw_pcie_host_deinit(&pcie->pci.pp);
- tegra_pcie_dw_pme_turnoff(pcie);
+ tegra194_pcie_pme_turnoff(pcie);
tegra_pcie_unconfig_controller(pcie);
}
-static int tegra_pcie_config_rp(struct tegra_pcie_dw *pcie)
+static int tegra_pcie_config_rp(struct tegra194_pcie *pcie)
{
struct device *dev = pcie->dev;
char *name;
@@ -1580,7 +1580,7 @@ static int tegra_pcie_config_rp(struct tegra_pcie_dw *pcie)
goto fail_pm_get_sync;
}
- pcie->link_state = tegra_pcie_dw_link_up(&pcie->pci);
+ pcie->link_state = tegra194_pcie_link_up(&pcie->pci);
if (!pcie->link_state) {
ret = -ENOMEDIUM;
goto fail_host_init;
@@ -1605,7 +1605,7 @@ fail_pm_get_sync:
return ret;
}
-static void pex_ep_event_pex_rst_assert(struct tegra_pcie_dw *pcie)
+static void pex_ep_event_pex_rst_assert(struct tegra194_pcie *pcie)
{
u32 val;
int ret;
@@ -1644,7 +1644,7 @@ static void pex_ep_event_pex_rst_assert(struct tegra_pcie_dw *pcie)
dev_dbg(pcie->dev, "Uninitialization of endpoint is completed\n");
}
-static void pex_ep_event_pex_rst_deassert(struct tegra_pcie_dw *pcie)
+static void pex_ep_event_pex_rst_deassert(struct tegra194_pcie *pcie)
{
struct dw_pcie *pci = &pcie->pci;
struct dw_pcie_ep *ep = &pci->ep;
@@ -1809,7 +1809,7 @@ fail_pll_init:
static irqreturn_t tegra_pcie_ep_pex_rst_irq(int irq, void *arg)
{
- struct tegra_pcie_dw *pcie = arg;
+ struct tegra194_pcie *pcie = arg;
if (gpiod_get_value(pcie->pex_rst_gpiod))
pex_ep_event_pex_rst_assert(pcie);
@@ -1819,7 +1819,7 @@ static irqreturn_t tegra_pcie_ep_pex_rst_irq(int irq, void *arg)
return IRQ_HANDLED;
}
-static int tegra_pcie_ep_raise_legacy_irq(struct tegra_pcie_dw *pcie, u16 irq)
+static int tegra_pcie_ep_raise_legacy_irq(struct tegra194_pcie *pcie, u16 irq)
{
/* Tegra194 supports only INTA */
if (irq > 1)
@@ -1831,7 +1831,7 @@ static int tegra_pcie_ep_raise_legacy_irq(struct tegra_pcie_dw *pcie, u16 irq)
return 0;
}
-static int tegra_pcie_ep_raise_msi_irq(struct tegra_pcie_dw *pcie, u16 irq)
+static int tegra_pcie_ep_raise_msi_irq(struct tegra194_pcie *pcie, u16 irq)
{
if (unlikely(irq > 31))
return -EINVAL;
@@ -1841,7 +1841,7 @@ static int tegra_pcie_ep_raise_msi_irq(struct tegra_pcie_dw *pcie, u16 irq)
return 0;
}
-static int tegra_pcie_ep_raise_msix_irq(struct tegra_pcie_dw *pcie, u16 irq)
+static int tegra_pcie_ep_raise_msix_irq(struct tegra194_pcie *pcie, u16 irq)
{
struct dw_pcie_ep *ep = &pcie->pci.ep;
@@ -1855,7 +1855,7 @@ static int tegra_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
u16 interrupt_num)
{
struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
- struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+ struct tegra194_pcie *pcie = to_tegra_pcie(pci);
switch (type) {
case PCI_EPC_IRQ_LEGACY:
@@ -1896,7 +1896,7 @@ static const struct dw_pcie_ep_ops pcie_ep_ops = {
.get_features = tegra_pcie_ep_get_features,
};
-static int tegra_pcie_config_ep(struct tegra_pcie_dw *pcie,
+static int tegra_pcie_config_ep(struct tegra194_pcie *pcie,
struct platform_device *pdev)
{
struct dw_pcie *pci = &pcie->pci;
@@ -1957,12 +1957,12 @@ static int tegra_pcie_config_ep(struct tegra_pcie_dw *pcie,
return 0;
}
-static int tegra_pcie_dw_probe(struct platform_device *pdev)
+static int tegra194_pcie_probe(struct platform_device *pdev)
{
- const struct tegra_pcie_dw_of_data *data;
+ const struct tegra194_pcie_of_data *data;
struct device *dev = &pdev->dev;
struct resource *atu_dma_res;
- struct tegra_pcie_dw *pcie;
+ struct tegra194_pcie *pcie;
struct pcie_port *pp;
struct dw_pcie *pci;
struct phy **phys;
@@ -1988,7 +1988,7 @@ static int tegra_pcie_dw_probe(struct platform_device *pdev)
pcie->dev = &pdev->dev;
pcie->mode = (enum dw_pcie_device_mode)data->mode;
- ret = tegra_pcie_dw_parse_dt(pcie);
+ ret = tegra194_pcie_parse_dt(pcie);
if (ret < 0) {
const char *level = KERN_ERR;
@@ -2146,9 +2146,9 @@ fail:
return ret;
}
-static int tegra_pcie_dw_remove(struct platform_device *pdev)
+static int tegra194_pcie_remove(struct platform_device *pdev)
{
- struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+ struct tegra194_pcie *pcie = platform_get_drvdata(pdev);
if (!pcie->link_state)
return 0;
@@ -2164,9 +2164,9 @@ static int tegra_pcie_dw_remove(struct platform_device *pdev)
return 0;
}
-static int tegra_pcie_dw_suspend_late(struct device *dev)
+static int tegra194_pcie_suspend_late(struct device *dev)
{
- struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+ struct tegra194_pcie *pcie = dev_get_drvdata(dev);
u32 val;
if (!pcie->link_state)
@@ -2182,9 +2182,9 @@ static int tegra_pcie_dw_suspend_late(struct device *dev)
return 0;
}
-static int tegra_pcie_dw_suspend_noirq(struct device *dev)
+static int tegra194_pcie_suspend_noirq(struct device *dev)
{
- struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+ struct tegra194_pcie *pcie = dev_get_drvdata(dev);
if (!pcie->link_state)
return 0;
@@ -2193,15 +2193,15 @@ static int tegra_pcie_dw_suspend_noirq(struct device *dev)
pcie->msi_ctrl_int = dw_pcie_readl_dbi(&pcie->pci,
PORT_LOGIC_MSI_CTRL_INT_0_EN);
tegra_pcie_downstream_dev_to_D0(pcie);
- tegra_pcie_dw_pme_turnoff(pcie);
+ tegra194_pcie_pme_turnoff(pcie);
tegra_pcie_unconfig_controller(pcie);
return 0;
}
-static int tegra_pcie_dw_resume_noirq(struct device *dev)
+static int tegra194_pcie_resume_noirq(struct device *dev)
{
- struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+ struct tegra194_pcie *pcie = dev_get_drvdata(dev);
int ret;
if (!pcie->link_state)
@@ -2211,7 +2211,7 @@ static int tegra_pcie_dw_resume_noirq(struct device *dev)
if (ret < 0)
return ret;
- ret = tegra_pcie_dw_host_init(&pcie->pci.pp);
+ ret = tegra194_pcie_host_init(&pcie->pci.pp);
if (ret < 0) {
dev_err(dev, "Failed to init host: %d\n", ret);
goto fail_host_init;
@@ -2219,7 +2219,7 @@ static int tegra_pcie_dw_resume_noirq(struct device *dev)
dw_pcie_setup_rc(&pcie->pci.pp);
- ret = tegra_pcie_dw_start_link(&pcie->pci);
+ ret = tegra194_pcie_start_link(&pcie->pci);
if (ret < 0)
goto fail_host_init;
@@ -2234,9 +2234,9 @@ fail_host_init:
return ret;
}
-static int tegra_pcie_dw_resume_early(struct device *dev)
+static int tegra194_pcie_resume_early(struct device *dev)
{
- struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+ struct tegra194_pcie *pcie = dev_get_drvdata(dev);
u32 val;
if (pcie->mode == DW_PCIE_EP_TYPE) {
@@ -2259,9 +2259,9 @@ static int tegra_pcie_dw_resume_early(struct device *dev)
return 0;
}
-static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
+static void tegra194_pcie_shutdown(struct platform_device *pdev)
{
- struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+ struct tegra194_pcie *pcie = platform_get_drvdata(pdev);
if (!pcie->link_state)
return;
@@ -2273,50 +2273,50 @@ static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
if (IS_ENABLED(CONFIG_PCI_MSI))
disable_irq(pcie->pci.pp.msi_irq);
- tegra_pcie_dw_pme_turnoff(pcie);
+ tegra194_pcie_pme_turnoff(pcie);
tegra_pcie_unconfig_controller(pcie);
}
-static const struct tegra_pcie_dw_of_data tegra_pcie_dw_rc_of_data = {
+static const struct tegra194_pcie_of_data tegra194_pcie_rc_of_data = {
.mode = DW_PCIE_RC_TYPE,
};
-static const struct tegra_pcie_dw_of_data tegra_pcie_dw_ep_of_data = {
+static const struct tegra194_pcie_of_data tegra194_pcie_ep_of_data = {
.mode = DW_PCIE_EP_TYPE,
};
-static const struct of_device_id tegra_pcie_dw_of_match[] = {
+static const struct of_device_id tegra194_pcie_of_match[] = {
{
.compatible = "nvidia,tegra194-pcie",
- .data = &tegra_pcie_dw_rc_of_data,
+ .data = &tegra194_pcie_rc_of_data,
},
{
.compatible = "nvidia,tegra194-pcie-ep",
- .data = &tegra_pcie_dw_ep_of_data,
+ .data = &tegra194_pcie_ep_of_data,
},
{},
};
-static const struct dev_pm_ops tegra_pcie_dw_pm_ops = {
- .suspend_late = tegra_pcie_dw_suspend_late,
- .suspend_noirq = tegra_pcie_dw_suspend_noirq,
- .resume_noirq = tegra_pcie_dw_resume_noirq,
- .resume_early = tegra_pcie_dw_resume_early,
+static const struct dev_pm_ops tegra194_pcie_pm_ops = {
+ .suspend_late = tegra194_pcie_suspend_late,
+ .suspend_noirq = tegra194_pcie_suspend_noirq,
+ .resume_noirq = tegra194_pcie_resume_noirq,
+ .resume_early = tegra194_pcie_resume_early,
};
-static struct platform_driver tegra_pcie_dw_driver = {
- .probe = tegra_pcie_dw_probe,
- .remove = tegra_pcie_dw_remove,
- .shutdown = tegra_pcie_dw_shutdown,
+static struct platform_driver tegra194_pcie_driver = {
+ .probe = tegra194_pcie_probe,
+ .remove = tegra194_pcie_remove,
+ .shutdown = tegra194_pcie_shutdown,
.driver = {
.name = "tegra194-pcie",
- .pm = &tegra_pcie_dw_pm_ops,
- .of_match_table = tegra_pcie_dw_of_match,
+ .pm = &tegra194_pcie_pm_ops,
+ .of_match_table = tegra194_pcie_of_match,
},
};
-module_platform_driver(tegra_pcie_dw_driver);
+module_platform_driver(tegra194_pcie_driver);
-MODULE_DEVICE_TABLE(of, tegra_pcie_dw_of_match);
+MODULE_DEVICE_TABLE(of, tegra194_pcie_of_match);
MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
diff --git a/drivers/pci/controller/dwc/pcie-uniphier.c b/drivers/pci/controller/dwc/pcie-uniphier.c
index d05be942956e..b45ac3754242 100644
--- a/drivers/pci/controller/dwc/pcie-uniphier.c
+++ b/drivers/pci/controller/dwc/pcie-uniphier.c
@@ -61,9 +61,9 @@
#define PCL_RDLH_LINK_UP BIT(1)
#define PCL_XMLH_LINK_UP BIT(0)
-struct uniphier_pcie_priv {
- void __iomem *base;
+struct uniphier_pcie {
struct dw_pcie pci;
+ void __iomem *base;
struct clk *clk;
struct reset_control *rst;
struct phy *phy;
@@ -72,62 +72,62 @@ struct uniphier_pcie_priv {
#define to_uniphier_pcie(x) dev_get_drvdata((x)->dev)
-static void uniphier_pcie_ltssm_enable(struct uniphier_pcie_priv *priv,
+static void uniphier_pcie_ltssm_enable(struct uniphier_pcie *pcie,
bool enable)
{
u32 val;
- val = readl(priv->base + PCL_APP_READY_CTRL);
+ val = readl(pcie->base + PCL_APP_READY_CTRL);
if (enable)
val |= PCL_APP_LTSSM_ENABLE;
else
val &= ~PCL_APP_LTSSM_ENABLE;
- writel(val, priv->base + PCL_APP_READY_CTRL);
+ writel(val, pcie->base + PCL_APP_READY_CTRL);
}
-static void uniphier_pcie_init_rc(struct uniphier_pcie_priv *priv)
+static void uniphier_pcie_init_rc(struct uniphier_pcie *pcie)
{
u32 val;
/* set RC MODE */
- val = readl(priv->base + PCL_MODE);
+ val = readl(pcie->base + PCL_MODE);
val |= PCL_MODE_REGEN;
val &= ~PCL_MODE_REGVAL;
- writel(val, priv->base + PCL_MODE);
+ writel(val, pcie->base + PCL_MODE);
/* use auxiliary power detection */
- val = readl(priv->base + PCL_APP_PM0);
+ val = readl(pcie->base + PCL_APP_PM0);
val |= PCL_SYS_AUX_PWR_DET;
- writel(val, priv->base + PCL_APP_PM0);
+ writel(val, pcie->base + PCL_APP_PM0);
/* assert PERST# */
- val = readl(priv->base + PCL_PINCTRL0);
+ val = readl(pcie->base + PCL_PINCTRL0);
val &= ~(PCL_PERST_NOE_REGVAL | PCL_PERST_OUT_REGVAL
| PCL_PERST_PLDN_REGVAL);
val |= PCL_PERST_NOE_REGEN | PCL_PERST_OUT_REGEN
| PCL_PERST_PLDN_REGEN;
- writel(val, priv->base + PCL_PINCTRL0);
+ writel(val, pcie->base + PCL_PINCTRL0);
- uniphier_pcie_ltssm_enable(priv, false);
+ uniphier_pcie_ltssm_enable(pcie, false);
usleep_range(100000, 200000);
/* deassert PERST# */
- val = readl(priv->base + PCL_PINCTRL0);
+ val = readl(pcie->base + PCL_PINCTRL0);
val |= PCL_PERST_OUT_REGVAL | PCL_PERST_OUT_REGEN;
- writel(val, priv->base + PCL_PINCTRL0);
+ writel(val, pcie->base + PCL_PINCTRL0);
}
-static int uniphier_pcie_wait_rc(struct uniphier_pcie_priv *priv)
+static int uniphier_pcie_wait_rc(struct uniphier_pcie *pcie)
{
u32 status;
int ret;
/* wait PIPE clock */
- ret = readl_poll_timeout(priv->base + PCL_PIPEMON, status,
+ ret = readl_poll_timeout(pcie->base + PCL_PIPEMON, status,
status & PCL_PCLK_ALIVE, 100000, 1000000);
if (ret) {
- dev_err(priv->pci.dev,
+ dev_err(pcie->pci.dev,
"Failed to initialize controller in RC mode\n");
return ret;
}
@@ -137,10 +137,10 @@ static int uniphier_pcie_wait_rc(struct uniphier_pcie_priv *priv)
static int uniphier_pcie_link_up(struct dw_pcie *pci)
{
- struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
+ struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
u32 val, mask;
- val = readl(priv->base + PCL_STATUS_LINK);
+ val = readl(pcie->base + PCL_STATUS_LINK);
mask = PCL_RDLH_LINK_UP | PCL_XMLH_LINK_UP;
return (val & mask) == mask;
@@ -148,39 +148,40 @@ static int uniphier_pcie_link_up(struct dw_pcie *pci)
static int uniphier_pcie_start_link(struct dw_pcie *pci)
{
- struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
+ struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
- uniphier_pcie_ltssm_enable(priv, true);
+ uniphier_pcie_ltssm_enable(pcie, true);
return 0;
}
static void uniphier_pcie_stop_link(struct dw_pcie *pci)
{
- struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
+ struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
- uniphier_pcie_ltssm_enable(priv, false);
+ uniphier_pcie_ltssm_enable(pcie, false);
}
-static void uniphier_pcie_irq_enable(struct uniphier_pcie_priv *priv)
+static void uniphier_pcie_irq_enable(struct uniphier_pcie *pcie)
{
- writel(PCL_RCV_INT_ALL_ENABLE, priv->base + PCL_RCV_INT);
- writel(PCL_RCV_INTX_ALL_ENABLE, priv->base + PCL_RCV_INTX);
+ writel(PCL_RCV_INT_ALL_ENABLE, pcie->base + PCL_RCV_INT);
+ writel(PCL_RCV_INTX_ALL_ENABLE, pcie->base + PCL_RCV_INTX);
}
+
static void uniphier_pcie_irq_mask(struct irq_data *d)
{
struct pcie_port *pp = irq_data_get_irq_chip_data(d);
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
+ struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
unsigned long flags;
u32 val;
raw_spin_lock_irqsave(&pp->lock, flags);
- val = readl(priv->base + PCL_RCV_INTX);
+ val = readl(pcie->base + PCL_RCV_INTX);
val |= BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_MASK_SHIFT);
- writel(val, priv->base + PCL_RCV_INTX);
+ writel(val, pcie->base + PCL_RCV_INTX);
raw_spin_unlock_irqrestore(&pp->lock, flags);
}
@@ -189,15 +190,15 @@ static void uniphier_pcie_irq_unmask(struct irq_data *d)
{
struct pcie_port *pp = irq_data_get_irq_chip_data(d);
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
+ struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
unsigned long flags;
u32 val;
raw_spin_lock_irqsave(&pp->lock, flags);
- val = readl(priv->base + PCL_RCV_INTX);
+ val = readl(pcie->base + PCL_RCV_INTX);
val &= ~BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_MASK_SHIFT);
- writel(val, priv->base + PCL_RCV_INTX);
+ writel(val, pcie->base + PCL_RCV_INTX);
raw_spin_unlock_irqrestore(&pp->lock, flags);
}
@@ -226,13 +227,13 @@ static void uniphier_pcie_irq_handler(struct irq_desc *desc)
{
struct pcie_port *pp = irq_desc_get_handler_data(desc);
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
+ struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned long reg;
u32 val, bit;
/* INT for debug */
- val = readl(priv->base + PCL_RCV_INT);
+ val = readl(pcie->base + PCL_RCV_INT);
if (val & PCL_CFG_BW_MGT_STATUS)
dev_dbg(pci->dev, "Link Bandwidth Management Event\n");
@@ -243,16 +244,16 @@ static void uniphier_pcie_irq_handler(struct irq_desc *desc)
if (val & PCL_CFG_PME_MSI_STATUS)
dev_dbg(pci->dev, "PME Interrupt\n");
- writel(val, priv->base + PCL_RCV_INT);
+ writel(val, pcie->base + PCL_RCV_INT);
/* INTx */
chained_irq_enter(chip, desc);
- val = readl(priv->base + PCL_RCV_INTX);
+ val = readl(pcie->base + PCL_RCV_INTX);
reg = FIELD_GET(PCL_RCV_INTX_ALL_STATUS, val);
for_each_set_bit(bit, &reg, PCI_NUM_INTX)
- generic_handle_domain_irq(priv->legacy_irq_domain, bit);
+ generic_handle_domain_irq(pcie->legacy_irq_domain, bit);
chained_irq_exit(chip, desc);
}
@@ -260,7 +261,7 @@ static void uniphier_pcie_irq_handler(struct irq_desc *desc)
static int uniphier_pcie_config_legacy_irq(struct pcie_port *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
+ struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
struct device_node *np = pci->dev->of_node;
struct device_node *np_intc;
int ret = 0;
@@ -278,9 +279,9 @@ static int uniphier_pcie_config_legacy_irq(struct pcie_port *pp)
goto out_put_node;
}
- priv->legacy_irq_domain = irq_domain_add_linear(np_intc, PCI_NUM_INTX,
+ pcie->legacy_irq_domain = irq_domain_add_linear(np_intc, PCI_NUM_INTX,
&uniphier_intx_domain_ops, pp);
- if (!priv->legacy_irq_domain) {
+ if (!pcie->legacy_irq_domain) {
dev_err(pci->dev, "Failed to get INTx domain\n");
ret = -ENODEV;
goto out_put_node;
@@ -297,14 +298,14 @@ out_put_node:
static int uniphier_pcie_host_init(struct pcie_port *pp)
{
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
- struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
+ struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
int ret;
ret = uniphier_pcie_config_legacy_irq(pp);
if (ret)
return ret;
- uniphier_pcie_irq_enable(priv);
+ uniphier_pcie_irq_enable(pcie);
return 0;
}
@@ -313,36 +314,36 @@ static const struct dw_pcie_host_ops uniphier_pcie_host_ops = {
.host_init = uniphier_pcie_host_init,
};
-static int uniphier_pcie_host_enable(struct uniphier_pcie_priv *priv)
+static int uniphier_pcie_host_enable(struct uniphier_pcie *pcie)
{
int ret;
- ret = clk_prepare_enable(priv->clk);
+ ret = clk_prepare_enable(pcie->clk);
if (ret)
return ret;
- ret = reset_control_deassert(priv->rst);
+ ret = reset_control_deassert(pcie->rst);
if (ret)
goto out_clk_disable;
- uniphier_pcie_init_rc(priv);
+ uniphier_pcie_init_rc(pcie);
- ret = phy_init(priv->phy);
+ ret = phy_init(pcie->phy);
if (ret)
goto out_rst_assert;
- ret = uniphier_pcie_wait_rc(priv);
+ ret = uniphier_pcie_wait_rc(pcie);
if (ret)
goto out_phy_exit;
return 0;
out_phy_exit:
- phy_exit(priv->phy);
+ phy_exit(pcie->phy);
out_rst_assert:
- reset_control_assert(priv->rst);
+ reset_control_assert(pcie->rst);
out_clk_disable:
- clk_disable_unprepare(priv->clk);
+ clk_disable_unprepare(pcie->clk);
return ret;
}
@@ -356,41 +357,41 @@ static const struct dw_pcie_ops dw_pcie_ops = {
static int uniphier_pcie_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct uniphier_pcie_priv *priv;
+ struct uniphier_pcie *pcie;
int ret;
- priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
+ pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+ if (!pcie)
return -ENOMEM;
- priv->pci.dev = dev;
- priv->pci.ops = &dw_pcie_ops;
+ pcie->pci.dev = dev;
+ pcie->pci.ops = &dw_pcie_ops;
- priv->base = devm_platform_ioremap_resource_byname(pdev, "link");
- if (IS_ERR(priv->base))
- return PTR_ERR(priv->base);
+ pcie->base = devm_platform_ioremap_resource_byname(pdev, "link");
+ if (IS_ERR(pcie->base))
+ return PTR_ERR(pcie->base);
- priv->clk = devm_clk_get(dev, NULL);
- if (IS_ERR(priv->clk))
- return PTR_ERR(priv->clk);
+ pcie->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(pcie->clk))
+ return PTR_ERR(pcie->clk);
- priv->rst = devm_reset_control_get_shared(dev, NULL);
- if (IS_ERR(priv->rst))
- return PTR_ERR(priv->rst);
+ pcie->rst = devm_reset_control_get_shared(dev, NULL);
+ if (IS_ERR(pcie->rst))
+ return PTR_ERR(pcie->rst);
- priv->phy = devm_phy_optional_get(dev, "pcie-phy");
- if (IS_ERR(priv->phy))
- return PTR_ERR(priv->phy);
+ pcie->phy = devm_phy_optional_get(dev, "pcie-phy");
+ if (IS_ERR(pcie->phy))
+ return PTR_ERR(pcie->phy);
- platform_set_drvdata(pdev, priv);
+ platform_set_drvdata(pdev, pcie);
- ret = uniphier_pcie_host_enable(priv);
+ ret = uniphier_pcie_host_enable(pcie);
if (ret)
return ret;
- priv->pci.pp.ops = &uniphier_pcie_host_ops;
+ pcie->pci.pp.ops = &uniphier_pcie_host_ops;
- return dw_pcie_host_init(&priv->pci.pp);
+ return dw_pcie_host_init(&pcie->pci.pp);
}
static const struct of_device_id uniphier_pcie_match[] = {
diff --git a/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c b/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c
index 306950272fd6..d7b7350f02dd 100644
--- a/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c
+++ b/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c
@@ -34,31 +34,31 @@
#define PF_DBG_WE BIT(31)
#define PF_DBG_PABR BIT(27)
-#define to_ls_pcie_g4(x) platform_get_drvdata((x)->pdev)
+#define to_ls_g4_pcie(x) platform_get_drvdata((x)->pdev)
-struct ls_pcie_g4 {
+struct ls_g4_pcie {
struct mobiveil_pcie pci;
struct delayed_work dwork;
int irq;
};
-static inline u32 ls_pcie_g4_pf_readl(struct ls_pcie_g4 *pcie, u32 off)
+static inline u32 ls_g4_pcie_pf_readl(struct ls_g4_pcie *pcie, u32 off)
{
return ioread32(pcie->pci.csr_axi_slave_base + PCIE_PF_OFF + off);
}
-static inline void ls_pcie_g4_pf_writel(struct ls_pcie_g4 *pcie,
+static inline void ls_g4_pcie_pf_writel(struct ls_g4_pcie *pcie,
u32 off, u32 val)
{
iowrite32(val, pcie->pci.csr_axi_slave_base + PCIE_PF_OFF + off);
}
-static int ls_pcie_g4_link_up(struct mobiveil_pcie *pci)
+static int ls_g4_pcie_link_up(struct mobiveil_pcie *pci)
{
- struct ls_pcie_g4 *pcie = to_ls_pcie_g4(pci);
+ struct ls_g4_pcie *pcie = to_ls_g4_pcie(pci);
u32 state;
- state = ls_pcie_g4_pf_readl(pcie, PCIE_PF_DBG);
+ state = ls_g4_pcie_pf_readl(pcie, PCIE_PF_DBG);
state = state & PF_DBG_LTSSM_MASK;
if (state == PF_DBG_LTSSM_L0)
@@ -67,14 +67,14 @@ static int ls_pcie_g4_link_up(struct mobiveil_pcie *pci)
return 0;
}
-static void ls_pcie_g4_disable_interrupt(struct ls_pcie_g4 *pcie)
+static void ls_g4_pcie_disable_interrupt(struct ls_g4_pcie *pcie)
{
struct mobiveil_pcie *mv_pci = &pcie->pci;
mobiveil_csr_writel(mv_pci, 0, PAB_INTP_AMBA_MISC_ENB);
}
-static void ls_pcie_g4_enable_interrupt(struct ls_pcie_g4 *pcie)
+static void ls_g4_pcie_enable_interrupt(struct ls_g4_pcie *pcie)
{
struct mobiveil_pcie *mv_pci = &pcie->pci;
u32 val;
@@ -87,7 +87,7 @@ static void ls_pcie_g4_enable_interrupt(struct ls_pcie_g4 *pcie)
mobiveil_csr_writel(mv_pci, val, PAB_INTP_AMBA_MISC_ENB);
}
-static int ls_pcie_g4_reinit_hw(struct ls_pcie_g4 *pcie)
+static int ls_g4_pcie_reinit_hw(struct ls_g4_pcie *pcie)
{
struct mobiveil_pcie *mv_pci = &pcie->pci;
struct device *dev = &mv_pci->pdev->dev;
@@ -97,7 +97,7 @@ static int ls_pcie_g4_reinit_hw(struct ls_pcie_g4 *pcie)
/* Poll for pab_csb_reset to set and PAB activity to clear */
do {
usleep_range(10, 15);
- val = ls_pcie_g4_pf_readl(pcie, PCIE_PF_INT_STAT);
+ val = ls_g4_pcie_pf_readl(pcie, PCIE_PF_INT_STAT);
act_stat = mobiveil_csr_readl(mv_pci, PAB_ACTIVITY_STAT);
} while (((val & PF_INT_STAT_PABRST) == 0 || act_stat) && to--);
if (to < 0) {
@@ -106,22 +106,22 @@ static int ls_pcie_g4_reinit_hw(struct ls_pcie_g4 *pcie)
}
/* clear PEX_RESET bit in PEX_PF0_DBG register */
- val = ls_pcie_g4_pf_readl(pcie, PCIE_PF_DBG);
+ val = ls_g4_pcie_pf_readl(pcie, PCIE_PF_DBG);
val |= PF_DBG_WE;
- ls_pcie_g4_pf_writel(pcie, PCIE_PF_DBG, val);
+ ls_g4_pcie_pf_writel(pcie, PCIE_PF_DBG, val);
- val = ls_pcie_g4_pf_readl(pcie, PCIE_PF_DBG);
+ val = ls_g4_pcie_pf_readl(pcie, PCIE_PF_DBG);
val |= PF_DBG_PABR;
- ls_pcie_g4_pf_writel(pcie, PCIE_PF_DBG, val);
+ ls_g4_pcie_pf_writel(pcie, PCIE_PF_DBG, val);
- val = ls_pcie_g4_pf_readl(pcie, PCIE_PF_DBG);
+ val = ls_g4_pcie_pf_readl(pcie, PCIE_PF_DBG);
val &= ~PF_DBG_WE;
- ls_pcie_g4_pf_writel(pcie, PCIE_PF_DBG, val);
+ ls_g4_pcie_pf_writel(pcie, PCIE_PF_DBG, val);
mobiveil_host_init(mv_pci, true);
to = 100;
- while (!ls_pcie_g4_link_up(mv_pci) && to--)
+ while (!ls_g4_pcie_link_up(mv_pci) && to--)
usleep_range(200, 250);
if (to < 0) {
dev_err(dev, "PCIe link training timeout\n");
@@ -131,9 +131,9 @@ static int ls_pcie_g4_reinit_hw(struct ls_pcie_g4 *pcie)
return 0;
}
-static irqreturn_t ls_pcie_g4_isr(int irq, void *dev_id)
+static irqreturn_t ls_g4_pcie_isr(int irq, void *dev_id)
{
- struct ls_pcie_g4 *pcie = (struct ls_pcie_g4 *)dev_id;
+ struct ls_g4_pcie *pcie = (struct ls_g4_pcie *)dev_id;
struct mobiveil_pcie *mv_pci = &pcie->pci;
u32 val;
@@ -142,7 +142,7 @@ static irqreturn_t ls_pcie_g4_isr(int irq, void *dev_id)
return IRQ_NONE;
if (val & PAB_INTP_RESET) {
- ls_pcie_g4_disable_interrupt(pcie);
+ ls_g4_pcie_disable_interrupt(pcie);
schedule_delayed_work(&pcie->dwork, msecs_to_jiffies(1));
}
@@ -151,9 +151,9 @@ static irqreturn_t ls_pcie_g4_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int ls_pcie_g4_interrupt_init(struct mobiveil_pcie *mv_pci)
+static int ls_g4_pcie_interrupt_init(struct mobiveil_pcie *mv_pci)
{
- struct ls_pcie_g4 *pcie = to_ls_pcie_g4(mv_pci);
+ struct ls_g4_pcie *pcie = to_ls_g4_pcie(mv_pci);
struct platform_device *pdev = mv_pci->pdev;
struct device *dev = &pdev->dev;
int ret;
@@ -162,7 +162,7 @@ static int ls_pcie_g4_interrupt_init(struct mobiveil_pcie *mv_pci)
if (pcie->irq < 0)
return pcie->irq;
- ret = devm_request_irq(dev, pcie->irq, ls_pcie_g4_isr,
+ ret = devm_request_irq(dev, pcie->irq, ls_g4_pcie_isr,
IRQF_SHARED, pdev->name, pcie);
if (ret) {
dev_err(dev, "Can't register PCIe IRQ, errno = %d\n", ret);
@@ -172,11 +172,11 @@ static int ls_pcie_g4_interrupt_init(struct mobiveil_pcie *mv_pci)
return 0;
}
-static void ls_pcie_g4_reset(struct work_struct *work)
+static void ls_g4_pcie_reset(struct work_struct *work)
{
struct delayed_work *dwork = container_of(work, struct delayed_work,
work);
- struct ls_pcie_g4 *pcie = container_of(dwork, struct ls_pcie_g4, dwork);
+ struct ls_g4_pcie *pcie = container_of(dwork, struct ls_g4_pcie, dwork);
struct mobiveil_pcie *mv_pci = &pcie->pci;
u16 ctrl;
@@ -184,26 +184,26 @@ static void ls_pcie_g4_reset(struct work_struct *work)
ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
mobiveil_csr_writew(mv_pci, ctrl, PCI_BRIDGE_CONTROL);
- if (!ls_pcie_g4_reinit_hw(pcie))
+ if (!ls_g4_pcie_reinit_hw(pcie))
return;
- ls_pcie_g4_enable_interrupt(pcie);
+ ls_g4_pcie_enable_interrupt(pcie);
}
-static struct mobiveil_rp_ops ls_pcie_g4_rp_ops = {
- .interrupt_init = ls_pcie_g4_interrupt_init,
+static struct mobiveil_rp_ops ls_g4_pcie_rp_ops = {
+ .interrupt_init = ls_g4_pcie_interrupt_init,
};
-static const struct mobiveil_pab_ops ls_pcie_g4_pab_ops = {
- .link_up = ls_pcie_g4_link_up,
+static const struct mobiveil_pab_ops ls_g4_pcie_pab_ops = {
+ .link_up = ls_g4_pcie_link_up,
};
-static int __init ls_pcie_g4_probe(struct platform_device *pdev)
+static int __init ls_g4_pcie_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct pci_host_bridge *bridge;
struct mobiveil_pcie *mv_pci;
- struct ls_pcie_g4 *pcie;
+ struct ls_g4_pcie *pcie;
struct device_node *np = dev->of_node;
int ret;
@@ -220,13 +220,13 @@ static int __init ls_pcie_g4_probe(struct platform_device *pdev)
mv_pci = &pcie->pci;
mv_pci->pdev = pdev;
- mv_pci->ops = &ls_pcie_g4_pab_ops;
- mv_pci->rp.ops = &ls_pcie_g4_rp_ops;
+ mv_pci->ops = &ls_g4_pcie_pab_ops;
+ mv_pci->rp.ops = &ls_g4_pcie_rp_ops;
mv_pci->rp.bridge = bridge;
platform_set_drvdata(pdev, pcie);
- INIT_DELAYED_WORK(&pcie->dwork, ls_pcie_g4_reset);
+ INIT_DELAYED_WORK(&pcie->dwork, ls_g4_pcie_reset);
ret = mobiveil_pcie_host_probe(mv_pci);
if (ret) {
@@ -234,22 +234,22 @@ static int __init ls_pcie_g4_probe(struct platform_device *pdev)
return ret;
}
- ls_pcie_g4_enable_interrupt(pcie);
+ ls_g4_pcie_enable_interrupt(pcie);
return 0;
}
-static const struct of_device_id ls_pcie_g4_of_match[] = {
+static const struct of_device_id ls_g4_pcie_of_match[] = {
{ .compatible = "fsl,lx2160a-pcie", },
{ },
};
-static struct platform_driver ls_pcie_g4_driver = {
+static struct platform_driver ls_g4_pcie_driver = {
.driver = {
.name = "layerscape-pcie-gen4",
- .of_match_table = ls_pcie_g4_of_match,
+ .of_match_table = ls_g4_pcie_of_match,
.suppress_bind_attrs = true,
},
};
-builtin_platform_driver_probe(ls_pcie_g4_driver, ls_pcie_g4_probe);
+builtin_platform_driver_probe(ls_g4_pcie_driver, ls_g4_pcie_probe);
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index c3b725afa11f..4f5b44827d21 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -115,6 +115,7 @@
#define PCIE_MSI_ADDR_HIGH_REG (CONTROL_BASE_ADDR + 0x54)
#define PCIE_MSI_STATUS_REG (CONTROL_BASE_ADDR + 0x58)
#define PCIE_MSI_MASK_REG (CONTROL_BASE_ADDR + 0x5C)
+#define PCIE_MSI_ALL_MASK GENMASK(31, 0)
#define PCIE_MSI_PAYLOAD_REG (CONTROL_BASE_ADDR + 0x9C)
#define PCIE_MSI_DATA_MASK GENMASK(15, 0)
@@ -570,6 +571,7 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG);
/* Clear all interrupts */
+ advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_STATUS_REG);
advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_REG);
advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG);
advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG);
@@ -582,7 +584,7 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG);
/* Unmask all MSIs */
- advk_writel(pcie, 0, PCIE_MSI_MASK_REG);
+ advk_writel(pcie, ~(u32)PCIE_MSI_ALL_MASK, PCIE_MSI_MASK_REG);
/* Enable summary interrupt for GIC SPI source */
reg = PCIE_IRQ_ALL_MASK & (~PCIE_IRQ_ENABLE_INTS_MASK);
@@ -872,11 +874,15 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
return PCI_BRIDGE_EMUL_HANDLED;
}
- case PCI_CAP_LIST_ID:
case PCI_EXP_DEVCAP:
case PCI_EXP_DEVCTL:
+ case PCI_EXP_DEVCAP2:
+ case PCI_EXP_DEVCTL2:
+ case PCI_EXP_LNKCAP2:
+ case PCI_EXP_LNKCTL2:
*value = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg);
return PCI_BRIDGE_EMUL_HANDLED;
+
default:
return PCI_BRIDGE_EMUL_NOT_HANDLED;
}
@@ -890,10 +896,6 @@ advk_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
struct advk_pcie *pcie = bridge->data;
switch (reg) {
- case PCI_EXP_DEVCTL:
- advk_writel(pcie, new, PCIE_CORE_PCIEXP_CAP + reg);
- break;
-
case PCI_EXP_LNKCTL:
advk_writel(pcie, new, PCIE_CORE_PCIEXP_CAP + reg);
if (new & PCI_EXP_LNKCTL_RL)
@@ -915,6 +917,12 @@ advk_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
advk_writel(pcie, new, PCIE_ISR0_REG);
break;
+ case PCI_EXP_DEVCTL:
+ case PCI_EXP_DEVCTL2:
+ case PCI_EXP_LNKCTL2:
+ advk_writel(pcie, new, PCIE_CORE_PCIEXP_CAP + reg);
+ break;
+
default:
break;
}
@@ -953,6 +961,9 @@ static int advk_sw_pci_bridge_init(struct advk_pcie *pcie)
/* Support interrupt A for MSI feature */
bridge->conf.intpin = PCIE_CORE_INT_A_ASSERT_ENABLE;
+ /* Aardvark HW provides PCIe Capability structure in version 2 */
+ bridge->pcie_conf.cap = cpu_to_le16(2);
+
/* Indicates supports for Completion Retry Status */
bridge->pcie_conf.rootcap = cpu_to_le16(PCI_EXP_RTCAP_CRSVIS);
@@ -1017,10 +1028,8 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
u32 reg;
int ret;
- if (!advk_pcie_valid_device(pcie, bus, devfn)) {
- *val = 0xffffffff;
+ if (!advk_pcie_valid_device(pcie, bus, devfn))
return PCIBIOS_DEVICE_NOT_FOUND;
- }
if (pci_is_root_bus(bus))
return pci_bridge_emul_conf_read(&pcie->bridge, where,
@@ -1383,7 +1392,7 @@ static void advk_pcie_handle_msi(struct advk_pcie *pcie)
msi_mask = advk_readl(pcie, PCIE_MSI_MASK_REG);
msi_val = advk_readl(pcie, PCIE_MSI_STATUS_REG);
- msi_status = msi_val & ~msi_mask;
+ msi_status = msi_val & ((~msi_mask) & PCIE_MSI_ALL_MASK);
for (msi_idx = 0; msi_idx < MSI_IRQ_NUM; msi_idx++) {
if (!(BIT(msi_idx) & msi_status))
@@ -1535,8 +1544,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
* only PIO for issuing configuration transfers which does
* not use PCIe window configuration.
*/
- if (type != IORESOURCE_MEM && type != IORESOURCE_MEM_64 &&
- type != IORESOURCE_IO)
+ if (type != IORESOURCE_MEM && type != IORESOURCE_IO)
continue;
/*
@@ -1544,8 +1552,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
* configuration is set to transparent memory access so it
* does not need window configuration.
*/
- if ((type == IORESOURCE_MEM || type == IORESOURCE_MEM_64) &&
- entry->offset == 0)
+ if (type == IORESOURCE_MEM && entry->offset == 0)
continue;
/*
@@ -1677,20 +1684,64 @@ static int advk_pcie_remove(struct platform_device *pdev)
{
struct advk_pcie *pcie = platform_get_drvdata(pdev);
struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+ u32 val;
int i;
+ /* Remove PCI bus with all devices */
pci_lock_rescan_remove();
pci_stop_root_bus(bridge->bus);
pci_remove_root_bus(bridge->bus);
pci_unlock_rescan_remove();
+ /* Disable Root Bridge I/O space, memory space and bus mastering */
+ val = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG);
+ val &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+ advk_writel(pcie, val, PCIE_CORE_CMD_STATUS_REG);
+
+ /* Disable MSI */
+ val = advk_readl(pcie, PCIE_CORE_CTRL2_REG);
+ val &= ~PCIE_CORE_CTRL2_MSI_ENABLE;
+ advk_writel(pcie, val, PCIE_CORE_CTRL2_REG);
+
+ /* Clear MSI address */
+ advk_writel(pcie, 0, PCIE_MSI_ADDR_LOW_REG);
+ advk_writel(pcie, 0, PCIE_MSI_ADDR_HIGH_REG);
+
+ /* Mask all interrupts */
+ advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_MASK_REG);
+ advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_MASK_REG);
+ advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG);
+ advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_MASK_REG);
+
+ /* Clear all interrupts */
+ advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_STATUS_REG);
+ advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_REG);
+ advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG);
+ advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG);
+
+ /* Remove IRQ domains */
advk_pcie_remove_msi_irq_domain(pcie);
advk_pcie_remove_irq_domain(pcie);
+ /* Free config space for emulated root bridge */
+ pci_bridge_emul_cleanup(&pcie->bridge);
+
+ /* Assert PERST# signal which prepares PCIe card for power down */
+ if (pcie->reset_gpio)
+ gpiod_set_value_cansleep(pcie->reset_gpio, 1);
+
+ /* Disable link training */
+ val = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
+ val &= ~LINK_TRAINING_EN;
+ advk_writel(pcie, val, PCIE_CORE_CTRL0_REG);
+
/* Disable outbound address windows mapping */
for (i = 0; i < OB_WIN_COUNT; i++)
advk_pcie_disable_ob_win(pcie, i);
+ /* Disable phy */
+ advk_pcie_disable_phy(pcie);
+
return 0;
}
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 45b6ff8dc665..20ea2ee330b8 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -43,13 +43,12 @@
#include <linux/pci-ecam.h>
#include <linux/delay.h>
#include <linux/semaphore.h>
-#include <linux/irqdomain.h>
-#include <asm/irqdomain.h>
-#include <asm/apic.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/hyperv.h>
#include <linux/refcount.h>
+#include <linux/irqdomain.h>
+#include <linux/acpi.h>
#include <asm/mshyperv.h>
/*
@@ -583,6 +582,265 @@ struct hv_pci_compl {
static void hv_pci_onchannelcallback(void *context);
+#ifdef CONFIG_X86
+#define DELIVERY_MODE APIC_DELIVERY_MODE_FIXED
+#define FLOW_HANDLER handle_edge_irq
+#define FLOW_NAME "edge"
+
+static int hv_pci_irqchip_init(void)
+{
+ return 0;
+}
+
+static struct irq_domain *hv_pci_get_root_domain(void)
+{
+ return x86_vector_domain;
+}
+
+static unsigned int hv_msi_get_int_vector(struct irq_data *data)
+{
+ struct irq_cfg *cfg = irqd_cfg(data);
+
+ return cfg->vector;
+}
+
+static void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
+ struct msi_desc *msi_desc)
+{
+ msi_entry->address.as_uint32 = msi_desc->msg.address_lo;
+ msi_entry->data.as_uint32 = msi_desc->msg.data;
+}
+
+static int hv_msi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *info)
+{
+ return pci_msi_prepare(domain, dev, nvec, info);
+}
+#elif defined(CONFIG_ARM64)
+/*
+ * SPI vectors to use for vPCI; arch SPIs range is [32, 1019], but leaving a bit
+ * of room at the start to allow for SPIs to be specified through ACPI and
+ * starting with a power of two to satisfy power of 2 multi-MSI requirement.
+ */
+#define HV_PCI_MSI_SPI_START 64
+#define HV_PCI_MSI_SPI_NR (1020 - HV_PCI_MSI_SPI_START)
+#define DELIVERY_MODE 0
+#define FLOW_HANDLER NULL
+#define FLOW_NAME NULL
+#define hv_msi_prepare NULL
+
+struct hv_pci_chip_data {
+ DECLARE_BITMAP(spi_map, HV_PCI_MSI_SPI_NR);
+ struct mutex map_lock;
+};
+
+/* Hyper-V vPCI MSI GIC IRQ domain */
+static struct irq_domain *hv_msi_gic_irq_domain;
+
+/* Hyper-V PCI MSI IRQ chip */
+static struct irq_chip hv_arm64_msi_irq_chip = {
+ .name = "MSI",
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent
+};
+
+static unsigned int hv_msi_get_int_vector(struct irq_data *irqd)
+{
+ return irqd->parent_data->hwirq;
+}
+
+static void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
+ struct msi_desc *msi_desc)
+{
+ msi_entry->address = ((u64)msi_desc->msg.address_hi << 32) |
+ msi_desc->msg.address_lo;
+ msi_entry->data = msi_desc->msg.data;
+}
+
+/*
+ * @nr_bm_irqs: Indicates the number of IRQs that were allocated from
+ * the bitmap.
+ * @nr_dom_irqs: Indicates the number of IRQs that were allocated from
+ * the parent domain.
+ */
+static void hv_pci_vec_irq_free(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_bm_irqs,
+ unsigned int nr_dom_irqs)
+{
+ struct hv_pci_chip_data *chip_data = domain->host_data;
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ int first = d->hwirq - HV_PCI_MSI_SPI_START;
+ int i;
+
+ mutex_lock(&chip_data->map_lock);
+ bitmap_release_region(chip_data->spi_map,
+ first,
+ get_count_order(nr_bm_irqs));
+ mutex_unlock(&chip_data->map_lock);
+ for (i = 0; i < nr_dom_irqs; i++) {
+ if (i)
+ d = irq_domain_get_irq_data(domain, virq + i);
+ irq_domain_reset_irq_data(d);
+ }
+
+ irq_domain_free_irqs_parent(domain, virq, nr_dom_irqs);
+}
+
+static void hv_pci_vec_irq_domain_free(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_irqs)
+{
+ hv_pci_vec_irq_free(domain, virq, nr_irqs, nr_irqs);
+}
+
+static int hv_pci_vec_alloc_device_irq(struct irq_domain *domain,
+ unsigned int nr_irqs,
+ irq_hw_number_t *hwirq)
+{
+ struct hv_pci_chip_data *chip_data = domain->host_data;
+ int index;
+
+ /* Find and allocate region from the SPI bitmap */
+ mutex_lock(&chip_data->map_lock);
+ index = bitmap_find_free_region(chip_data->spi_map,
+ HV_PCI_MSI_SPI_NR,
+ get_count_order(nr_irqs));
+ mutex_unlock(&chip_data->map_lock);
+ if (index < 0)
+ return -ENOSPC;
+
+ *hwirq = index + HV_PCI_MSI_SPI_START;
+
+ return 0;
+}
+
+static int hv_pci_vec_irq_gic_domain_alloc(struct irq_domain *domain,
+ unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ struct irq_fwspec fwspec;
+ struct irq_data *d;
+ int ret;
+
+ fwspec.fwnode = domain->parent->fwnode;
+ fwspec.param_count = 2;
+ fwspec.param[0] = hwirq;
+ fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+ if (ret)
+ return ret;
+
+ /*
+ * Since the interrupt specifier is not coming from ACPI or DT, the
+ * trigger type will need to be set explicitly. Otherwise, it will be
+ * set to whatever is in the GIC configuration.
+ */
+ d = irq_domain_get_irq_data(domain->parent, virq);
+
+ return d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
+}
+
+static int hv_pci_vec_irq_domain_alloc(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs,
+ void *args)
+{
+ irq_hw_number_t hwirq;
+ unsigned int i;
+ int ret;
+
+ ret = hv_pci_vec_alloc_device_irq(domain, nr_irqs, &hwirq);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = hv_pci_vec_irq_gic_domain_alloc(domain, virq + i,
+ hwirq + i);
+ if (ret) {
+ hv_pci_vec_irq_free(domain, virq, nr_irqs, i);
+ return ret;
+ }
+
+ irq_domain_set_hwirq_and_chip(domain, virq + i,
+ hwirq + i,
+ &hv_arm64_msi_irq_chip,
+ domain->host_data);
+ pr_debug("pID:%d vID:%u\n", (int)(hwirq + i), virq + i);
+ }
+
+ return 0;
+}
+
+/*
+ * Pick the first cpu as the irq affinity that can be temporarily used for
+ * composing MSI from the hypervisor. GIC will eventually set the right
+ * affinity for the irq and the 'unmask' will retarget the interrupt to that
+ * cpu.
+ */
+static int hv_pci_vec_irq_domain_activate(struct irq_domain *domain,
+ struct irq_data *irqd, bool reserve)
+{
+ int cpu = cpumask_first(cpu_present_mask);
+
+ irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
+
+ return 0;
+}
+
+static const struct irq_domain_ops hv_pci_domain_ops = {
+ .alloc = hv_pci_vec_irq_domain_alloc,
+ .free = hv_pci_vec_irq_domain_free,
+ .activate = hv_pci_vec_irq_domain_activate,
+};
+
+static int hv_pci_irqchip_init(void)
+{
+ static struct hv_pci_chip_data *chip_data;
+ struct fwnode_handle *fn = NULL;
+ int ret = -ENOMEM;
+
+ chip_data = kzalloc(sizeof(*chip_data), GFP_KERNEL);
+ if (!chip_data)
+ return ret;
+
+ mutex_init(&chip_data->map_lock);
+ fn = irq_domain_alloc_named_fwnode("hv_vpci_arm64");
+ if (!fn)
+ goto free_chip;
+
+ /*
+ * IRQ domain once enabled, should not be removed since there is no
+ * way to ensure that all the corresponding devices are also gone and
+ * no interrupts will be generated.
+ */
+ hv_msi_gic_irq_domain = acpi_irq_create_hierarchy(0, HV_PCI_MSI_SPI_NR,
+ fn, &hv_pci_domain_ops,
+ chip_data);
+
+ if (!hv_msi_gic_irq_domain) {
+ pr_err("Failed to create Hyper-V arm64 vPCI MSI IRQ domain\n");
+ goto free_chip;
+ }
+
+ return 0;
+
+free_chip:
+ kfree(chip_data);
+ if (fn)
+ irq_domain_free_fwnode(fn);
+
+ return ret;
+}
+
+static struct irq_domain *hv_pci_get_root_domain(void)
+{
+ return hv_msi_gic_irq_domain;
+}
+#endif /* CONFIG_ARM64 */
+
/**
* hv_pci_generic_compl() - Invoked for a completion packet
* @context: Set up by the sender of the packet.
@@ -1191,17 +1449,11 @@ static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info,
put_pcichild(hpdev);
}
-static int hv_set_affinity(struct irq_data *data, const struct cpumask *dest,
- bool force)
-{
- struct irq_data *parent = data->parent_data;
-
- return parent->chip->irq_set_affinity(parent, dest, force);
-}
-
static void hv_irq_mask(struct irq_data *data)
{
pci_msi_mask_irq(data);
+ if (data->parent_data->chip->irq_mask)
+ irq_chip_mask_parent(data);
}
/**
@@ -1217,7 +1469,6 @@ static void hv_irq_mask(struct irq_data *data)
static void hv_irq_unmask(struct irq_data *data)
{
struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
- struct irq_cfg *cfg = irqd_cfg(data);
struct hv_retarget_device_interrupt *params;
struct hv_pcibus_device *hbus;
struct cpumask *dest;
@@ -1246,7 +1497,7 @@ static void hv_irq_unmask(struct irq_data *data)
(hbus->hdev->dev_instance.b[7] << 8) |
(hbus->hdev->dev_instance.b[6] & 0xf8) |
PCI_FUNC(pdev->devfn);
- params->int_target.vector = cfg->vector;
+ params->int_target.vector = hv_msi_get_int_vector(data);
/*
* Honoring apic->delivery_mode set to APIC_DELIVERY_MODE_FIXED by
@@ -1319,6 +1570,8 @@ exit_unlock:
dev_err(&hbus->hdev->device,
"%s() failed: %#llx", __func__, res);
+ if (data->parent_data->chip->irq_unmask)
+ irq_chip_unmask_parent(data);
pci_msi_unmask_irq(data);
}
@@ -1347,7 +1600,7 @@ static u32 hv_compose_msi_req_v1(
int_pkt->wslot.slot = slot;
int_pkt->int_desc.vector = vector;
int_pkt->int_desc.vector_count = 1;
- int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
/*
* Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
@@ -1377,7 +1630,7 @@ static u32 hv_compose_msi_req_v2(
int_pkt->wslot.slot = slot;
int_pkt->int_desc.vector = vector;
int_pkt->int_desc.vector_count = 1;
- int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
cpu = hv_compose_msi_req_get_cpu(affinity);
int_pkt->int_desc.processor_array[0] =
hv_cpu_number_to_vp_number(cpu);
@@ -1397,7 +1650,7 @@ static u32 hv_compose_msi_req_v3(
int_pkt->int_desc.vector = vector;
int_pkt->int_desc.reserved = 0;
int_pkt->int_desc.vector_count = 1;
- int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
cpu = hv_compose_msi_req_get_cpu(affinity);
int_pkt->int_desc.processor_array[0] =
hv_cpu_number_to_vp_number(cpu);
@@ -1419,7 +1672,6 @@ static u32 hv_compose_msi_req_v3(
*/
static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
- struct irq_cfg *cfg = irqd_cfg(data);
struct hv_pcibus_device *hbus;
struct vmbus_channel *channel;
struct hv_pci_dev *hpdev;
@@ -1470,7 +1722,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
dest,
hpdev->desc.win_slot.slot,
- cfg->vector);
+ hv_msi_get_int_vector(data));
break;
case PCI_PROTOCOL_VERSION_1_2:
@@ -1478,14 +1730,14 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
dest,
hpdev->desc.win_slot.slot,
- cfg->vector);
+ hv_msi_get_int_vector(data));
break;
case PCI_PROTOCOL_VERSION_1_4:
size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
dest,
hpdev->desc.win_slot.slot,
- cfg->vector);
+ hv_msi_get_int_vector(data));
break;
default:
@@ -1594,14 +1846,18 @@ return_null_message:
static struct irq_chip hv_msi_irq_chip = {
.name = "Hyper-V PCIe MSI",
.irq_compose_msi_msg = hv_compose_msi_msg,
- .irq_set_affinity = hv_set_affinity,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+#ifdef CONFIG_X86
.irq_ack = irq_chip_ack_parent,
+#elif defined(CONFIG_ARM64)
+ .irq_eoi = irq_chip_eoi_parent,
+#endif
.irq_mask = hv_irq_mask,
.irq_unmask = hv_irq_unmask,
};
static struct msi_domain_ops hv_msi_ops = {
- .msi_prepare = pci_msi_prepare,
+ .msi_prepare = hv_msi_prepare,
.msi_free = hv_msi_free,
};
@@ -1625,12 +1881,12 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
hbus->msi_info.flags = (MSI_FLAG_USE_DEF_DOM_OPS |
MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI |
MSI_FLAG_PCI_MSIX);
- hbus->msi_info.handler = handle_edge_irq;
- hbus->msi_info.handler_name = "edge";
+ hbus->msi_info.handler = FLOW_HANDLER;
+ hbus->msi_info.handler_name = FLOW_NAME;
hbus->msi_info.data = hbus;
hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode,
&hbus->msi_info,
- x86_vector_domain);
+ hv_pci_get_root_domain());
if (!hbus->irq_domain) {
dev_err(&hbus->hdev->device,
"Failed to build an MSI IRQ domain\n");
@@ -1774,7 +2030,7 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
* If the memory enable bit is already set, Hyper-V silently ignores
* the below BAR updates, and the related PCI device driver can not
* work, because reading from the device register(s) always returns
- * 0xFFFFFFFF.
+ * 0xFFFFFFFF (PCI_ERROR_RESPONSE).
*/
list_for_each_entry(hpdev, &hbus->children, list_entry) {
_hv_pcifront_read_config(hpdev, PCI_COMMAND, 2, &command);
@@ -3547,9 +3803,15 @@ static void __exit exit_hv_pci_drv(void)
static int __init init_hv_pci_drv(void)
{
+ int ret;
+
if (!hv_is_hyperv_initialized())
return -ENODEV;
+ ret = hv_pci_irqchip_init();
+ if (ret)
+ return ret;
+
/* Set the invalid domain number's bit, so it will not be used */
set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index ed13e81cd691..71258ea3d35f 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -6,6 +6,7 @@
*/
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/pci.h>
#include <linux/clk.h>
#include <linux/delay.h>
@@ -51,10 +52,14 @@
PCIE_CONF_FUNC(PCI_FUNC(devfn)) | PCIE_CONF_REG(where) | \
PCIE_CONF_ADDR_EN)
#define PCIE_CONF_DATA_OFF 0x18fc
+#define PCIE_INT_CAUSE_OFF 0x1900
+#define PCIE_INT_PM_PME BIT(28)
#define PCIE_MASK_OFF 0x1910
#define PCIE_MASK_ENABLE_INTS 0x0f000000
#define PCIE_CTRL_OFF 0x1a00
#define PCIE_CTRL_X1_MODE 0x0001
+#define PCIE_CTRL_RC_MODE BIT(1)
+#define PCIE_CTRL_MASTER_HOT_RESET BIT(24)
#define PCIE_STAT_OFF 0x1a04
#define PCIE_STAT_BUS 0xff00
#define PCIE_STAT_DEV 0x1f0000
@@ -125,6 +130,11 @@ static bool mvebu_pcie_link_up(struct mvebu_pcie_port *port)
return !(mvebu_readl(port, PCIE_STAT_OFF) & PCIE_STAT_LINK_DOWN);
}
+static u8 mvebu_pcie_get_local_bus_nr(struct mvebu_pcie_port *port)
+{
+ return (mvebu_readl(port, PCIE_STAT_OFF) & PCIE_STAT_BUS) >> 8;
+}
+
static void mvebu_pcie_set_local_bus_nr(struct mvebu_pcie_port *port, int nr)
{
u32 stat;
@@ -145,22 +155,13 @@ static void mvebu_pcie_set_local_dev_nr(struct mvebu_pcie_port *port, int nr)
mvebu_writel(port, stat, PCIE_STAT_OFF);
}
-/*
- * Setup PCIE BARs and Address Decode Wins:
- * BAR[0] -> internal registers (needed for MSI)
- * BAR[1] -> covers all DRAM banks
- * BAR[2] -> Disabled
- * WIN[0-3] -> DRAM bank[0-3]
- */
-static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
+static void mvebu_pcie_disable_wins(struct mvebu_pcie_port *port)
{
- const struct mbus_dram_target_info *dram;
- u32 size;
int i;
- dram = mv_mbus_dram_info();
+ mvebu_writel(port, 0, PCIE_BAR_LO_OFF(0));
+ mvebu_writel(port, 0, PCIE_BAR_HI_OFF(0));
- /* First, disable and clear BARs and windows. */
for (i = 1; i < 3; i++) {
mvebu_writel(port, 0, PCIE_BAR_CTRL_OFF(i));
mvebu_writel(port, 0, PCIE_BAR_LO_OFF(i));
@@ -176,6 +177,25 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
mvebu_writel(port, 0, PCIE_WIN5_CTRL_OFF);
mvebu_writel(port, 0, PCIE_WIN5_BASE_OFF);
mvebu_writel(port, 0, PCIE_WIN5_REMAP_OFF);
+}
+
+/*
+ * Setup PCIE BARs and Address Decode Wins:
+ * BAR[0] -> internal registers (needed for MSI)
+ * BAR[1] -> covers all DRAM banks
+ * BAR[2] -> Disabled
+ * WIN[0-3] -> DRAM bank[0-3]
+ */
+static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
+{
+ const struct mbus_dram_target_info *dram;
+ u32 size;
+ int i;
+
+ dram = mv_mbus_dram_info();
+
+ /* First, disable and clear BARs and windows. */
+ mvebu_pcie_disable_wins(port);
/* Setup windows for DDR banks. Count total DDR size on the fly. */
size = 0;
@@ -213,18 +233,47 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
{
- u32 cmd, mask;
+ u32 ctrl, cmd, dev_rev, mask;
- /* Point PCIe unit MBUS decode windows to DRAM space. */
- mvebu_pcie_setup_wins(port);
+ /* Setup PCIe controller to Root Complex mode. */
+ ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
+ ctrl |= PCIE_CTRL_RC_MODE;
+ mvebu_writel(port, ctrl, PCIE_CTRL_OFF);
- /* Master + slave enable. */
+ /* Disable Root Bridge I/O space, memory space and bus mastering. */
cmd = mvebu_readl(port, PCIE_CMD_OFF);
- cmd |= PCI_COMMAND_IO;
- cmd |= PCI_COMMAND_MEMORY;
- cmd |= PCI_COMMAND_MASTER;
+ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
mvebu_writel(port, cmd, PCIE_CMD_OFF);
+ /*
+ * Change Class Code of PCI Bridge device to PCI Bridge (0x6004)
+ * because default value is Memory controller (0x5080).
+ *
+ * Note that this mvebu PCI Bridge does not have compliant Type 1
+ * Configuration Space. Header Type is reported as Type 0 and it
+ * has format of Type 0 config space.
+ *
+ * Moreover Type 0 BAR registers (ranges 0x10 - 0x28 and 0x30 - 0x34)
+ * have the same format in Marvell's specification as in PCIe
+ * specification, but their meaning is totally different and they do
+ * different things: they are aliased into internal mvebu registers
+ * (e.g. PCIE_BAR_LO_OFF) and these should not be changed or
+ * reconfigured by pci device drivers.
+ *
+ * Therefore driver uses emulation of PCI Bridge which emulates
+ * access to configuration space via internal mvebu registers or
+ * emulated configuration buffer. Driver access these PCI Bridge
+ * directly for simplification, but these registers can be accessed
+ * also via standard mvebu way for accessing PCI config space.
+ */
+ dev_rev = mvebu_readl(port, PCIE_DEV_REV_OFF);
+ dev_rev &= ~0xffffff00;
+ dev_rev |= (PCI_CLASS_BRIDGE_PCI << 8) << 8;
+ mvebu_writel(port, dev_rev, PCIE_DEV_REV_OFF);
+
+ /* Point PCIe unit MBUS decode windows to DRAM space. */
+ mvebu_pcie_setup_wins(port);
+
/* Enable interrupt lines A-D. */
mask = mvebu_readl(port, PCIE_MASK_OFF);
mask |= PCIE_MASK_ENABLE_INTS;
@@ -250,6 +299,9 @@ static int mvebu_pcie_hw_rd_conf(struct mvebu_pcie_port *port,
case 4:
*val = readl_relaxed(conf_data);
break;
+ default:
+ *val = 0xffffffff;
+ return PCIBIOS_BAD_REGISTER_NUMBER;
}
return PCIBIOS_SUCCESSFUL;
@@ -303,7 +355,7 @@ static void mvebu_pcie_del_windows(struct mvebu_pcie_port *port,
* areas each having a power of two size. We start from the largest
* one (i.e highest order bit set in the size).
*/
-static void mvebu_pcie_add_windows(struct mvebu_pcie_port *port,
+static int mvebu_pcie_add_windows(struct mvebu_pcie_port *port,
unsigned int target, unsigned int attribute,
phys_addr_t base, size_t size,
phys_addr_t remap)
@@ -324,7 +376,7 @@ static void mvebu_pcie_add_windows(struct mvebu_pcie_port *port,
&base, &end, ret);
mvebu_pcie_del_windows(port, base - size_mapped,
size_mapped);
- return;
+ return ret;
}
size -= sz;
@@ -333,16 +385,20 @@ static void mvebu_pcie_add_windows(struct mvebu_pcie_port *port,
if (remap != MVEBU_MBUS_NO_REMAP)
remap += sz;
}
+
+ return 0;
}
-static void mvebu_pcie_set_window(struct mvebu_pcie_port *port,
+static int mvebu_pcie_set_window(struct mvebu_pcie_port *port,
unsigned int target, unsigned int attribute,
const struct mvebu_pcie_window *desired,
struct mvebu_pcie_window *cur)
{
+ int ret;
+
if (desired->base == cur->base && desired->remap == cur->remap &&
desired->size == cur->size)
- return;
+ return 0;
if (cur->size != 0) {
mvebu_pcie_del_windows(port, cur->base, cur->size);
@@ -357,31 +413,35 @@ static void mvebu_pcie_set_window(struct mvebu_pcie_port *port,
}
if (desired->size == 0)
- return;
+ return 0;
+
+ ret = mvebu_pcie_add_windows(port, target, attribute, desired->base,
+ desired->size, desired->remap);
+ if (ret) {
+ cur->size = 0;
+ cur->base = 0;
+ return ret;
+ }
- mvebu_pcie_add_windows(port, target, attribute, desired->base,
- desired->size, desired->remap);
*cur = *desired;
+ return 0;
}
-static void mvebu_pcie_handle_iobase_change(struct mvebu_pcie_port *port)
+static int mvebu_pcie_handle_iobase_change(struct mvebu_pcie_port *port)
{
struct mvebu_pcie_window desired = {};
struct pci_bridge_emul_conf *conf = &port->bridge.conf;
/* Are the new iobase/iolimit values invalid? */
if (conf->iolimit < conf->iobase ||
- conf->iolimitupper < conf->iobaseupper ||
- !(conf->command & PCI_COMMAND_IO)) {
- mvebu_pcie_set_window(port, port->io_target, port->io_attr,
- &desired, &port->iowin);
- return;
- }
+ conf->iolimitupper < conf->iobaseupper)
+ return mvebu_pcie_set_window(port, port->io_target, port->io_attr,
+ &desired, &port->iowin);
if (!mvebu_has_ioport(port)) {
dev_WARN(&port->pcie->pdev->dev,
"Attempt to set IO when IO is disabled\n");
- return;
+ return -EOPNOTSUPP;
}
/*
@@ -399,22 +459,19 @@ static void mvebu_pcie_handle_iobase_change(struct mvebu_pcie_port *port)
desired.remap) +
1;
- mvebu_pcie_set_window(port, port->io_target, port->io_attr, &desired,
- &port->iowin);
+ return mvebu_pcie_set_window(port, port->io_target, port->io_attr, &desired,
+ &port->iowin);
}
-static void mvebu_pcie_handle_membase_change(struct mvebu_pcie_port *port)
+static int mvebu_pcie_handle_membase_change(struct mvebu_pcie_port *port)
{
struct mvebu_pcie_window desired = {.remap = MVEBU_MBUS_NO_REMAP};
struct pci_bridge_emul_conf *conf = &port->bridge.conf;
/* Are the new membase/memlimit values invalid? */
- if (conf->memlimit < conf->membase ||
- !(conf->command & PCI_COMMAND_MEMORY)) {
- mvebu_pcie_set_window(port, port->mem_target, port->mem_attr,
- &desired, &port->memwin);
- return;
- }
+ if (conf->memlimit < conf->membase)
+ return mvebu_pcie_set_window(port, port->mem_target, port->mem_attr,
+ &desired, &port->memwin);
/*
* We read the PCI-to-PCI bridge emulated registers, and
@@ -426,8 +483,56 @@ static void mvebu_pcie_handle_membase_change(struct mvebu_pcie_port *port)
desired.size = (((conf->memlimit & 0xFFF0) << 16) | 0xFFFFF) -
desired.base + 1;
- mvebu_pcie_set_window(port, port->mem_target, port->mem_attr, &desired,
- &port->memwin);
+ return mvebu_pcie_set_window(port, port->mem_target, port->mem_attr, &desired,
+ &port->memwin);
+}
+
+static pci_bridge_emul_read_status_t
+mvebu_pci_bridge_emul_base_conf_read(struct pci_bridge_emul *bridge,
+ int reg, u32 *value)
+{
+ struct mvebu_pcie_port *port = bridge->data;
+
+ switch (reg) {
+ case PCI_COMMAND:
+ *value = mvebu_readl(port, PCIE_CMD_OFF);
+ break;
+
+ case PCI_PRIMARY_BUS: {
+ /*
+ * From the whole 32bit register we support reading from HW only
+ * secondary bus number which is mvebu local bus number.
+ * Other bits are retrieved only from emulated config buffer.
+ */
+ __le32 *cfgspace = (__le32 *)&bridge->conf;
+ u32 val = le32_to_cpu(cfgspace[PCI_PRIMARY_BUS / 4]);
+ val &= ~0xff00;
+ val |= mvebu_pcie_get_local_bus_nr(port) << 8;
+ *value = val;
+ break;
+ }
+
+ case PCI_INTERRUPT_LINE: {
+ /*
+ * From the whole 32bit register we support reading from HW only
+ * one bit: PCI_BRIDGE_CTL_BUS_RESET.
+ * Other bits are retrieved only from emulated config buffer.
+ */
+ __le32 *cfgspace = (__le32 *)&bridge->conf;
+ u32 val = le32_to_cpu(cfgspace[PCI_INTERRUPT_LINE / 4]);
+ if (mvebu_readl(port, PCIE_CTRL_OFF) & PCIE_CTRL_MASTER_HOT_RESET)
+ val |= PCI_BRIDGE_CTL_BUS_RESET << 16;
+ else
+ val &= ~(PCI_BRIDGE_CTL_BUS_RESET << 16);
+ *value = val;
+ break;
+ }
+
+ default:
+ return PCI_BRIDGE_EMUL_NOT_HANDLED;
+ }
+
+ return PCI_BRIDGE_EMUL_HANDLED;
}
static pci_bridge_emul_read_status_t
@@ -442,9 +547,7 @@ mvebu_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
break;
case PCI_EXP_DEVCTL:
- *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL) &
- ~(PCI_EXP_DEVCTL_URRE | PCI_EXP_DEVCTL_FERE |
- PCI_EXP_DEVCTL_NFERE | PCI_EXP_DEVCTL_CERE);
+ *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL);
break;
case PCI_EXP_LNKCAP:
@@ -468,6 +571,18 @@ mvebu_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
*value = mvebu_readl(port, PCIE_RC_RTSTA);
break;
+ case PCI_EXP_DEVCAP2:
+ *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCAP2);
+ break;
+
+ case PCI_EXP_DEVCTL2:
+ *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL2);
+ break;
+
+ case PCI_EXP_LNKCTL2:
+ *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_LNKCTL2);
+ break;
+
default:
return PCI_BRIDGE_EMUL_NOT_HANDLED;
}
@@ -484,39 +599,62 @@ mvebu_pci_bridge_emul_base_conf_write(struct pci_bridge_emul *bridge,
switch (reg) {
case PCI_COMMAND:
- {
- if (!mvebu_has_ioport(port))
- conf->command &= ~PCI_COMMAND_IO;
-
- if ((old ^ new) & PCI_COMMAND_IO)
- mvebu_pcie_handle_iobase_change(port);
- if ((old ^ new) & PCI_COMMAND_MEMORY)
- mvebu_pcie_handle_membase_change(port);
+ if (!mvebu_has_ioport(port)) {
+ conf->command = cpu_to_le16(
+ le16_to_cpu(conf->command) & ~PCI_COMMAND_IO);
+ new &= ~PCI_COMMAND_IO;
+ }
+ mvebu_writel(port, new, PCIE_CMD_OFF);
break;
- }
case PCI_IO_BASE:
- /*
- * We keep bit 1 set, it is a read-only bit that
- * indicates we support 32 bits addressing for the
- * I/O
- */
- conf->iobase |= PCI_IO_RANGE_TYPE_32;
- conf->iolimit |= PCI_IO_RANGE_TYPE_32;
- mvebu_pcie_handle_iobase_change(port);
+ if ((mask & 0xffff) && mvebu_pcie_handle_iobase_change(port)) {
+ /* On error disable IO range */
+ conf->iobase &= ~0xf0;
+ conf->iolimit &= ~0xf0;
+ conf->iobaseupper = cpu_to_le16(0x0000);
+ conf->iolimitupper = cpu_to_le16(0x0000);
+ if (mvebu_has_ioport(port))
+ conf->iobase |= 0xf0;
+ }
break;
case PCI_MEMORY_BASE:
- mvebu_pcie_handle_membase_change(port);
+ if (mvebu_pcie_handle_membase_change(port)) {
+ /* On error disable mem range */
+ conf->membase = cpu_to_le16(le16_to_cpu(conf->membase) & ~0xfff0);
+ conf->memlimit = cpu_to_le16(le16_to_cpu(conf->memlimit) & ~0xfff0);
+ conf->membase = cpu_to_le16(le16_to_cpu(conf->membase) | 0xfff0);
+ }
break;
case PCI_IO_BASE_UPPER16:
- mvebu_pcie_handle_iobase_change(port);
+ if (mvebu_pcie_handle_iobase_change(port)) {
+ /* On error disable IO range */
+ conf->iobase &= ~0xf0;
+ conf->iolimit &= ~0xf0;
+ conf->iobaseupper = cpu_to_le16(0x0000);
+ conf->iolimitupper = cpu_to_le16(0x0000);
+ if (mvebu_has_ioport(port))
+ conf->iobase |= 0xf0;
+ }
break;
case PCI_PRIMARY_BUS:
- mvebu_pcie_set_local_bus_nr(port, conf->secondary_bus);
+ if (mask & 0xff00)
+ mvebu_pcie_set_local_bus_nr(port, conf->secondary_bus);
+ break;
+
+ case PCI_INTERRUPT_LINE:
+ if (mask & (PCI_BRIDGE_CTL_BUS_RESET << 16)) {
+ u32 ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
+ if (new & (PCI_BRIDGE_CTL_BUS_RESET << 16))
+ ctrl |= PCIE_CTRL_MASTER_HOT_RESET;
+ else
+ ctrl &= ~PCIE_CTRL_MASTER_HOT_RESET;
+ mvebu_writel(port, ctrl, PCIE_CTRL_OFF);
+ }
break;
default:
@@ -532,13 +670,6 @@ mvebu_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
switch (reg) {
case PCI_EXP_DEVCTL:
- /*
- * Armada370 data says these bits must always
- * be zero when in root complex mode.
- */
- new &= ~(PCI_EXP_DEVCTL_URRE | PCI_EXP_DEVCTL_FERE |
- PCI_EXP_DEVCTL_NFERE | PCI_EXP_DEVCTL_CERE);
-
mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL);
break;
@@ -555,12 +686,31 @@ mvebu_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
break;
case PCI_EXP_RTSTA:
- mvebu_writel(port, new, PCIE_RC_RTSTA);
+ /*
+ * PME Status bit in Root Status Register (PCIE_RC_RTSTA)
+ * is read-only and can be cleared only by writing 0b to the
+ * Interrupt Cause RW0C register (PCIE_INT_CAUSE_OFF). So
+ * clear PME via Interrupt Cause.
+ */
+ if (new & PCI_EXP_RTSTA_PME)
+ mvebu_writel(port, ~PCIE_INT_PM_PME, PCIE_INT_CAUSE_OFF);
+ break;
+
+ case PCI_EXP_DEVCTL2:
+ mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL2);
+ break;
+
+ case PCI_EXP_LNKCTL2:
+ mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_LNKCTL2);
+ break;
+
+ default:
break;
}
}
static struct pci_bridge_emul_ops mvebu_pci_bridge_emul_ops = {
+ .read_base = mvebu_pci_bridge_emul_base_conf_read,
.write_base = mvebu_pci_bridge_emul_base_conf_write,
.read_pcie = mvebu_pci_bridge_emul_pcie_conf_read,
.write_pcie = mvebu_pci_bridge_emul_pcie_conf_write,
@@ -570,9 +720,11 @@ static struct pci_bridge_emul_ops mvebu_pci_bridge_emul_ops = {
* Initialize the configuration space of the PCI-to-PCI bridge
* associated with the given PCIe interface.
*/
-static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port)
+static int mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port)
{
struct pci_bridge_emul *bridge = &port->bridge;
+ u32 pcie_cap = mvebu_readl(port, PCIE_CAP_PCIEXP);
+ u8 pcie_cap_ver = ((pcie_cap >> 16) & PCI_EXP_FLAGS_VERS);
bridge->conf.vendor = PCI_VENDOR_ID_MARVELL;
bridge->conf.device = mvebu_readl(port, PCIE_DEV_ID_OFF) >> 16;
@@ -585,11 +737,17 @@ static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port)
bridge->conf.iolimit = PCI_IO_RANGE_TYPE_32;
}
+ /*
+ * Older mvebu hardware provides PCIe Capability structure only in
+ * version 1. New hardware provides it in version 2.
+ */
+ bridge->pcie_conf.cap = cpu_to_le16(pcie_cap_ver);
+
bridge->has_pcie = true;
bridge->data = port;
bridge->ops = &mvebu_pci_bridge_emul_ops;
- pci_bridge_emul_init(bridge, PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR);
+ return pci_bridge_emul_init(bridge, PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR);
}
static inline struct mvebu_pcie *sys_to_pcie(struct pci_sys_data *sys)
@@ -606,6 +764,9 @@ static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
for (i = 0; i < pcie->nports; i++) {
struct mvebu_pcie_port *port = &pcie->ports[i];
+ if (!port->base)
+ continue;
+
if (bus->number == 0 && port->devfn == devfn)
return port;
if (bus->number != 0 &&
@@ -653,20 +814,16 @@ static int mvebu_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
int ret;
port = mvebu_pcie_find_port(pcie, bus, devfn);
- if (!port) {
- *val = 0xffffffff;
+ if (!port)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
/* Access the emulated PCI-to-PCI bridge */
if (bus->number == 0)
return pci_bridge_emul_conf_read(&port->bridge, where,
size, val);
- if (!mvebu_pcie_link_up(port)) {
- *val = 0xffffffff;
+ if (!mvebu_pcie_link_up(port))
return PCIBIOS_DEVICE_NOT_FOUND;
- }
/* Access the real PCIe interface */
ret = mvebu_pcie_hw_rd_conf(port, bus, devfn,
@@ -680,6 +837,15 @@ static struct pci_ops mvebu_pcie_ops = {
.write = mvebu_pcie_wr_conf,
};
+static int mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+ /* Interrupt support on mvebu emulated bridges is not implemented yet */
+ if (dev->bus->number == 0)
+ return 0; /* Proper return code 0 == NO_IRQ */
+
+ return of_irq_parse_and_map_pci(dev, slot, pin);
+}
+
static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
const struct resource *res,
resource_size_t start,
@@ -781,6 +947,8 @@ static int mvebu_pcie_suspend(struct device *dev)
pcie = dev_get_drvdata(dev);
for (i = 0; i < pcie->nports; i++) {
struct mvebu_pcie_port *port = pcie->ports + i;
+ if (!port->base)
+ continue;
port->saved_pcie_stat = mvebu_readl(port, PCIE_STAT_OFF);
}
@@ -795,6 +963,8 @@ static int mvebu_pcie_resume(struct device *dev)
pcie = dev_get_drvdata(dev);
for (i = 0; i < pcie->nports; i++) {
struct mvebu_pcie_port *port = pcie->ports + i;
+ if (!port->base)
+ continue;
mvebu_writel(port, port->saved_pcie_stat, PCIE_STAT_OFF);
mvebu_pcie_setup_hw(port);
}
@@ -838,6 +1008,11 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
port->devfn = of_pci_get_devfn(child);
if (port->devfn < 0)
goto skip;
+ if (PCI_FUNC(port->devfn) != 0) {
+ dev_err(dev, "%s: invalid function number, must be zero\n",
+ port->name);
+ goto skip;
+ }
ret = mvebu_get_tgt_attr(dev->of_node, port->devfn, IORESOURCE_MEM,
&port->mem_target, &port->mem_attr);
@@ -992,6 +1167,10 @@ static int mvebu_pcie_parse_request_resources(struct mvebu_pcie *pcie)
resource_size(&pcie->io) - 1);
pcie->realio.name = "PCI I/O";
+ ret = devm_pci_remap_iospace(dev, &pcie->realio, pcie->io.start);
+ if (ret)
+ return ret;
+
pci_add_resource(&bridge->windows, &pcie->realio);
ret = devm_request_resource(dev, &ioport_resource, &pcie->realio);
if (ret)
@@ -1001,54 +1180,6 @@ static int mvebu_pcie_parse_request_resources(struct mvebu_pcie *pcie)
return 0;
}
-/*
- * This is a copy of pci_host_probe(), except that it does the I/O
- * remap as the last step, once we are sure we won't fail.
- *
- * It should be removed once the I/O remap error handling issue has
- * been sorted out.
- */
-static int mvebu_pci_host_probe(struct pci_host_bridge *bridge)
-{
- struct mvebu_pcie *pcie;
- struct pci_bus *bus, *child;
- int ret;
-
- ret = pci_scan_root_bus_bridge(bridge);
- if (ret < 0) {
- dev_err(bridge->dev.parent, "Scanning root bridge failed");
- return ret;
- }
-
- pcie = pci_host_bridge_priv(bridge);
- if (resource_size(&pcie->io) != 0) {
- unsigned int i;
-
- for (i = 0; i < resource_size(&pcie->realio); i += SZ_64K)
- pci_ioremap_io(i, pcie->io.start + i);
- }
-
- bus = bridge->bus;
-
- /*
- * We insert PCI resources into the iomem_resource and
- * ioport_resource trees in either pci_bus_claim_resources()
- * or pci_bus_assign_resources().
- */
- if (pci_has_flag(PCI_PROBE_ONLY)) {
- pci_bus_claim_resources(bus);
- } else {
- pci_bus_size_bridges(bus);
- pci_bus_assign_resources(bus);
-
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
- }
-
- pci_bus_add_devices(bus);
- return 0;
-}
-
static int mvebu_pcie_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -1112,9 +1243,93 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
continue;
}
+ ret = mvebu_pci_bridge_emul_init(port);
+ if (ret < 0) {
+ dev_err(dev, "%s: cannot init emulated bridge\n",
+ port->name);
+ devm_iounmap(dev, port->base);
+ port->base = NULL;
+ mvebu_pcie_powerdown(port);
+ continue;
+ }
+
+ /*
+ * PCIe topology exported by mvebu hw is quite complicated. In
+ * reality has something like N fully independent host bridges
+ * where each host bridge has one PCIe Root Port (which acts as
+ * PCI Bridge device). Each host bridge has its own independent
+ * internal registers, independent access to PCI config space,
+ * independent interrupt lines, independent window and memory
+ * access configuration. But additionally there is some kind of
+ * peer-to-peer support between PCIe devices behind different
+ * host bridges limited just to forwarding of memory and I/O
+ * transactions (forwarding of error messages and config cycles
+ * is not supported). So we could say there are N independent
+ * PCIe Root Complexes.
+ *
+ * For this kind of setup DT should have been structured into
+ * N independent PCIe controllers / host bridges. But instead
+ * structure in past was defined to put PCIe Root Ports of all
+ * host bridges into one bus zero, like in classic multi-port
+ * Root Complex setup with just one host bridge.
+ *
+ * This means that pci-mvebu.c driver provides "virtual" bus 0
+ * on which registers all PCIe Root Ports (PCI Bridge devices)
+ * specified in DT by their BDF addresses and virtually routes
+ * PCI config access of each PCI bridge device to specific PCIe
+ * host bridge.
+ *
+ * Normally PCI Bridge should choose between Type 0 and Type 1
+ * config requests based on primary and secondary bus numbers
+ * configured on the bridge itself. But because mvebu PCI Bridge
+ * does not have registers for primary and secondary bus numbers
+ * in its config space, it determinates type of config requests
+ * via its own custom way.
+ *
+ * There are two options how mvebu determinate type of config
+ * request.
+ *
+ * 1. If Secondary Bus Number Enable bit is not set or is not
+ * available (applies for pre-XP PCIe controllers) then Type 0
+ * is used if target bus number equals Local Bus Number (bits
+ * [15:8] in register 0x1a04) and target device number differs
+ * from Local Device Number (bits [20:16] in register 0x1a04).
+ * Type 1 is used if target bus number differs from Local Bus
+ * Number. And when target bus number equals Local Bus Number
+ * and target device equals Local Device Number then request is
+ * routed to Local PCI Bridge (PCIe Root Port).
+ *
+ * 2. If Secondary Bus Number Enable bit is set (bit 7 in
+ * register 0x1a2c) then mvebu hw determinate type of config
+ * request like compliant PCI Bridge based on primary bus number
+ * which is configured via Local Bus Number (bits [15:8] in
+ * register 0x1a04) and secondary bus number which is configured
+ * via Secondary Bus Number (bits [7:0] in register 0x1a2c).
+ * Local PCI Bridge (PCIe Root Port) is available on primary bus
+ * as device with Local Device Number (bits [20:16] in register
+ * 0x1a04).
+ *
+ * Secondary Bus Number Enable bit is disabled by default and
+ * option 2. is not available on pre-XP PCIe controllers. Hence
+ * this driver always use option 1.
+ *
+ * Basically it means that primary and secondary buses shares
+ * one virtual number configured via Local Bus Number bits and
+ * Local Device Number bits determinates if accessing primary
+ * or secondary bus. Set Local Device Number to 1 and redirect
+ * all writes of PCI Bridge Secondary Bus Number register to
+ * Local Bus Number (bits [15:8] in register 0x1a04).
+ *
+ * So when accessing devices on buses behind secondary bus
+ * number it would work correctly. And also when accessing
+ * device 0 at secondary bus number via config space would be
+ * correctly routed to secondary bus. Due to issues described
+ * in mvebu_pcie_setup_hw(), PCI Bridges at primary bus (zero)
+ * are not accessed directly via PCI config space but rarher
+ * indirectly via kernel emulated PCI bridge driver.
+ */
mvebu_pcie_setup_hw(port);
- mvebu_pcie_set_local_dev_nr(port, 1);
- mvebu_pci_bridge_emul_init(port);
+ mvebu_pcie_set_local_dev_nr(port, 0);
}
pcie->nports = i;
@@ -1122,8 +1337,55 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
bridge->sysdata = pcie;
bridge->ops = &mvebu_pcie_ops;
bridge->align_resource = mvebu_pcie_align_resource;
+ bridge->map_irq = mvebu_pcie_map_irq;
+
+ return pci_host_probe(bridge);
+}
+
+static int mvebu_pcie_remove(struct platform_device *pdev)
+{
+ struct mvebu_pcie *pcie = platform_get_drvdata(pdev);
+ struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+ u32 cmd;
+ int i;
+
+ /* Remove PCI bus with all devices. */
+ pci_lock_rescan_remove();
+ pci_stop_root_bus(bridge->bus);
+ pci_remove_root_bus(bridge->bus);
+ pci_unlock_rescan_remove();
+
+ for (i = 0; i < pcie->nports; i++) {
+ struct mvebu_pcie_port *port = &pcie->ports[i];
+
+ if (!port->base)
+ continue;
+
+ /* Disable Root Bridge I/O space, memory space and bus mastering. */
+ cmd = mvebu_readl(port, PCIE_CMD_OFF);
+ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+ mvebu_writel(port, cmd, PCIE_CMD_OFF);
+
+ /* Mask all interrupt sources. */
+ mvebu_writel(port, 0, PCIE_MASK_OFF);
+
+ /* Free config space for emulated root bridge. */
+ pci_bridge_emul_cleanup(&port->bridge);
- return mvebu_pci_host_probe(bridge);
+ /* Disable and clear BARs and windows. */
+ mvebu_pcie_disable_wins(port);
+
+ /* Delete PCIe IO and MEM windows. */
+ if (port->iowin.size)
+ mvebu_pcie_del_windows(port, port->iowin.base, port->iowin.size);
+ if (port->memwin.size)
+ mvebu_pcie_del_windows(port, port->memwin.base, port->memwin.size);
+
+ /* Power down card and disable clocks. Must be the last step. */
+ mvebu_pcie_powerdown(port);
+ }
+
+ return 0;
}
static const struct of_device_id mvebu_pcie_of_match_table[] = {
@@ -1142,10 +1404,14 @@ static struct platform_driver mvebu_pcie_driver = {
.driver = {
.name = "mvebu-pcie",
.of_match_table = mvebu_pcie_of_match_table,
- /* driver unloading/unbinding currently not supported */
- .suppress_bind_attrs = true,
.pm = &mvebu_pcie_pm_ops,
},
.probe = mvebu_pcie_probe,
+ .remove = mvebu_pcie_remove,
};
-builtin_platform_driver(mvebu_pcie_driver);
+module_platform_driver(mvebu_pcie_driver);
+
+MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@bootlin.com>");
+MODULE_AUTHOR("Pali Rohár <pali@kernel.org>");
+MODULE_DESCRIPTION("Marvell EBU PCIe controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-rcar-gen2.c b/drivers/pci/controller/pci-rcar-gen2.c
index afde4aa8f6dc..35804ea394fd 100644
--- a/drivers/pci/controller/pci-rcar-gen2.c
+++ b/drivers/pci/controller/pci-rcar-gen2.c
@@ -93,7 +93,7 @@
#define RCAR_PCI_UNIT_REV_REG (RCAR_AHBPCI_PCICOM_OFFSET + 0x48)
-struct rcar_pci_priv {
+struct rcar_pci {
struct device *dev;
void __iomem *reg;
struct resource mem_res;
@@ -105,7 +105,7 @@ struct rcar_pci_priv {
static void __iomem *rcar_pci_cfg_base(struct pci_bus *bus, unsigned int devfn,
int where)
{
- struct rcar_pci_priv *priv = bus->sysdata;
+ struct rcar_pci *priv = bus->sysdata;
int slot, val;
if (!pci_is_root_bus(bus) || PCI_FUNC(devfn))
@@ -132,7 +132,7 @@ static void __iomem *rcar_pci_cfg_base(struct pci_bus *bus, unsigned int devfn,
static irqreturn_t rcar_pci_err_irq(int irq, void *pw)
{
- struct rcar_pci_priv *priv = pw;
+ struct rcar_pci *priv = pw;
struct device *dev = priv->dev;
u32 status = ioread32(priv->reg + RCAR_PCI_INT_STATUS_REG);
@@ -148,7 +148,7 @@ static irqreturn_t rcar_pci_err_irq(int irq, void *pw)
return IRQ_NONE;
}
-static void rcar_pci_setup_errirq(struct rcar_pci_priv *priv)
+static void rcar_pci_setup_errirq(struct rcar_pci *priv)
{
struct device *dev = priv->dev;
int ret;
@@ -166,11 +166,11 @@ static void rcar_pci_setup_errirq(struct rcar_pci_priv *priv)
iowrite32(val, priv->reg + RCAR_PCI_INT_ENABLE_REG);
}
#else
-static inline void rcar_pci_setup_errirq(struct rcar_pci_priv *priv) { }
+static inline void rcar_pci_setup_errirq(struct rcar_pci *priv) { }
#endif
/* PCI host controller setup */
-static void rcar_pci_setup(struct rcar_pci_priv *priv)
+static void rcar_pci_setup(struct rcar_pci *priv)
{
struct pci_host_bridge *bridge = pci_host_bridge_from_priv(priv);
struct device *dev = priv->dev;
@@ -279,7 +279,7 @@ static int rcar_pci_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *cfg_res, *mem_res;
- struct rcar_pci_priv *priv;
+ struct rcar_pci *priv;
struct pci_host_bridge *bridge;
void __iomem *reg;
diff --git a/drivers/pci/controller/pci-thunder-ecam.c b/drivers/pci/controller/pci-thunder-ecam.c
index e9d5ca245f5e..b5bd10a62adb 100644
--- a/drivers/pci/controller/pci-thunder-ecam.c
+++ b/drivers/pci/controller/pci-thunder-ecam.c
@@ -41,10 +41,9 @@ static int handle_ea_bar(u32 e0, int bar, struct pci_bus *bus,
}
if (where_a == 0x4) {
addr = bus->ops->map_bus(bus, devfn, bar); /* BAR 0 */
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
+
v = readl(addr);
v &= ~0xf;
v |= 2; /* EA entry-1. Base-L */
@@ -56,10 +55,9 @@ static int handle_ea_bar(u32 e0, int bar, struct pci_bus *bus,
u32 barl_rb;
addr = bus->ops->map_bus(bus, devfn, bar); /* BAR 0 */
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
+
barl_orig = readl(addr + 0);
writel(0xffffffff, addr + 0);
barl_rb = readl(addr + 0);
@@ -72,10 +70,9 @@ static int handle_ea_bar(u32 e0, int bar, struct pci_bus *bus,
}
if (where_a == 0xc) {
addr = bus->ops->map_bus(bus, devfn, bar + 4); /* BAR 1 */
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
+
v = readl(addr); /* EA entry-3. Base-H */
set_val(v, where, size, val);
return PCIBIOS_SUCCESSFUL;
@@ -104,10 +101,8 @@ static int thunder_ecam_p2_config_read(struct pci_bus *bus, unsigned int devfn,
}
addr = bus->ops->map_bus(bus, devfn, where_a);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
v = readl(addr);
@@ -135,10 +130,8 @@ static int thunder_ecam_config_read(struct pci_bus *bus, unsigned int devfn,
int where_a = where & ~3;
addr = bus->ops->map_bus(bus, devfn, 0xc);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
v = readl(addr);
@@ -146,10 +139,8 @@ static int thunder_ecam_config_read(struct pci_bus *bus, unsigned int devfn,
cfg_type = (v >> 16) & 0x7f;
addr = bus->ops->map_bus(bus, devfn, 8);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
class_rev = readl(addr);
if (class_rev == 0xffffffff)
@@ -176,10 +167,8 @@ static int thunder_ecam_config_read(struct pci_bus *bus, unsigned int devfn,
}
addr = bus->ops->map_bus(bus, devfn, 0);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
vendor_device = readl(addr);
if (vendor_device == 0xffffffff)
@@ -196,10 +185,9 @@ static int thunder_ecam_config_read(struct pci_bus *bus, unsigned int devfn,
bool is_tns = (vendor_device == 0xa01f177d);
addr = bus->ops->map_bus(bus, devfn, 0x70);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
+
/* E_CAP */
v = readl(addr);
has_msix = (v & 0xff00) != 0;
@@ -211,10 +199,9 @@ static int thunder_ecam_config_read(struct pci_bus *bus, unsigned int devfn,
}
if (where_a == 0xb0) {
addr = bus->ops->map_bus(bus, devfn, where_a);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
+
v = readl(addr);
if (v & 0xff00)
pr_err("Bad MSIX cap header: %08x\n", v);
@@ -268,10 +255,9 @@ static int thunder_ecam_config_read(struct pci_bus *bus, unsigned int devfn,
if (where_a == 0x70) {
addr = bus->ops->map_bus(bus, devfn, where_a);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
+
v = readl(addr);
if (v & 0xff00)
pr_err("Bad PCIe cap header: %08x\n", v);
diff --git a/drivers/pci/controller/pci-thunder-pem.c b/drivers/pci/controller/pci-thunder-pem.c
index 0660b9da204f..06a9855cb431 100644
--- a/drivers/pci/controller/pci-thunder-pem.c
+++ b/drivers/pci/controller/pci-thunder-pem.c
@@ -41,10 +41,8 @@ static int thunder_pem_bridge_read(struct pci_bus *bus, unsigned int devfn,
struct pci_config_window *cfg = bus->sysdata;
struct thunder_pem_pci *pem_pci = (struct thunder_pem_pci *)cfg->priv;
- if (devfn != 0 || where >= 2048) {
- *val = ~0;
+ if (devfn != 0 || where >= 2048)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
/*
* 32-bit accesses only. Write the address to the low order
diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c
index c50ff279903c..bfa259781b69 100644
--- a/drivers/pci/controller/pci-xgene-msi.c
+++ b/drivers/pci/controller/pci-xgene-msi.c
@@ -269,9 +269,7 @@ static void xgene_free_domains(struct xgene_msi *msi)
static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
{
- int size = BITS_TO_LONGS(NR_MSI_VEC) * sizeof(long);
-
- xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
+ xgene_msi->bitmap = bitmap_zalloc(NR_MSI_VEC, GFP_KERNEL);
if (!xgene_msi->bitmap)
return -ENOMEM;
@@ -360,7 +358,7 @@ static int xgene_msi_remove(struct platform_device *pdev)
kfree(msi->msi_groups);
- kfree(msi->bitmap);
+ bitmap_free(msi->bitmap);
msi->bitmap = NULL;
xgene_free_domains(msi);
diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c
index 56d0d50338c8..0d5acbfc7143 100644
--- a/drivers/pci/controller/pci-xgene.c
+++ b/drivers/pci/controller/pci-xgene.c
@@ -60,7 +60,7 @@
#define XGENE_PCIE_IP_VER_2 2
#if defined(CONFIG_PCI_XGENE) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
-struct xgene_pcie_port {
+struct xgene_pcie {
struct device_node *node;
struct device *dev;
struct clk *clk;
@@ -71,12 +71,12 @@ struct xgene_pcie_port {
u32 version;
};
-static u32 xgene_pcie_readl(struct xgene_pcie_port *port, u32 reg)
+static u32 xgene_pcie_readl(struct xgene_pcie *port, u32 reg)
{
return readl(port->csr_base + reg);
}
-static void xgene_pcie_writel(struct xgene_pcie_port *port, u32 reg, u32 val)
+static void xgene_pcie_writel(struct xgene_pcie *port, u32 reg, u32 val)
{
writel(val, port->csr_base + reg);
}
@@ -86,15 +86,15 @@ static inline u32 pcie_bar_low_val(u32 addr, u32 flags)
return (addr & PCI_BASE_ADDRESS_MEM_MASK) | flags;
}
-static inline struct xgene_pcie_port *pcie_bus_to_port(struct pci_bus *bus)
+static inline struct xgene_pcie *pcie_bus_to_port(struct pci_bus *bus)
{
struct pci_config_window *cfg;
if (acpi_disabled)
- return (struct xgene_pcie_port *)(bus->sysdata);
+ return (struct xgene_pcie *)(bus->sysdata);
cfg = bus->sysdata;
- return (struct xgene_pcie_port *)(cfg->priv);
+ return (struct xgene_pcie *)(cfg->priv);
}
/*
@@ -103,7 +103,7 @@ static inline struct xgene_pcie_port *pcie_bus_to_port(struct pci_bus *bus)
*/
static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
{
- struct xgene_pcie_port *port = pcie_bus_to_port(bus);
+ struct xgene_pcie *port = pcie_bus_to_port(bus);
if (bus->number >= (bus->primary + 1))
return port->cfg_base + AXI_EP_CFG_ACCESS;
@@ -117,7 +117,7 @@ static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
*/
static void xgene_pcie_set_rtdid_reg(struct pci_bus *bus, uint devfn)
{
- struct xgene_pcie_port *port = pcie_bus_to_port(bus);
+ struct xgene_pcie *port = pcie_bus_to_port(bus);
unsigned int b, d, f;
u32 rtdid_val = 0;
@@ -164,18 +164,18 @@ static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
int where, int size, u32 *val)
{
- struct xgene_pcie_port *port = pcie_bus_to_port(bus);
+ struct xgene_pcie *port = pcie_bus_to_port(bus);
if (pci_generic_config_read32(bus, devfn, where & ~0x3, 4, val) !=
PCIBIOS_SUCCESSFUL)
return PCIBIOS_DEVICE_NOT_FOUND;
/*
- * The v1 controller has a bug in its Configuration Request
- * Retry Status (CRS) logic: when CRS Software Visibility is
- * enabled and we read the Vendor and Device ID of a non-existent
- * device, the controller fabricates return data of 0xFFFF0001
- * ("device exists but is not ready") instead of 0xFFFFFFFF
+ * The v1 controller has a bug in its Configuration Request Retry
+ * Status (CRS) logic: when CRS Software Visibility is enabled and
+ * we read the Vendor and Device ID of a non-existent device, the
+ * controller fabricates return data of 0xFFFF0001 ("device exists
+ * but is not ready") instead of 0xFFFFFFFF (PCI_ERROR_RESPONSE)
* ("device does not exist"). This causes the PCI core to retry
* the read until it times out. Avoid this by not claiming to
* support CRS SV.
@@ -227,7 +227,7 @@ static int xgene_pcie_ecam_init(struct pci_config_window *cfg, u32 ipversion)
{
struct device *dev = cfg->parent;
struct acpi_device *adev = to_acpi_device(dev);
- struct xgene_pcie_port *port;
+ struct xgene_pcie *port;
struct resource csr;
int ret;
@@ -281,7 +281,7 @@ const struct pci_ecam_ops xgene_v2_pcie_ecam_ops = {
#endif
#if defined(CONFIG_PCI_XGENE)
-static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr,
+static u64 xgene_pcie_set_ib_mask(struct xgene_pcie *port, u32 addr,
u32 flags, u64 size)
{
u64 mask = (~(size - 1) & PCI_BASE_ADDRESS_MEM_MASK) | flags;
@@ -307,7 +307,7 @@ static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr,
return mask;
}
-static void xgene_pcie_linkup(struct xgene_pcie_port *port,
+static void xgene_pcie_linkup(struct xgene_pcie *port,
u32 *lanes, u32 *speed)
{
u32 val32;
@@ -322,7 +322,7 @@ static void xgene_pcie_linkup(struct xgene_pcie_port *port,
}
}
-static int xgene_pcie_init_port(struct xgene_pcie_port *port)
+static int xgene_pcie_init_port(struct xgene_pcie *port)
{
struct device *dev = port->dev;
int rc;
@@ -342,7 +342,7 @@ static int xgene_pcie_init_port(struct xgene_pcie_port *port)
return 0;
}
-static int xgene_pcie_map_reg(struct xgene_pcie_port *port,
+static int xgene_pcie_map_reg(struct xgene_pcie *port,
struct platform_device *pdev)
{
struct device *dev = port->dev;
@@ -362,7 +362,7 @@ static int xgene_pcie_map_reg(struct xgene_pcie_port *port,
return 0;
}
-static void xgene_pcie_setup_ob_reg(struct xgene_pcie_port *port,
+static void xgene_pcie_setup_ob_reg(struct xgene_pcie *port,
struct resource *res, u32 offset,
u64 cpu_addr, u64 pci_addr)
{
@@ -394,7 +394,7 @@ static void xgene_pcie_setup_ob_reg(struct xgene_pcie_port *port,
xgene_pcie_writel(port, offset + 0x14, upper_32_bits(pci_addr));
}
-static void xgene_pcie_setup_cfg_reg(struct xgene_pcie_port *port)
+static void xgene_pcie_setup_cfg_reg(struct xgene_pcie *port)
{
u64 addr = port->cfg_addr;
@@ -403,7 +403,7 @@ static void xgene_pcie_setup_cfg_reg(struct xgene_pcie_port *port)
xgene_pcie_writel(port, CFGCTL, EN_REG);
}
-static int xgene_pcie_map_ranges(struct xgene_pcie_port *port)
+static int xgene_pcie_map_ranges(struct xgene_pcie *port)
{
struct pci_host_bridge *bridge = pci_host_bridge_from_priv(port);
struct resource_entry *window;
@@ -444,7 +444,7 @@ static int xgene_pcie_map_ranges(struct xgene_pcie_port *port)
return 0;
}
-static void xgene_pcie_setup_pims(struct xgene_pcie_port *port, u32 pim_reg,
+static void xgene_pcie_setup_pims(struct xgene_pcie *port, u32 pim_reg,
u64 pim, u64 size)
{
xgene_pcie_writel(port, pim_reg, lower_32_bits(pim));
@@ -465,7 +465,7 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size)
return 1;
}
- if ((size > SZ_1K) && (size < SZ_1T) && !(*ib_reg_mask & (1 << 0))) {
+ if ((size > SZ_1K) && (size < SZ_4G) && !(*ib_reg_mask & (1 << 0))) {
*ib_reg_mask |= (1 << 0);
return 0;
}
@@ -478,7 +478,7 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size)
return -EINVAL;
}
-static void xgene_pcie_setup_ib_reg(struct xgene_pcie_port *port,
+static void xgene_pcie_setup_ib_reg(struct xgene_pcie *port,
struct resource_entry *entry,
u8 *ib_reg_mask)
{
@@ -529,7 +529,7 @@ static void xgene_pcie_setup_ib_reg(struct xgene_pcie_port *port,
xgene_pcie_setup_pims(port, pim_reg, pci_addr, ~(size - 1));
}
-static int xgene_pcie_parse_map_dma_ranges(struct xgene_pcie_port *port)
+static int xgene_pcie_parse_map_dma_ranges(struct xgene_pcie *port)
{
struct pci_host_bridge *bridge = pci_host_bridge_from_priv(port);
struct resource_entry *entry;
@@ -542,7 +542,7 @@ static int xgene_pcie_parse_map_dma_ranges(struct xgene_pcie_port *port)
}
/* clear BAR configuration which was done by firmware */
-static void xgene_pcie_clear_config(struct xgene_pcie_port *port)
+static void xgene_pcie_clear_config(struct xgene_pcie *port)
{
int i;
@@ -550,7 +550,7 @@ static void xgene_pcie_clear_config(struct xgene_pcie_port *port)
xgene_pcie_writel(port, i, 0);
}
-static int xgene_pcie_setup(struct xgene_pcie_port *port)
+static int xgene_pcie_setup(struct xgene_pcie *port)
{
struct device *dev = port->dev;
u32 val, lanes = 0, speed = 0;
@@ -588,7 +588,7 @@ static int xgene_pcie_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct device_node *dn = dev->of_node;
- struct xgene_pcie_port *port;
+ struct xgene_pcie *port;
struct pci_host_bridge *bridge;
int ret;
diff --git a/drivers/pci/controller/pcie-altera.c b/drivers/pci/controller/pcie-altera.c
index 2513e9363236..18b2361d6462 100644
--- a/drivers/pci/controller/pcie-altera.c
+++ b/drivers/pci/controller/pcie-altera.c
@@ -510,10 +510,8 @@ static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn,
if (altera_pcie_hide_rc_bar(bus, devfn, where))
return PCIBIOS_BAD_REGISTER_NUMBER;
- if (!altera_pcie_valid_device(pcie, bus, PCI_SLOT(devfn))) {
- *value = 0xffffffff;
+ if (!altera_pcie_valid_device(pcie, bus, PCI_SLOT(devfn)))
return PCIBIOS_DEVICE_NOT_FOUND;
- }
return _altera_pcie_cfg_read(pcie, bus->number, devfn, where, size,
value);
@@ -767,7 +765,7 @@ static int altera_pcie_probe(struct platform_device *pdev)
struct altera_pcie *pcie;
struct pci_host_bridge *bridge;
int ret;
- const struct of_device_id *match;
+ const struct altera_pcie_data *data;
bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
if (!bridge)
@@ -777,11 +775,11 @@ static int altera_pcie_probe(struct platform_device *pdev)
pcie->pdev = pdev;
platform_set_drvdata(pdev, pcie);
- match = of_match_device(altera_pcie_of_match, &pdev->dev);
- if (!match)
+ data = of_device_get_match_data(&pdev->dev);
+ if (!data)
return -ENODEV;
- pcie->pcie_data = match->data;
+ pcie->pcie_data = data;
ret = altera_pcie_parse_dt(pcie);
if (ret) {
diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c
index b090924b41fe..854d95163112 100644
--- a/drivers/pci/controller/pcie-apple.c
+++ b/drivers/pci/controller/pcie-apple.c
@@ -42,8 +42,9 @@
#define CORE_FABRIC_STAT_MASK 0x001F001F
#define CORE_LANE_CFG(port) (0x84000 + 0x4000 * (port))
#define CORE_LANE_CFG_REFCLK0REQ BIT(0)
-#define CORE_LANE_CFG_REFCLK1 BIT(1)
+#define CORE_LANE_CFG_REFCLK1REQ BIT(1)
#define CORE_LANE_CFG_REFCLK0ACK BIT(2)
+#define CORE_LANE_CFG_REFCLK1ACK BIT(3)
#define CORE_LANE_CFG_REFCLKEN (BIT(9) | BIT(10))
#define CORE_LANE_CTL(port) (0x84004 + 0x4000 * (port))
#define CORE_LANE_CTL_CFGACC BIT(15)
@@ -482,9 +483,9 @@ static int apple_pcie_setup_refclk(struct apple_pcie *pcie,
if (res < 0)
return res;
- rmw_set(CORE_LANE_CFG_REFCLK1, pcie->base + CORE_LANE_CFG(port->idx));
+ rmw_set(CORE_LANE_CFG_REFCLK1REQ, pcie->base + CORE_LANE_CFG(port->idx));
res = readl_relaxed_poll_timeout(pcie->base + CORE_LANE_CFG(port->idx),
- stat, stat & CORE_LANE_CFG_REFCLK1,
+ stat, stat & CORE_LANE_CFG_REFCLK1ACK,
100, 50000);
if (res < 0)
@@ -563,6 +564,9 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie,
return ret;
}
+ rmw_clear(PORT_REFCLK_CGDIS, port->base + PORT_REFCLK);
+ rmw_clear(PORT_APPCLK_CGDIS, port->base + PORT_APPCLK);
+
ret = apple_pcie_port_setup_irq(port);
if (ret)
return ret;
diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index a267cd5b3233..375c0c40bbf8 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -24,6 +24,7 @@
#include <linux/pci.h>
#include <linux/pci-ecam.h>
#include <linux/printk.h>
+#include <linux/regulator/consumer.h>
#include <linux/reset.h>
#include <linux/sizes.h>
#include <linux/slab.h>
@@ -145,6 +146,9 @@
#define BRCM_INT_PCI_MSI_NR 32
#define BRCM_INT_PCI_MSI_LEGACY_NR 8
#define BRCM_INT_PCI_MSI_SHIFT 0
+#define BRCM_INT_PCI_MSI_MASK GENMASK(BRCM_INT_PCI_MSI_NR - 1, 0)
+#define BRCM_INT_PCI_MSI_LEGACY_MASK GENMASK(31, \
+ 32 - BRCM_INT_PCI_MSI_LEGACY_NR)
/* MSI target addresses */
#define BRCM_MSI_TARGET_ADDR_LT_4GB 0x0fffffffcULL
@@ -192,6 +196,8 @@ static inline void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie,
static inline void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val);
static inline void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val);
static inline void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val);
+static int brcm_pcie_linkup(struct brcm_pcie *pcie);
+static int brcm_pcie_add_bus(struct pci_bus *bus);
enum {
RGR1_SW_INIT_1,
@@ -280,6 +286,14 @@ static const struct pcie_cfg_data bcm2711_cfg = {
.bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_generic,
};
+struct subdev_regulators {
+ unsigned int num_supplies;
+ struct regulator_bulk_data supplies[];
+};
+
+static int pci_subdev_regulators_add_bus(struct pci_bus *bus);
+static void pci_subdev_regulators_remove_bus(struct pci_bus *bus);
+
struct brcm_msi {
struct device *dev;
void __iomem *base;
@@ -289,8 +303,7 @@ struct brcm_msi {
struct mutex lock; /* guards the alloc/free operations */
u64 target_addr;
int irq;
- /* used indicates which MSI interrupts have been alloc'd */
- unsigned long used;
+ DECLARE_BITMAP(used, BRCM_INT_PCI_MSI_NR);
bool legacy;
/* Some chips have MSIs in bits [31..24] of a shared register. */
int legacy_shift;
@@ -318,6 +331,9 @@ struct brcm_pcie {
u32 hw_rev;
void (*perst_set)(struct brcm_pcie *pcie, u32 val);
void (*bridge_sw_init_set)(struct brcm_pcie *pcie, u32 val);
+ bool refusal_mode;
+ struct subdev_regulators *sr;
+ bool ep_wakeup_capable;
};
static inline bool is_bmips(const struct brcm_pcie *pcie)
@@ -434,6 +450,99 @@ static int brcm_pcie_set_ssc(struct brcm_pcie *pcie)
return ssc && pll ? 0 : -EIO;
}
+static void *alloc_subdev_regulators(struct device *dev)
+{
+ static const char * const supplies[] = {
+ "vpcie3v3",
+ "vpcie3v3aux",
+ "vpcie12v",
+ };
+ const size_t size = sizeof(struct subdev_regulators)
+ + sizeof(struct regulator_bulk_data) * ARRAY_SIZE(supplies);
+ struct subdev_regulators *sr;
+ int i;
+
+ sr = devm_kzalloc(dev, size, GFP_KERNEL);
+ if (sr) {
+ sr->num_supplies = ARRAY_SIZE(supplies);
+ for (i = 0; i < ARRAY_SIZE(supplies); i++)
+ sr->supplies[i].supply = supplies[i];
+ }
+
+ return sr;
+}
+
+static int pci_subdev_regulators_add_bus(struct pci_bus *bus)
+{
+ struct device *dev = &bus->dev;
+ struct subdev_regulators *sr;
+ int ret;
+
+ if (!dev->of_node || !bus->parent || !pci_is_root_bus(bus->parent))
+ return 0;
+
+ if (dev->driver_data)
+ dev_err(dev, "dev.driver_data unexpectedly non-NULL\n");
+
+ sr = alloc_subdev_regulators(dev);
+ if (!sr)
+ return -ENOMEM;
+
+ dev->driver_data = sr;
+ ret = regulator_bulk_get(dev, sr->num_supplies, sr->supplies);
+ if (ret)
+ return ret;
+
+ ret = regulator_bulk_enable(sr->num_supplies, sr->supplies);
+ if (ret) {
+ dev_err(dev, "failed to enable regulators for downstream device\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int brcm_pcie_add_bus(struct pci_bus *bus)
+{
+ struct device *dev = &bus->dev;
+ struct brcm_pcie *pcie = (struct brcm_pcie *) bus->sysdata;
+ int ret;
+
+ if (!dev->of_node || !bus->parent || !pci_is_root_bus(bus->parent))
+ return 0;
+
+ ret = pci_subdev_regulators_add_bus(bus);
+ if (ret)
+ return ret;
+
+ /* Grab the regulators for suspend/resume */
+ pcie->sr = bus->dev.driver_data;
+
+ /*
+ * If we have failed linkup there is no point to return an error as
+ * currently it will cause a WARNING() from pci_alloc_child_bus().
+ * We return 0 and turn on the "refusal_mode" so that any further
+ * accesses to the pci_dev just get 0xffffffff
+ */
+ if (brcm_pcie_linkup(pcie) != 0)
+ pcie->refusal_mode = true;
+
+ return 0;
+}
+
+static void pci_subdev_regulators_remove_bus(struct pci_bus *bus)
+{
+ struct device *dev = &bus->dev;
+ struct subdev_regulators *sr = dev->driver_data;
+
+ if (!sr || !bus->parent || !pci_is_root_bus(bus->parent))
+ return;
+
+ if (regulator_bulk_disable(sr->num_supplies, sr->supplies))
+ dev_err(dev, "failed to disable regulators for downstream device\n");
+ dev->driver_data = NULL;
+}
+
/* Limits operation to a specific generation (1, 2, or 3) */
static void brcm_pcie_set_gen(struct brcm_pcie *pcie, int gen)
{
@@ -565,7 +674,7 @@ static int brcm_msi_alloc(struct brcm_msi *msi)
int hwirq;
mutex_lock(&msi->lock);
- hwirq = bitmap_find_free_region(&msi->used, msi->nr, 0);
+ hwirq = bitmap_find_free_region(msi->used, msi->nr, 0);
mutex_unlock(&msi->lock);
return hwirq;
@@ -574,7 +683,7 @@ static int brcm_msi_alloc(struct brcm_msi *msi)
static void brcm_msi_free(struct brcm_msi *msi, unsigned long hwirq)
{
mutex_lock(&msi->lock);
- bitmap_release_region(&msi->used, hwirq, 0);
+ bitmap_release_region(msi->used, hwirq, 0);
mutex_unlock(&msi->lock);
}
@@ -650,7 +759,8 @@ static void brcm_msi_remove(struct brcm_pcie *pcie)
static void brcm_msi_set_regs(struct brcm_msi *msi)
{
- u32 val = __GENMASK(31, msi->legacy_shift);
+ u32 val = msi->legacy ? BRCM_INT_PCI_MSI_LEGACY_MASK :
+ BRCM_INT_PCI_MSI_MASK;
writel(val, msi->intr_base + MSI_INT_MASK_CLR);
writel(val, msi->intr_base + MSI_INT_CLR);
@@ -692,6 +802,12 @@ static int brcm_pcie_enable_msi(struct brcm_pcie *pcie)
msi->irq = irq;
msi->legacy = pcie->hw_rev < BRCM_PCIE_HW_REV_33;
+ /*
+ * Sanity check to make sure that the 'used' bitmap in struct brcm_msi
+ * is large enough.
+ */
+ BUILD_BUG_ON(BRCM_INT_PCI_MSI_LEGACY_NR > BRCM_INT_PCI_MSI_NR);
+
if (msi->legacy) {
msi->intr_base = msi->base + PCIE_INTR2_CPU_BASE;
msi->nr = BRCM_INT_PCI_MSI_LEGACY_NR;
@@ -742,6 +858,18 @@ static void __iomem *brcm_pcie_map_conf(struct pci_bus *bus, unsigned int devfn,
/* Accesses to the RC go right to the RC registers if slot==0 */
if (pci_is_root_bus(bus))
return PCI_SLOT(devfn) ? NULL : base + where;
+ if (pcie->refusal_mode) {
+ /*
+ * At this point we do not have link. There will be a CPU
+ * abort -- a quirk with this controller --if Linux tries
+ * to read any config-space registers besides those
+ * targeting the host bridge. To prevent this we hijack
+ * the address to point to a safe access that will return
+ * 0xffffffff.
+ */
+ writel(0xffffffff, base + PCIE_MISC_RC_BAR2_CONFIG_HI);
+ return base + PCIE_MISC_RC_BAR2_CONFIG_HI + (where & 0x3);
+ }
/* For devices, write to the config space index register */
idx = PCIE_ECAM_OFFSET(bus->number, devfn, 0);
@@ -770,6 +898,8 @@ static struct pci_ops brcm_pcie_ops = {
.map_bus = brcm_pcie_map_conf,
.read = pci_generic_config_read,
.write = pci_generic_config_write,
+ .add_bus = brcm_pcie_add_bus,
+ .remove_bus = pci_subdev_regulators_remove_bus,
};
static struct pci_ops brcm_pcie_ops32 = {
@@ -917,16 +1047,9 @@ static inline int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie,
static int brcm_pcie_setup(struct brcm_pcie *pcie)
{
- struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
u64 rc_bar2_offset, rc_bar2_size;
void __iomem *base = pcie->base;
- struct device *dev = pcie->dev;
- struct resource_entry *entry;
- bool ssc_good = false;
- struct resource *res;
- int num_out_wins = 0;
- u16 nlw, cls, lnksta;
- int i, ret, memc;
+ int ret, memc;
u32 tmp, burst, aspm_support;
/* Reset the bridge */
@@ -1016,6 +1139,40 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie)
if (pcie->gen)
brcm_pcie_set_gen(pcie, pcie->gen);
+ /* Don't advertise L0s capability if 'aspm-no-l0s' */
+ aspm_support = PCIE_LINK_STATE_L1;
+ if (!of_property_read_bool(pcie->np, "aspm-no-l0s"))
+ aspm_support |= PCIE_LINK_STATE_L0S;
+ tmp = readl(base + PCIE_RC_CFG_PRIV1_LINK_CAPABILITY);
+ u32p_replace_bits(&tmp, aspm_support,
+ PCIE_RC_CFG_PRIV1_LINK_CAPABILITY_ASPM_SUPPORT_MASK);
+ writel(tmp, base + PCIE_RC_CFG_PRIV1_LINK_CAPABILITY);
+
+ /*
+ * For config space accesses on the RC, show the right class for
+ * a PCIe-PCIe bridge (the default setting is to be EP mode).
+ */
+ tmp = readl(base + PCIE_RC_CFG_PRIV1_ID_VAL3);
+ u32p_replace_bits(&tmp, 0x060400,
+ PCIE_RC_CFG_PRIV1_ID_VAL3_CLASS_CODE_MASK);
+ writel(tmp, base + PCIE_RC_CFG_PRIV1_ID_VAL3);
+
+ return 0;
+}
+
+static int brcm_pcie_linkup(struct brcm_pcie *pcie)
+{
+ struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+ struct device *dev = pcie->dev;
+ void __iomem *base = pcie->base;
+ struct resource_entry *entry;
+ struct resource *res;
+ int num_out_wins = 0;
+ u16 nlw, cls, lnksta;
+ bool ssc_good = false;
+ u32 tmp;
+ int ret, i;
+
/* Unassert the fundamental reset */
pcie->perst_set(pcie, 0);
@@ -1066,24 +1223,6 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie)
num_out_wins++;
}
- /* Don't advertise L0s capability if 'aspm-no-l0s' */
- aspm_support = PCIE_LINK_STATE_L1;
- if (!of_property_read_bool(pcie->np, "aspm-no-l0s"))
- aspm_support |= PCIE_LINK_STATE_L0S;
- tmp = readl(base + PCIE_RC_CFG_PRIV1_LINK_CAPABILITY);
- u32p_replace_bits(&tmp, aspm_support,
- PCIE_RC_CFG_PRIV1_LINK_CAPABILITY_ASPM_SUPPORT_MASK);
- writel(tmp, base + PCIE_RC_CFG_PRIV1_LINK_CAPABILITY);
-
- /*
- * For config space accesses on the RC, show the right class for
- * a PCIe-PCIe bridge (the default setting is to be EP mode).
- */
- tmp = readl(base + PCIE_RC_CFG_PRIV1_ID_VAL3);
- u32p_replace_bits(&tmp, 0x060400,
- PCIE_RC_CFG_PRIV1_ID_VAL3_CLASS_CODE_MASK);
- writel(tmp, base + PCIE_RC_CFG_PRIV1_ID_VAL3);
-
if (pcie->ssc) {
ret = brcm_pcie_set_ssc(pcie);
if (ret == 0)
@@ -1212,17 +1351,60 @@ static void brcm_pcie_turn_off(struct brcm_pcie *pcie)
pcie->bridge_sw_init_set(pcie, 1);
}
+static int pci_dev_may_wakeup(struct pci_dev *dev, void *data)
+{
+ bool *ret = data;
+
+ if (device_may_wakeup(&dev->dev)) {
+ *ret = true;
+ dev_info(&dev->dev, "disable cancelled for wake-up device\n");
+ }
+ return (int) *ret;
+}
+
static int brcm_pcie_suspend(struct device *dev)
{
struct brcm_pcie *pcie = dev_get_drvdata(dev);
+ struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
int ret;
brcm_pcie_turn_off(pcie);
- ret = brcm_phy_stop(pcie);
- reset_control_rearm(pcie->rescal);
+ /*
+ * If brcm_phy_stop() returns an error, just dev_err(). If we
+ * return the error it will cause the suspend to fail and this is a
+ * forgivable offense that will probably be erased on resume.
+ */
+ if (brcm_phy_stop(pcie))
+ dev_err(dev, "Could not stop phy for suspend\n");
+
+ ret = reset_control_rearm(pcie->rescal);
+ if (ret) {
+ dev_err(dev, "Could not rearm rescal reset\n");
+ return ret;
+ }
+
+ if (pcie->sr) {
+ /*
+ * Now turn off the regulators, but if at least one
+ * downstream device is enabled as a wake-up source, do not
+ * turn off regulators.
+ */
+ pcie->ep_wakeup_capable = false;
+ pci_walk_bus(bridge->bus, pci_dev_may_wakeup,
+ &pcie->ep_wakeup_capable);
+ if (!pcie->ep_wakeup_capable) {
+ ret = regulator_bulk_disable(pcie->sr->num_supplies,
+ pcie->sr->supplies);
+ if (ret) {
+ dev_err(dev, "Could not turn off regulators\n");
+ reset_control_reset(pcie->rescal);
+ return ret;
+ }
+ }
+ }
clk_disable_unprepare(pcie->clk);
- return ret;
+ return 0;
}
static int brcm_pcie_resume(struct device *dev)
@@ -1233,11 +1415,32 @@ static int brcm_pcie_resume(struct device *dev)
int ret;
base = pcie->base;
- clk_prepare_enable(pcie->clk);
+ ret = clk_prepare_enable(pcie->clk);
+ if (ret)
+ return ret;
+
+ if (pcie->sr) {
+ if (pcie->ep_wakeup_capable) {
+ /*
+ * We are resuming from a suspend. In the suspend we
+ * did not disable the power supplies, so there is
+ * no need to enable them (and falsely increase their
+ * usage count).
+ */
+ pcie->ep_wakeup_capable = false;
+ } else {
+ ret = regulator_bulk_enable(pcie->sr->num_supplies,
+ pcie->sr->supplies);
+ if (ret) {
+ dev_err(dev, "Could not turn on regulators\n");
+ goto err_disable_clk;
+ }
+ }
+ }
ret = reset_control_reset(pcie->rescal);
if (ret)
- goto err_disable_clk;
+ goto err_regulator;
ret = brcm_phy_start(pcie);
if (ret)
@@ -1258,6 +1461,10 @@ static int brcm_pcie_resume(struct device *dev)
if (ret)
goto err_reset;
+ ret = brcm_pcie_linkup(pcie);
+ if (ret)
+ goto err_reset;
+
if (pcie->msi)
brcm_msi_set_regs(pcie->msi);
@@ -1265,6 +1472,9 @@ static int brcm_pcie_resume(struct device *dev)
err_reset:
reset_control_rearm(pcie->rescal);
+err_regulator:
+ if (pcie->sr)
+ regulator_bulk_disable(pcie->sr->num_supplies, pcie->sr->supplies);
err_disable_clk:
clk_disable_unprepare(pcie->clk);
return ret;
@@ -1274,8 +1484,10 @@ static void __brcm_pcie_remove(struct brcm_pcie *pcie)
{
brcm_msi_remove(pcie);
brcm_pcie_turn_off(pcie);
- brcm_phy_stop(pcie);
- reset_control_rearm(pcie->rescal);
+ if (brcm_phy_stop(pcie))
+ dev_err(pcie->dev, "Could not stop phy\n");
+ if (reset_control_rearm(pcie->rescal))
+ dev_err(pcie->dev, "Could not rearm rescal reset\n");
clk_disable_unprepare(pcie->clk);
}
@@ -1394,7 +1606,17 @@ static int brcm_pcie_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, pcie);
- return pci_host_probe(bridge);
+ ret = pci_host_probe(bridge);
+ if (!ret && !brcm_pcie_link_up(pcie))
+ ret = -ENODEV;
+
+ if (ret) {
+ brcm_pcie_remove(pdev);
+ return ret;
+ }
+
+ return 0;
+
fail:
__brcm_pcie_remove(pcie);
return ret;
@@ -1403,8 +1625,8 @@ fail:
MODULE_DEVICE_TABLE(of, brcm_pcie_match);
static const struct dev_pm_ops brcm_pcie_pm_ops = {
- .suspend = brcm_pcie_suspend,
- .resume = brcm_pcie_resume,
+ .suspend_noirq = brcm_pcie_suspend,
+ .resume_noirq = brcm_pcie_resume,
};
static struct platform_driver brcm_pcie_driver = {
diff --git a/drivers/pci/controller/pcie-iproc-bcma.c b/drivers/pci/controller/pcie-iproc-bcma.c
index f918c713afb0..54b6e6d5bc64 100644
--- a/drivers/pci/controller/pcie-iproc-bcma.c
+++ b/drivers/pci/controller/pcie-iproc-bcma.c
@@ -23,7 +23,7 @@ static void bcma_pcie2_fixup_class(struct pci_dev *dev)
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x8011, bcma_pcie2_fixup_class);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x8012, bcma_pcie2_fixup_class);
-static int iproc_pcie_bcma_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+static int iproc_bcma_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
struct iproc_pcie *pcie = dev->sysdata;
struct bcma_device *bdev = container_of(pcie->dev, struct bcma_device, dev);
@@ -31,7 +31,7 @@ static int iproc_pcie_bcma_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
return bcma_core_irq(bdev, 5);
}
-static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
+static int iproc_bcma_pcie_probe(struct bcma_device *bdev)
{
struct device *dev = &bdev->dev;
struct iproc_pcie *pcie;
@@ -64,33 +64,33 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
if (ret)
return ret;
- pcie->map_irq = iproc_pcie_bcma_map_irq;
+ pcie->map_irq = iproc_bcma_pcie_map_irq;
bcma_set_drvdata(bdev, pcie);
return iproc_pcie_setup(pcie, &bridge->windows);
}
-static void iproc_pcie_bcma_remove(struct bcma_device *bdev)
+static void iproc_bcma_pcie_remove(struct bcma_device *bdev)
{
struct iproc_pcie *pcie = bcma_get_drvdata(bdev);
iproc_pcie_remove(pcie);
}
-static const struct bcma_device_id iproc_pcie_bcma_table[] = {
+static const struct bcma_device_id iproc_bcma_pcie_table[] = {
BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_NS_PCIEG2, BCMA_ANY_REV, BCMA_ANY_CLASS),
{},
};
-MODULE_DEVICE_TABLE(bcma, iproc_pcie_bcma_table);
+MODULE_DEVICE_TABLE(bcma, iproc_bcma_pcie_table);
-static struct bcma_driver iproc_pcie_bcma_driver = {
+static struct bcma_driver iproc_bcma_pcie_driver = {
.name = KBUILD_MODNAME,
- .id_table = iproc_pcie_bcma_table,
- .probe = iproc_pcie_bcma_probe,
- .remove = iproc_pcie_bcma_remove,
+ .id_table = iproc_bcma_pcie_table,
+ .probe = iproc_bcma_pcie_probe,
+ .remove = iproc_bcma_pcie_remove,
};
-module_bcma_driver(iproc_pcie_bcma_driver);
+module_bcma_driver(iproc_bcma_pcie_driver);
MODULE_AUTHOR("Hauke Mehrtens");
MODULE_DESCRIPTION("Broadcom iProc PCIe BCMA driver");
diff --git a/drivers/pci/controller/pcie-iproc-platform.c b/drivers/pci/controller/pcie-iproc-platform.c
index b93e7bda101b..538115246c79 100644
--- a/drivers/pci/controller/pcie-iproc-platform.c
+++ b/drivers/pci/controller/pcie-iproc-platform.c
@@ -37,7 +37,7 @@ static const struct of_device_id iproc_pcie_of_match_table[] = {
};
MODULE_DEVICE_TABLE(of, iproc_pcie_of_match_table);
-static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
+static int iproc_pltfm_pcie_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct iproc_pcie *pcie;
@@ -115,30 +115,30 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
return 0;
}
-static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
+static int iproc_pltfm_pcie_remove(struct platform_device *pdev)
{
struct iproc_pcie *pcie = platform_get_drvdata(pdev);
return iproc_pcie_remove(pcie);
}
-static void iproc_pcie_pltfm_shutdown(struct platform_device *pdev)
+static void iproc_pltfm_pcie_shutdown(struct platform_device *pdev)
{
struct iproc_pcie *pcie = platform_get_drvdata(pdev);
iproc_pcie_shutdown(pcie);
}
-static struct platform_driver iproc_pcie_pltfm_driver = {
+static struct platform_driver iproc_pltfm_pcie_driver = {
.driver = {
.name = "iproc-pcie",
.of_match_table = of_match_ptr(iproc_pcie_of_match_table),
},
- .probe = iproc_pcie_pltfm_probe,
- .remove = iproc_pcie_pltfm_remove,
- .shutdown = iproc_pcie_pltfm_shutdown,
+ .probe = iproc_pltfm_pcie_probe,
+ .remove = iproc_pltfm_pcie_remove,
+ .shutdown = iproc_pltfm_pcie_shutdown,
};
-module_platform_driver(iproc_pcie_pltfm_driver);
+module_platform_driver(iproc_pltfm_pcie_driver);
MODULE_AUTHOR("Ray Jui <rjui@broadcom.com>");
MODULE_DESCRIPTION("Broadcom iPROC PCIe platform driver");
diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c
index 36b9d2c46cfa..b3e75bc61ff1 100644
--- a/drivers/pci/controller/pcie-iproc.c
+++ b/drivers/pci/controller/pcie-iproc.c
@@ -659,10 +659,8 @@ static int iproc_pci_raw_config_read32(struct iproc_pcie *pcie,
void __iomem *addr;
addr = iproc_pcie_map_cfg_bus(pcie, 0, devfn, where & ~0x3);
- if (!addr) {
- *val = ~0;
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
*val = readl(addr);
diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
index 17c59b0d6978..7705d61fba4c 100644
--- a/drivers/pci/controller/pcie-mediatek-gen3.c
+++ b/drivers/pci/controller/pcie-mediatek-gen3.c
@@ -79,6 +79,9 @@
#define PCIE_ICMD_PM_REG 0x198
#define PCIE_TURN_OFF_LINK BIT(4)
+#define PCIE_MISC_CTRL_REG 0x348
+#define PCIE_DISABLE_DVFSRC_VLT_REQ BIT(1)
+
#define PCIE_TRANS_TABLE_BASE_REG 0x800
#define PCIE_ATR_SRC_ADDR_MSB_OFFSET 0x4
#define PCIE_ATR_TRSL_ADDR_LSB_OFFSET 0x8
@@ -110,7 +113,7 @@ struct mtk_msi_set {
};
/**
- * struct mtk_pcie_port - PCIe port information
+ * struct mtk_gen3_pcie - PCIe port information
* @dev: pointer to PCIe device
* @base: IO mapped register base
* @reg_base: physical register base
@@ -129,7 +132,7 @@ struct mtk_msi_set {
* @lock: lock protecting IRQ bit map
* @msi_irq_in_use: bit map for assigned MSI IRQ
*/
-struct mtk_pcie_port {
+struct mtk_gen3_pcie {
struct device *dev;
void __iomem *base;
phys_addr_t reg_base;
@@ -162,7 +165,7 @@ struct mtk_pcie_port {
static void mtk_pcie_config_tlp_header(struct pci_bus *bus, unsigned int devfn,
int where, int size)
{
- struct mtk_pcie_port *port = bus->sysdata;
+ struct mtk_gen3_pcie *pcie = bus->sysdata;
int bytes;
u32 val;
@@ -171,15 +174,15 @@ static void mtk_pcie_config_tlp_header(struct pci_bus *bus, unsigned int devfn,
val = PCIE_CFG_FORCE_BYTE_EN | PCIE_CFG_BYTE_EN(bytes) |
PCIE_CFG_HEADER(bus->number, devfn);
- writel_relaxed(val, port->base + PCIE_CFGNUM_REG);
+ writel_relaxed(val, pcie->base + PCIE_CFGNUM_REG);
}
static void __iomem *mtk_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
int where)
{
- struct mtk_pcie_port *port = bus->sysdata;
+ struct mtk_gen3_pcie *pcie = bus->sysdata;
- return port->base + PCIE_CFG_OFFSET_ADDR + where;
+ return pcie->base + PCIE_CFG_OFFSET_ADDR + where;
}
static int mtk_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
@@ -207,7 +210,7 @@ static struct pci_ops mtk_pcie_ops = {
.write = mtk_pcie_config_write,
};
-static int mtk_pcie_set_trans_table(struct mtk_pcie_port *port,
+static int mtk_pcie_set_trans_table(struct mtk_gen3_pcie *pcie,
resource_size_t cpu_addr,
resource_size_t pci_addr,
resource_size_t size,
@@ -217,12 +220,12 @@ static int mtk_pcie_set_trans_table(struct mtk_pcie_port *port,
u32 val;
if (num >= PCIE_MAX_TRANS_TABLES) {
- dev_err(port->dev, "not enough translate table for addr: %#llx, limited to [%d]\n",
+ dev_err(pcie->dev, "not enough translate table for addr: %#llx, limited to [%d]\n",
(unsigned long long)cpu_addr, PCIE_MAX_TRANS_TABLES);
return -ENODEV;
}
- table = port->base + PCIE_TRANS_TABLE_BASE_REG +
+ table = pcie->base + PCIE_TRANS_TABLE_BASE_REG +
num * PCIE_ATR_TLB_SET_OFFSET;
writel_relaxed(lower_32_bits(cpu_addr) | PCIE_ATR_SIZE(fls(size) - 1),
@@ -244,66 +247,71 @@ static int mtk_pcie_set_trans_table(struct mtk_pcie_port *port,
return 0;
}
-static void mtk_pcie_enable_msi(struct mtk_pcie_port *port)
+static void mtk_pcie_enable_msi(struct mtk_gen3_pcie *pcie)
{
int i;
u32 val;
for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
- struct mtk_msi_set *msi_set = &port->msi_sets[i];
+ struct mtk_msi_set *msi_set = &pcie->msi_sets[i];
- msi_set->base = port->base + PCIE_MSI_SET_BASE_REG +
+ msi_set->base = pcie->base + PCIE_MSI_SET_BASE_REG +
i * PCIE_MSI_SET_OFFSET;
- msi_set->msg_addr = port->reg_base + PCIE_MSI_SET_BASE_REG +
+ msi_set->msg_addr = pcie->reg_base + PCIE_MSI_SET_BASE_REG +
i * PCIE_MSI_SET_OFFSET;
/* Configure the MSI capture address */
writel_relaxed(lower_32_bits(msi_set->msg_addr), msi_set->base);
writel_relaxed(upper_32_bits(msi_set->msg_addr),
- port->base + PCIE_MSI_SET_ADDR_HI_BASE +
+ pcie->base + PCIE_MSI_SET_ADDR_HI_BASE +
i * PCIE_MSI_SET_ADDR_HI_OFFSET);
}
- val = readl_relaxed(port->base + PCIE_MSI_SET_ENABLE_REG);
+ val = readl_relaxed(pcie->base + PCIE_MSI_SET_ENABLE_REG);
val |= PCIE_MSI_SET_ENABLE;
- writel_relaxed(val, port->base + PCIE_MSI_SET_ENABLE_REG);
+ writel_relaxed(val, pcie->base + PCIE_MSI_SET_ENABLE_REG);
- val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+ val = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
val |= PCIE_MSI_ENABLE;
- writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
+ writel_relaxed(val, pcie->base + PCIE_INT_ENABLE_REG);
}
-static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
+static int mtk_pcie_startup_port(struct mtk_gen3_pcie *pcie)
{
struct resource_entry *entry;
- struct pci_host_bridge *host = pci_host_bridge_from_priv(port);
+ struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
unsigned int table_index = 0;
int err;
u32 val;
/* Set as RC mode */
- val = readl_relaxed(port->base + PCIE_SETTING_REG);
+ val = readl_relaxed(pcie->base + PCIE_SETTING_REG);
val |= PCIE_RC_MODE;
- writel_relaxed(val, port->base + PCIE_SETTING_REG);
+ writel_relaxed(val, pcie->base + PCIE_SETTING_REG);
/* Set class code */
- val = readl_relaxed(port->base + PCIE_PCI_IDS_1);
+ val = readl_relaxed(pcie->base + PCIE_PCI_IDS_1);
val &= ~GENMASK(31, 8);
val |= PCI_CLASS(PCI_CLASS_BRIDGE_PCI << 8);
- writel_relaxed(val, port->base + PCIE_PCI_IDS_1);
+ writel_relaxed(val, pcie->base + PCIE_PCI_IDS_1);
/* Mask all INTx interrupts */
- val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+ val = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
val &= ~PCIE_INTX_ENABLE;
- writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
+ writel_relaxed(val, pcie->base + PCIE_INT_ENABLE_REG);
+
+ /* Disable DVFSRC voltage request */
+ val = readl_relaxed(pcie->base + PCIE_MISC_CTRL_REG);
+ val |= PCIE_DISABLE_DVFSRC_VLT_REQ;
+ writel_relaxed(val, pcie->base + PCIE_MISC_CTRL_REG);
/* Assert all reset signals */
- val = readl_relaxed(port->base + PCIE_RST_CTRL_REG);
+ val = readl_relaxed(pcie->base + PCIE_RST_CTRL_REG);
val |= PCIE_MAC_RSTB | PCIE_PHY_RSTB | PCIE_BRG_RSTB | PCIE_PE_RSTB;
- writel_relaxed(val, port->base + PCIE_RST_CTRL_REG);
+ writel_relaxed(val, pcie->base + PCIE_RST_CTRL_REG);
/*
- * Described in PCIe CEM specification setctions 2.2 (PERST# Signal)
+ * Described in PCIe CEM specification sections 2.2 (PERST# Signal)
* and 2.2.1 (Initial Power-Up (G3 to S0)).
* The deassertion of PERST# should be delayed 100ms (TPVPERL)
* for the power and clock to become stable.
@@ -312,19 +320,19 @@ static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
/* De-assert reset signals */
val &= ~(PCIE_MAC_RSTB | PCIE_PHY_RSTB | PCIE_BRG_RSTB | PCIE_PE_RSTB);
- writel_relaxed(val, port->base + PCIE_RST_CTRL_REG);
+ writel_relaxed(val, pcie->base + PCIE_RST_CTRL_REG);
/* Check if the link is up or not */
- err = readl_poll_timeout(port->base + PCIE_LINK_STATUS_REG, val,
+ err = readl_poll_timeout(pcie->base + PCIE_LINK_STATUS_REG, val,
!!(val & PCIE_PORT_LINKUP), 20,
PCI_PM_D3COLD_WAIT * USEC_PER_MSEC);
if (err) {
- val = readl_relaxed(port->base + PCIE_LTSSM_STATUS_REG);
- dev_err(port->dev, "PCIe link down, ltssm reg val: %#x\n", val);
+ val = readl_relaxed(pcie->base + PCIE_LTSSM_STATUS_REG);
+ dev_err(pcie->dev, "PCIe link down, ltssm reg val: %#x\n", val);
return err;
}
- mtk_pcie_enable_msi(port);
+ mtk_pcie_enable_msi(pcie);
/* Set PCIe translation windows */
resource_list_for_each_entry(entry, &host->windows) {
@@ -347,12 +355,12 @@ static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
pci_addr = res->start - entry->offset;
size = resource_size(res);
- err = mtk_pcie_set_trans_table(port, cpu_addr, pci_addr, size,
+ err = mtk_pcie_set_trans_table(pcie, cpu_addr, pci_addr, size,
type, table_index);
if (err)
return err;
- dev_dbg(port->dev, "set %s trans window[%d]: cpu_addr = %#llx, pci_addr = %#llx, size = %#llx\n",
+ dev_dbg(pcie->dev, "set %s trans window[%d]: cpu_addr = %#llx, pci_addr = %#llx, size = %#llx\n",
range_type, table_index, (unsigned long long)cpu_addr,
(unsigned long long)pci_addr, (unsigned long long)size);
@@ -396,7 +404,7 @@ static struct msi_domain_info mtk_msi_domain_info = {
static void mtk_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
- struct mtk_pcie_port *port = data->domain->host_data;
+ struct mtk_gen3_pcie *pcie = data->domain->host_data;
unsigned long hwirq;
hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET;
@@ -404,7 +412,7 @@ static void mtk_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
msg->address_hi = upper_32_bits(msi_set->msg_addr);
msg->address_lo = lower_32_bits(msi_set->msg_addr);
msg->data = hwirq;
- dev_dbg(port->dev, "msi#%#lx address_hi %#x address_lo %#x data %d\n",
+ dev_dbg(pcie->dev, "msi#%#lx address_hi %#x address_lo %#x data %d\n",
hwirq, msg->address_hi, msg->address_lo, msg->data);
}
@@ -421,33 +429,33 @@ static void mtk_msi_bottom_irq_ack(struct irq_data *data)
static void mtk_msi_bottom_irq_mask(struct irq_data *data)
{
struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
- struct mtk_pcie_port *port = data->domain->host_data;
+ struct mtk_gen3_pcie *pcie = data->domain->host_data;
unsigned long hwirq, flags;
u32 val;
hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET;
- raw_spin_lock_irqsave(&port->irq_lock, flags);
+ raw_spin_lock_irqsave(&pcie->irq_lock, flags);
val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
val &= ~BIT(hwirq);
writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
- raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
}
static void mtk_msi_bottom_irq_unmask(struct irq_data *data)
{
struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
- struct mtk_pcie_port *port = data->domain->host_data;
+ struct mtk_gen3_pcie *pcie = data->domain->host_data;
unsigned long hwirq, flags;
u32 val;
hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET;
- raw_spin_lock_irqsave(&port->irq_lock, flags);
+ raw_spin_lock_irqsave(&pcie->irq_lock, flags);
val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
val |= BIT(hwirq);
writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
- raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
}
static struct irq_chip mtk_msi_bottom_irq_chip = {
@@ -463,22 +471,22 @@ static int mtk_msi_bottom_domain_alloc(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs,
void *arg)
{
- struct mtk_pcie_port *port = domain->host_data;
+ struct mtk_gen3_pcie *pcie = domain->host_data;
struct mtk_msi_set *msi_set;
int i, hwirq, set_idx;
- mutex_lock(&port->lock);
+ mutex_lock(&pcie->lock);
- hwirq = bitmap_find_free_region(port->msi_irq_in_use, PCIE_MSI_IRQS_NUM,
+ hwirq = bitmap_find_free_region(pcie->msi_irq_in_use, PCIE_MSI_IRQS_NUM,
order_base_2(nr_irqs));
- mutex_unlock(&port->lock);
+ mutex_unlock(&pcie->lock);
if (hwirq < 0)
return -ENOSPC;
set_idx = hwirq / PCIE_MSI_IRQS_PER_SET;
- msi_set = &port->msi_sets[set_idx];
+ msi_set = &pcie->msi_sets[set_idx];
for (i = 0; i < nr_irqs; i++)
irq_domain_set_info(domain, virq + i, hwirq + i,
@@ -491,15 +499,15 @@ static int mtk_msi_bottom_domain_alloc(struct irq_domain *domain,
static void mtk_msi_bottom_domain_free(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs)
{
- struct mtk_pcie_port *port = domain->host_data;
+ struct mtk_gen3_pcie *pcie = domain->host_data;
struct irq_data *data = irq_domain_get_irq_data(domain, virq);
- mutex_lock(&port->lock);
+ mutex_lock(&pcie->lock);
- bitmap_release_region(port->msi_irq_in_use, data->hwirq,
+ bitmap_release_region(pcie->msi_irq_in_use, data->hwirq,
order_base_2(nr_irqs));
- mutex_unlock(&port->lock);
+ mutex_unlock(&pcie->lock);
irq_domain_free_irqs_common(domain, virq, nr_irqs);
}
@@ -511,28 +519,28 @@ static const struct irq_domain_ops mtk_msi_bottom_domain_ops = {
static void mtk_intx_mask(struct irq_data *data)
{
- struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data);
+ struct mtk_gen3_pcie *pcie = irq_data_get_irq_chip_data(data);
unsigned long flags;
u32 val;
- raw_spin_lock_irqsave(&port->irq_lock, flags);
- val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+ raw_spin_lock_irqsave(&pcie->irq_lock, flags);
+ val = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
val &= ~BIT(data->hwirq + PCIE_INTX_SHIFT);
- writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
- raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+ writel_relaxed(val, pcie->base + PCIE_INT_ENABLE_REG);
+ raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
}
static void mtk_intx_unmask(struct irq_data *data)
{
- struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data);
+ struct mtk_gen3_pcie *pcie = irq_data_get_irq_chip_data(data);
unsigned long flags;
u32 val;
- raw_spin_lock_irqsave(&port->irq_lock, flags);
- val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+ raw_spin_lock_irqsave(&pcie->irq_lock, flags);
+ val = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
val |= BIT(data->hwirq + PCIE_INTX_SHIFT);
- writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
- raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+ writel_relaxed(val, pcie->base + PCIE_INT_ENABLE_REG);
+ raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
}
/**
@@ -545,11 +553,11 @@ static void mtk_intx_unmask(struct irq_data *data)
*/
static void mtk_intx_eoi(struct irq_data *data)
{
- struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data);
+ struct mtk_gen3_pcie *pcie = irq_data_get_irq_chip_data(data);
unsigned long hwirq;
hwirq = data->hwirq + PCIE_INTX_SHIFT;
- writel_relaxed(BIT(hwirq), port->base + PCIE_INT_STATUS_REG);
+ writel_relaxed(BIT(hwirq), pcie->base + PCIE_INT_STATUS_REG);
}
static struct irq_chip mtk_intx_irq_chip = {
@@ -573,13 +581,13 @@ static const struct irq_domain_ops intx_domain_ops = {
.map = mtk_pcie_intx_map,
};
-static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port)
+static int mtk_pcie_init_irq_domains(struct mtk_gen3_pcie *pcie)
{
- struct device *dev = port->dev;
+ struct device *dev = pcie->dev;
struct device_node *intc_node, *node = dev->of_node;
int ret;
- raw_spin_lock_init(&port->irq_lock);
+ raw_spin_lock_init(&pcie->irq_lock);
/* Setup INTx */
intc_node = of_get_child_by_name(node, "interrupt-controller");
@@ -588,28 +596,28 @@ static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port)
return -ENODEV;
}
- port->intx_domain = irq_domain_add_linear(intc_node, PCI_NUM_INTX,
- &intx_domain_ops, port);
- if (!port->intx_domain) {
+ pcie->intx_domain = irq_domain_add_linear(intc_node, PCI_NUM_INTX,
+ &intx_domain_ops, pcie);
+ if (!pcie->intx_domain) {
dev_err(dev, "failed to create INTx IRQ domain\n");
return -ENODEV;
}
/* Setup MSI */
- mutex_init(&port->lock);
+ mutex_init(&pcie->lock);
- port->msi_bottom_domain = irq_domain_add_linear(node, PCIE_MSI_IRQS_NUM,
- &mtk_msi_bottom_domain_ops, port);
- if (!port->msi_bottom_domain) {
+ pcie->msi_bottom_domain = irq_domain_add_linear(node, PCIE_MSI_IRQS_NUM,
+ &mtk_msi_bottom_domain_ops, pcie);
+ if (!pcie->msi_bottom_domain) {
dev_err(dev, "failed to create MSI bottom domain\n");
ret = -ENODEV;
goto err_msi_bottom_domain;
}
- port->msi_domain = pci_msi_create_irq_domain(dev->fwnode,
+ pcie->msi_domain = pci_msi_create_irq_domain(dev->fwnode,
&mtk_msi_domain_info,
- port->msi_bottom_domain);
- if (!port->msi_domain) {
+ pcie->msi_bottom_domain);
+ if (!pcie->msi_domain) {
dev_err(dev, "failed to create MSI domain\n");
ret = -ENODEV;
goto err_msi_domain;
@@ -618,32 +626,32 @@ static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port)
return 0;
err_msi_domain:
- irq_domain_remove(port->msi_bottom_domain);
+ irq_domain_remove(pcie->msi_bottom_domain);
err_msi_bottom_domain:
- irq_domain_remove(port->intx_domain);
+ irq_domain_remove(pcie->intx_domain);
return ret;
}
-static void mtk_pcie_irq_teardown(struct mtk_pcie_port *port)
+static void mtk_pcie_irq_teardown(struct mtk_gen3_pcie *pcie)
{
- irq_set_chained_handler_and_data(port->irq, NULL, NULL);
+ irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
- if (port->intx_domain)
- irq_domain_remove(port->intx_domain);
+ if (pcie->intx_domain)
+ irq_domain_remove(pcie->intx_domain);
- if (port->msi_domain)
- irq_domain_remove(port->msi_domain);
+ if (pcie->msi_domain)
+ irq_domain_remove(pcie->msi_domain);
- if (port->msi_bottom_domain)
- irq_domain_remove(port->msi_bottom_domain);
+ if (pcie->msi_bottom_domain)
+ irq_domain_remove(pcie->msi_bottom_domain);
- irq_dispose_mapping(port->irq);
+ irq_dispose_mapping(pcie->irq);
}
-static void mtk_pcie_msi_handler(struct mtk_pcie_port *port, int set_idx)
+static void mtk_pcie_msi_handler(struct mtk_gen3_pcie *pcie, int set_idx)
{
- struct mtk_msi_set *msi_set = &port->msi_sets[set_idx];
+ struct mtk_msi_set *msi_set = &pcie->msi_sets[set_idx];
unsigned long msi_enable, msi_status;
irq_hw_number_t bit, hwirq;
@@ -658,59 +666,59 @@ static void mtk_pcie_msi_handler(struct mtk_pcie_port *port, int set_idx)
for_each_set_bit(bit, &msi_status, PCIE_MSI_IRQS_PER_SET) {
hwirq = bit + set_idx * PCIE_MSI_IRQS_PER_SET;
- generic_handle_domain_irq(port->msi_bottom_domain, hwirq);
+ generic_handle_domain_irq(pcie->msi_bottom_domain, hwirq);
}
} while (true);
}
static void mtk_pcie_irq_handler(struct irq_desc *desc)
{
- struct mtk_pcie_port *port = irq_desc_get_handler_data(desc);
+ struct mtk_gen3_pcie *pcie = irq_desc_get_handler_data(desc);
struct irq_chip *irqchip = irq_desc_get_chip(desc);
unsigned long status;
irq_hw_number_t irq_bit = PCIE_INTX_SHIFT;
chained_irq_enter(irqchip, desc);
- status = readl_relaxed(port->base + PCIE_INT_STATUS_REG);
+ status = readl_relaxed(pcie->base + PCIE_INT_STATUS_REG);
for_each_set_bit_from(irq_bit, &status, PCI_NUM_INTX +
PCIE_INTX_SHIFT)
- generic_handle_domain_irq(port->intx_domain,
+ generic_handle_domain_irq(pcie->intx_domain,
irq_bit - PCIE_INTX_SHIFT);
irq_bit = PCIE_MSI_SHIFT;
for_each_set_bit_from(irq_bit, &status, PCIE_MSI_SET_NUM +
PCIE_MSI_SHIFT) {
- mtk_pcie_msi_handler(port, irq_bit - PCIE_MSI_SHIFT);
+ mtk_pcie_msi_handler(pcie, irq_bit - PCIE_MSI_SHIFT);
- writel_relaxed(BIT(irq_bit), port->base + PCIE_INT_STATUS_REG);
+ writel_relaxed(BIT(irq_bit), pcie->base + PCIE_INT_STATUS_REG);
}
chained_irq_exit(irqchip, desc);
}
-static int mtk_pcie_setup_irq(struct mtk_pcie_port *port)
+static int mtk_pcie_setup_irq(struct mtk_gen3_pcie *pcie)
{
- struct device *dev = port->dev;
+ struct device *dev = pcie->dev;
struct platform_device *pdev = to_platform_device(dev);
int err;
- err = mtk_pcie_init_irq_domains(port);
+ err = mtk_pcie_init_irq_domains(pcie);
if (err)
return err;
- port->irq = platform_get_irq(pdev, 0);
- if (port->irq < 0)
- return port->irq;
+ pcie->irq = platform_get_irq(pdev, 0);
+ if (pcie->irq < 0)
+ return pcie->irq;
- irq_set_chained_handler_and_data(port->irq, mtk_pcie_irq_handler, port);
+ irq_set_chained_handler_and_data(pcie->irq, mtk_pcie_irq_handler, pcie);
return 0;
}
-static int mtk_pcie_parse_port(struct mtk_pcie_port *port)
+static int mtk_pcie_parse_port(struct mtk_gen3_pcie *pcie)
{
- struct device *dev = port->dev;
+ struct device *dev = pcie->dev;
struct platform_device *pdev = to_platform_device(dev);
struct resource *regs;
int ret;
@@ -718,77 +726,77 @@ static int mtk_pcie_parse_port(struct mtk_pcie_port *port)
regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcie-mac");
if (!regs)
return -EINVAL;
- port->base = devm_ioremap_resource(dev, regs);
- if (IS_ERR(port->base)) {
+ pcie->base = devm_ioremap_resource(dev, regs);
+ if (IS_ERR(pcie->base)) {
dev_err(dev, "failed to map register base\n");
- return PTR_ERR(port->base);
+ return PTR_ERR(pcie->base);
}
- port->reg_base = regs->start;
+ pcie->reg_base = regs->start;
- port->phy_reset = devm_reset_control_get_optional_exclusive(dev, "phy");
- if (IS_ERR(port->phy_reset)) {
- ret = PTR_ERR(port->phy_reset);
+ pcie->phy_reset = devm_reset_control_get_optional_exclusive(dev, "phy");
+ if (IS_ERR(pcie->phy_reset)) {
+ ret = PTR_ERR(pcie->phy_reset);
if (ret != -EPROBE_DEFER)
dev_err(dev, "failed to get PHY reset\n");
return ret;
}
- port->mac_reset = devm_reset_control_get_optional_exclusive(dev, "mac");
- if (IS_ERR(port->mac_reset)) {
- ret = PTR_ERR(port->mac_reset);
+ pcie->mac_reset = devm_reset_control_get_optional_exclusive(dev, "mac");
+ if (IS_ERR(pcie->mac_reset)) {
+ ret = PTR_ERR(pcie->mac_reset);
if (ret != -EPROBE_DEFER)
dev_err(dev, "failed to get MAC reset\n");
return ret;
}
- port->phy = devm_phy_optional_get(dev, "pcie-phy");
- if (IS_ERR(port->phy)) {
- ret = PTR_ERR(port->phy);
+ pcie->phy = devm_phy_optional_get(dev, "pcie-phy");
+ if (IS_ERR(pcie->phy)) {
+ ret = PTR_ERR(pcie->phy);
if (ret != -EPROBE_DEFER)
dev_err(dev, "failed to get PHY\n");
return ret;
}
- port->num_clks = devm_clk_bulk_get_all(dev, &port->clks);
- if (port->num_clks < 0) {
+ pcie->num_clks = devm_clk_bulk_get_all(dev, &pcie->clks);
+ if (pcie->num_clks < 0) {
dev_err(dev, "failed to get clocks\n");
- return port->num_clks;
+ return pcie->num_clks;
}
return 0;
}
-static int mtk_pcie_power_up(struct mtk_pcie_port *port)
+static int mtk_pcie_power_up(struct mtk_gen3_pcie *pcie)
{
- struct device *dev = port->dev;
+ struct device *dev = pcie->dev;
int err;
/* PHY power on and enable pipe clock */
- reset_control_deassert(port->phy_reset);
+ reset_control_deassert(pcie->phy_reset);
- err = phy_init(port->phy);
+ err = phy_init(pcie->phy);
if (err) {
dev_err(dev, "failed to initialize PHY\n");
goto err_phy_init;
}
- err = phy_power_on(port->phy);
+ err = phy_power_on(pcie->phy);
if (err) {
dev_err(dev, "failed to power on PHY\n");
goto err_phy_on;
}
/* MAC power on and enable transaction layer clocks */
- reset_control_deassert(port->mac_reset);
+ reset_control_deassert(pcie->mac_reset);
pm_runtime_enable(dev);
pm_runtime_get_sync(dev);
- err = clk_bulk_prepare_enable(port->num_clks, port->clks);
+ err = clk_bulk_prepare_enable(pcie->num_clks, pcie->clks);
if (err) {
dev_err(dev, "failed to enable clocks\n");
goto err_clk_init;
@@ -799,55 +807,55 @@ static int mtk_pcie_power_up(struct mtk_pcie_port *port)
err_clk_init:
pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
- reset_control_assert(port->mac_reset);
- phy_power_off(port->phy);
+ reset_control_assert(pcie->mac_reset);
+ phy_power_off(pcie->phy);
err_phy_on:
- phy_exit(port->phy);
+ phy_exit(pcie->phy);
err_phy_init:
- reset_control_assert(port->phy_reset);
+ reset_control_assert(pcie->phy_reset);
return err;
}
-static void mtk_pcie_power_down(struct mtk_pcie_port *port)
+static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
{
- clk_bulk_disable_unprepare(port->num_clks, port->clks);
+ clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
- pm_runtime_put_sync(port->dev);
- pm_runtime_disable(port->dev);
- reset_control_assert(port->mac_reset);
+ pm_runtime_put_sync(pcie->dev);
+ pm_runtime_disable(pcie->dev);
+ reset_control_assert(pcie->mac_reset);
- phy_power_off(port->phy);
- phy_exit(port->phy);
- reset_control_assert(port->phy_reset);
+ phy_power_off(pcie->phy);
+ phy_exit(pcie->phy);
+ reset_control_assert(pcie->phy_reset);
}
-static int mtk_pcie_setup(struct mtk_pcie_port *port)
+static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
{
int err;
- err = mtk_pcie_parse_port(port);
+ err = mtk_pcie_parse_port(pcie);
if (err)
return err;
/* Don't touch the hardware registers before power up */
- err = mtk_pcie_power_up(port);
+ err = mtk_pcie_power_up(pcie);
if (err)
return err;
/* Try link up */
- err = mtk_pcie_startup_port(port);
+ err = mtk_pcie_startup_port(pcie);
if (err)
goto err_setup;
- err = mtk_pcie_setup_irq(port);
+ err = mtk_pcie_setup_irq(pcie);
if (err)
goto err_setup;
return 0;
err_setup:
- mtk_pcie_power_down(port);
+ mtk_pcie_power_down(pcie);
return err;
}
@@ -855,30 +863,30 @@ err_setup:
static int mtk_pcie_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct mtk_pcie_port *port;
+ struct mtk_gen3_pcie *pcie;
struct pci_host_bridge *host;
int err;
- host = devm_pci_alloc_host_bridge(dev, sizeof(*port));
+ host = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
if (!host)
return -ENOMEM;
- port = pci_host_bridge_priv(host);
+ pcie = pci_host_bridge_priv(host);
- port->dev = dev;
- platform_set_drvdata(pdev, port);
+ pcie->dev = dev;
+ platform_set_drvdata(pdev, pcie);
- err = mtk_pcie_setup(port);
+ err = mtk_pcie_setup(pcie);
if (err)
return err;
host->ops = &mtk_pcie_ops;
- host->sysdata = port;
+ host->sysdata = pcie;
err = pci_host_probe(host);
if (err) {
- mtk_pcie_irq_teardown(port);
- mtk_pcie_power_down(port);
+ mtk_pcie_irq_teardown(pcie);
+ mtk_pcie_power_down(pcie);
return err;
}
@@ -887,66 +895,66 @@ static int mtk_pcie_probe(struct platform_device *pdev)
static int mtk_pcie_remove(struct platform_device *pdev)
{
- struct mtk_pcie_port *port = platform_get_drvdata(pdev);
- struct pci_host_bridge *host = pci_host_bridge_from_priv(port);
+ struct mtk_gen3_pcie *pcie = platform_get_drvdata(pdev);
+ struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
pci_lock_rescan_remove();
pci_stop_root_bus(host->bus);
pci_remove_root_bus(host->bus);
pci_unlock_rescan_remove();
- mtk_pcie_irq_teardown(port);
- mtk_pcie_power_down(port);
+ mtk_pcie_irq_teardown(pcie);
+ mtk_pcie_power_down(pcie);
return 0;
}
-static void __maybe_unused mtk_pcie_irq_save(struct mtk_pcie_port *port)
+static void __maybe_unused mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
{
int i;
- raw_spin_lock(&port->irq_lock);
+ raw_spin_lock(&pcie->irq_lock);
- port->saved_irq_state = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+ pcie->saved_irq_state = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
- struct mtk_msi_set *msi_set = &port->msi_sets[i];
+ struct mtk_msi_set *msi_set = &pcie->msi_sets[i];
msi_set->saved_irq_state = readl_relaxed(msi_set->base +
PCIE_MSI_SET_ENABLE_OFFSET);
}
- raw_spin_unlock(&port->irq_lock);
+ raw_spin_unlock(&pcie->irq_lock);
}
-static void __maybe_unused mtk_pcie_irq_restore(struct mtk_pcie_port *port)
+static void __maybe_unused mtk_pcie_irq_restore(struct mtk_gen3_pcie *pcie)
{
int i;
- raw_spin_lock(&port->irq_lock);
+ raw_spin_lock(&pcie->irq_lock);
- writel_relaxed(port->saved_irq_state, port->base + PCIE_INT_ENABLE_REG);
+ writel_relaxed(pcie->saved_irq_state, pcie->base + PCIE_INT_ENABLE_REG);
for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
- struct mtk_msi_set *msi_set = &port->msi_sets[i];
+ struct mtk_msi_set *msi_set = &pcie->msi_sets[i];
writel_relaxed(msi_set->saved_irq_state,
msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
}
- raw_spin_unlock(&port->irq_lock);
+ raw_spin_unlock(&pcie->irq_lock);
}
-static int __maybe_unused mtk_pcie_turn_off_link(struct mtk_pcie_port *port)
+static int __maybe_unused mtk_pcie_turn_off_link(struct mtk_gen3_pcie *pcie)
{
u32 val;
- val = readl_relaxed(port->base + PCIE_ICMD_PM_REG);
+ val = readl_relaxed(pcie->base + PCIE_ICMD_PM_REG);
val |= PCIE_TURN_OFF_LINK;
- writel_relaxed(val, port->base + PCIE_ICMD_PM_REG);
+ writel_relaxed(val, pcie->base + PCIE_ICMD_PM_REG);
/* Check the link is L2 */
- return readl_poll_timeout(port->base + PCIE_LTSSM_STATUS_REG, val,
+ return readl_poll_timeout(pcie->base + PCIE_LTSSM_STATUS_REG, val,
(PCIE_LTSSM_STATE(val) ==
PCIE_LTSSM_STATE_L2_IDLE), 20,
50 * USEC_PER_MSEC);
@@ -954,46 +962,46 @@ static int __maybe_unused mtk_pcie_turn_off_link(struct mtk_pcie_port *port)
static int __maybe_unused mtk_pcie_suspend_noirq(struct device *dev)
{
- struct mtk_pcie_port *port = dev_get_drvdata(dev);
+ struct mtk_gen3_pcie *pcie = dev_get_drvdata(dev);
int err;
u32 val;
/* Trigger link to L2 state */
- err = mtk_pcie_turn_off_link(port);
+ err = mtk_pcie_turn_off_link(pcie);
if (err) {
- dev_err(port->dev, "cannot enter L2 state\n");
+ dev_err(pcie->dev, "cannot enter L2 state\n");
return err;
}
/* Pull down the PERST# pin */
- val = readl_relaxed(port->base + PCIE_RST_CTRL_REG);
+ val = readl_relaxed(pcie->base + PCIE_RST_CTRL_REG);
val |= PCIE_PE_RSTB;
- writel_relaxed(val, port->base + PCIE_RST_CTRL_REG);
+ writel_relaxed(val, pcie->base + PCIE_RST_CTRL_REG);
- dev_dbg(port->dev, "entered L2 states successfully");
+ dev_dbg(pcie->dev, "entered L2 states successfully");
- mtk_pcie_irq_save(port);
- mtk_pcie_power_down(port);
+ mtk_pcie_irq_save(pcie);
+ mtk_pcie_power_down(pcie);
return 0;
}
static int __maybe_unused mtk_pcie_resume_noirq(struct device *dev)
{
- struct mtk_pcie_port *port = dev_get_drvdata(dev);
+ struct mtk_gen3_pcie *pcie = dev_get_drvdata(dev);
int err;
- err = mtk_pcie_power_up(port);
+ err = mtk_pcie_power_up(pcie);
if (err)
return err;
- err = mtk_pcie_startup_port(port);
+ err = mtk_pcie_startup_port(pcie);
if (err) {
- mtk_pcie_power_down(port);
+ mtk_pcie_power_down(pcie);
return err;
}
- mtk_pcie_irq_restore(port);
+ mtk_pcie_irq_restore(pcie);
return 0;
}
diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c
index 2f3f974977a3..ddfbd4aebdec 100644
--- a/drivers/pci/controller/pcie-mediatek.c
+++ b/drivers/pci/controller/pcie-mediatek.c
@@ -365,19 +365,12 @@ static int mtk_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
{
struct mtk_pcie_port *port;
u32 bn = bus->number;
- int ret;
port = mtk_pcie_find_port(bus, devfn);
- if (!port) {
- *val = ~0;
+ if (!port)
return PCIBIOS_DEVICE_NOT_FOUND;
- }
-
- ret = mtk_pcie_hw_rd_cfg(port, bn, devfn, where, size, val);
- if (ret)
- *val = ~0;
- return ret;
+ return mtk_pcie_hw_rd_cfg(port, bn, devfn, where, size, val);
}
static int mtk_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
@@ -702,6 +695,13 @@ static int mtk_pcie_startup_port_v2(struct mtk_pcie_port *port)
*/
writel(PCIE_LINKDOWN_RST_EN, port->base + PCIE_RST_CTRL);
+ /*
+ * Described in PCIe CEM specification sections 2.2 (PERST# Signal) and
+ * 2.2.1 (Initial Power-Up (G3 to S0)). The deassertion of PERST# should
+ * be delayed 100ms (TPVPERL) for the power and clock to become stable.
+ */
+ msleep(100);
+
/* De-assert PHY, PE, PIPE, MAC and configuration reset */
val = readl(port->base + PCIE_RST_CTRL);
val |= PCIE_PHY_RSTB | PCIE_PERSTB | PCIE_PIPE_SRSTB |
diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c
index 329f930d17aa..29d8e81e4181 100644
--- a/drivers/pci/controller/pcie-microchip-host.c
+++ b/drivers/pci/controller/pcie-microchip-host.c
@@ -262,7 +262,7 @@ struct mc_msi {
DECLARE_BITMAP(used, MC_NUM_MSI_IRQS);
};
-struct mc_port {
+struct mc_pcie {
void __iomem *axi_base_addr;
struct device *dev;
struct irq_domain *intx_domain;
@@ -382,7 +382,7 @@ static struct {
static char poss_clks[][5] = { "fic0", "fic1", "fic2", "fic3" };
-static void mc_pcie_enable_msi(struct mc_port *port, void __iomem *base)
+static void mc_pcie_enable_msi(struct mc_pcie *port, void __iomem *base)
{
struct mc_msi *msi = &port->msi;
u32 cap_offset = MC_MSI_CAP_CTRL_OFFSET;
@@ -405,7 +405,7 @@ static void mc_pcie_enable_msi(struct mc_port *port, void __iomem *base)
static void mc_handle_msi(struct irq_desc *desc)
{
- struct mc_port *port = irq_desc_get_handler_data(desc);
+ struct mc_pcie *port = irq_desc_get_handler_data(desc);
struct device *dev = port->dev;
struct mc_msi *msi = &port->msi;
void __iomem *bridge_base_addr =
@@ -428,7 +428,7 @@ static void mc_handle_msi(struct irq_desc *desc)
static void mc_msi_bottom_irq_ack(struct irq_data *data)
{
- struct mc_port *port = irq_data_get_irq_chip_data(data);
+ struct mc_pcie *port = irq_data_get_irq_chip_data(data);
void __iomem *bridge_base_addr =
port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
u32 bitpos = data->hwirq;
@@ -443,7 +443,7 @@ static void mc_msi_bottom_irq_ack(struct irq_data *data)
static void mc_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
- struct mc_port *port = irq_data_get_irq_chip_data(data);
+ struct mc_pcie *port = irq_data_get_irq_chip_data(data);
phys_addr_t addr = port->msi.vector_phy;
msg->address_lo = lower_32_bits(addr);
@@ -470,7 +470,7 @@ static struct irq_chip mc_msi_bottom_irq_chip = {
static int mc_irq_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *args)
{
- struct mc_port *port = domain->host_data;
+ struct mc_pcie *port = domain->host_data;
struct mc_msi *msi = &port->msi;
void __iomem *bridge_base_addr =
port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
@@ -503,7 +503,7 @@ static void mc_irq_msi_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs)
{
struct irq_data *d = irq_domain_get_irq_data(domain, virq);
- struct mc_port *port = irq_data_get_irq_chip_data(d);
+ struct mc_pcie *port = irq_data_get_irq_chip_data(d);
struct mc_msi *msi = &port->msi;
mutex_lock(&msi->lock);
@@ -534,7 +534,7 @@ static struct msi_domain_info mc_msi_domain_info = {
.chip = &mc_msi_irq_chip,
};
-static int mc_allocate_msi_domains(struct mc_port *port)
+static int mc_allocate_msi_domains(struct mc_pcie *port)
{
struct device *dev = port->dev;
struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node);
@@ -562,7 +562,7 @@ static int mc_allocate_msi_domains(struct mc_port *port)
static void mc_handle_intx(struct irq_desc *desc)
{
- struct mc_port *port = irq_desc_get_handler_data(desc);
+ struct mc_pcie *port = irq_desc_get_handler_data(desc);
struct device *dev = port->dev;
void __iomem *bridge_base_addr =
port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
@@ -585,7 +585,7 @@ static void mc_handle_intx(struct irq_desc *desc)
static void mc_ack_intx_irq(struct irq_data *data)
{
- struct mc_port *port = irq_data_get_irq_chip_data(data);
+ struct mc_pcie *port = irq_data_get_irq_chip_data(data);
void __iomem *bridge_base_addr =
port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
u32 mask = BIT(data->hwirq + PM_MSI_INT_INTX_SHIFT);
@@ -595,7 +595,7 @@ static void mc_ack_intx_irq(struct irq_data *data)
static void mc_mask_intx_irq(struct irq_data *data)
{
- struct mc_port *port = irq_data_get_irq_chip_data(data);
+ struct mc_pcie *port = irq_data_get_irq_chip_data(data);
void __iomem *bridge_base_addr =
port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
unsigned long flags;
@@ -611,7 +611,7 @@ static void mc_mask_intx_irq(struct irq_data *data)
static void mc_unmask_intx_irq(struct irq_data *data)
{
- struct mc_port *port = irq_data_get_irq_chip_data(data);
+ struct mc_pcie *port = irq_data_get_irq_chip_data(data);
void __iomem *bridge_base_addr =
port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
unsigned long flags;
@@ -698,7 +698,7 @@ static u32 local_events(void __iomem *addr)
return val;
}
-static u32 get_events(struct mc_port *port)
+static u32 get_events(struct mc_pcie *port)
{
void __iomem *bridge_base_addr =
port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
@@ -715,7 +715,7 @@ static u32 get_events(struct mc_port *port)
static irqreturn_t mc_event_handler(int irq, void *dev_id)
{
- struct mc_port *port = dev_id;
+ struct mc_pcie *port = dev_id;
struct device *dev = port->dev;
struct irq_data *data;
@@ -731,7 +731,7 @@ static irqreturn_t mc_event_handler(int irq, void *dev_id)
static void mc_handle_event(struct irq_desc *desc)
{
- struct mc_port *port = irq_desc_get_handler_data(desc);
+ struct mc_pcie *port = irq_desc_get_handler_data(desc);
unsigned long events;
u32 bit;
struct irq_chip *chip = irq_desc_get_chip(desc);
@@ -748,7 +748,7 @@ static void mc_handle_event(struct irq_desc *desc)
static void mc_ack_event_irq(struct irq_data *data)
{
- struct mc_port *port = irq_data_get_irq_chip_data(data);
+ struct mc_pcie *port = irq_data_get_irq_chip_data(data);
u32 event = data->hwirq;
void __iomem *addr;
u32 mask;
@@ -763,7 +763,7 @@ static void mc_ack_event_irq(struct irq_data *data)
static void mc_mask_event_irq(struct irq_data *data)
{
- struct mc_port *port = irq_data_get_irq_chip_data(data);
+ struct mc_pcie *port = irq_data_get_irq_chip_data(data);
u32 event = data->hwirq;
void __iomem *addr;
u32 mask;
@@ -793,7 +793,7 @@ static void mc_mask_event_irq(struct irq_data *data)
static void mc_unmask_event_irq(struct irq_data *data)
{
- struct mc_port *port = irq_data_get_irq_chip_data(data);
+ struct mc_pcie *port = irq_data_get_irq_chip_data(data);
u32 event = data->hwirq;
void __iomem *addr;
u32 mask;
@@ -881,7 +881,7 @@ static int mc_pcie_init_clks(struct device *dev)
return 0;
}
-static int mc_pcie_init_irq_domains(struct mc_port *port)
+static int mc_pcie_init_irq_domains(struct mc_pcie *port)
{
struct device *dev = port->dev;
struct device_node *node = dev->of_node;
@@ -957,7 +957,7 @@ static void mc_pcie_setup_window(void __iomem *bridge_base_addr, u32 index,
}
static int mc_pcie_setup_windows(struct platform_device *pdev,
- struct mc_port *port)
+ struct mc_pcie *port)
{
void __iomem *bridge_base_addr =
port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
@@ -983,7 +983,7 @@ static int mc_platform_init(struct pci_config_window *cfg)
{
struct device *dev = cfg->parent;
struct platform_device *pdev = to_platform_device(dev);
- struct mc_port *port;
+ struct mc_pcie *port;
void __iomem *bridge_base_addr;
void __iomem *ctrl_base_addr;
int ret;
diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c
index b60dfb45ef7b..3824862ea144 100644
--- a/drivers/pci/controller/pcie-mt7621.c
+++ b/drivers/pci/controller/pcie-mt7621.c
@@ -93,8 +93,8 @@ struct mt7621_pcie_port {
* reset lines are inverted.
*/
struct mt7621_pcie {
- void __iomem *base;
struct device *dev;
+ void __iomem *base;
struct list_head ports;
bool resets_inverted;
};
@@ -129,7 +129,7 @@ static inline void pcie_port_write(struct mt7621_pcie_port *port,
writel_relaxed(val, port->base + reg);
}
-static inline u32 mt7621_pci_get_cfgaddr(unsigned int bus, unsigned int slot,
+static inline u32 mt7621_pcie_get_cfgaddr(unsigned int bus, unsigned int slot,
unsigned int func, unsigned int where)
{
return (((where & 0xf00) >> 8) << 24) | (bus << 16) | (slot << 11) |
@@ -140,7 +140,7 @@ static void __iomem *mt7621_pcie_map_bus(struct pci_bus *bus,
unsigned int devfn, int where)
{
struct mt7621_pcie *pcie = bus->sysdata;
- u32 address = mt7621_pci_get_cfgaddr(bus->number, PCI_SLOT(devfn),
+ u32 address = mt7621_pcie_get_cfgaddr(bus->number, PCI_SLOT(devfn),
PCI_FUNC(devfn), where);
writel_relaxed(address, pcie->base + RALINK_PCI_CONFIG_ADDR);
@@ -148,7 +148,7 @@ static void __iomem *mt7621_pcie_map_bus(struct pci_bus *bus,
return pcie->base + RALINK_PCI_CONFIG_DATA + (where & 3);
}
-struct pci_ops mt7621_pci_ops = {
+static struct pci_ops mt7621_pcie_ops = {
.map_bus = mt7621_pcie_map_bus,
.read = pci_generic_config_read,
.write = pci_generic_config_write,
@@ -156,7 +156,7 @@ struct pci_ops mt7621_pci_ops = {
static u32 read_config(struct mt7621_pcie *pcie, unsigned int dev, u32 reg)
{
- u32 address = mt7621_pci_get_cfgaddr(0, dev, 0, reg);
+ u32 address = mt7621_pcie_get_cfgaddr(0, dev, 0, reg);
pcie_write(pcie, address, RALINK_PCI_CONFIG_ADDR);
return pcie_read(pcie, RALINK_PCI_CONFIG_DATA);
@@ -165,7 +165,7 @@ static u32 read_config(struct mt7621_pcie *pcie, unsigned int dev, u32 reg)
static void write_config(struct mt7621_pcie *pcie, unsigned int dev,
u32 reg, u32 val)
{
- u32 address = mt7621_pci_get_cfgaddr(0, dev, 0, reg);
+ u32 address = mt7621_pcie_get_cfgaddr(0, dev, 0, reg);
pcie_write(pcie, address, RALINK_PCI_CONFIG_ADDR);
pcie_write(pcie, val, RALINK_PCI_CONFIG_DATA);
@@ -208,37 +208,6 @@ static inline void mt7621_control_deassert(struct mt7621_pcie_port *port)
reset_control_assert(port->pcie_rst);
}
-static int setup_cm_memory_region(struct pci_host_bridge *host)
-{
- struct mt7621_pcie *pcie = pci_host_bridge_priv(host);
- struct device *dev = pcie->dev;
- struct resource_entry *entry;
- resource_size_t mask;
-
- entry = resource_list_first_type(&host->windows, IORESOURCE_MEM);
- if (!entry) {
- dev_err(dev, "cannot get memory resource\n");
- return -EINVAL;
- }
-
- if (mips_cps_numiocu(0)) {
- /*
- * FIXME: hardware doesn't accept mask values with 1s after
- * 0s (e.g. 0xffef), so it would be great to warn if that's
- * about to happen
- */
- mask = ~(entry->res->end - entry->res->start);
-
- write_gcr_reg1_base(entry->res->start);
- write_gcr_reg1_mask(mask | CM_GCR_REGn_MASK_CMTGT_IOCU0);
- dev_info(dev, "PCI coherence region base: 0x%08llx, mask/settings: 0x%08llx\n",
- (unsigned long long)read_gcr_reg1_base(),
- (unsigned long long)read_gcr_reg1_mask());
- }
-
- return 0;
-}
-
static int mt7621_pcie_parse_port(struct mt7621_pcie *pcie,
struct device_node *node,
int slot)
@@ -505,16 +474,16 @@ static int mt7621_pcie_register_host(struct pci_host_bridge *host)
{
struct mt7621_pcie *pcie = pci_host_bridge_priv(host);
- host->ops = &mt7621_pci_ops;
+ host->ops = &mt7621_pcie_ops;
host->sysdata = pcie;
return pci_host_probe(host);
}
-static const struct soc_device_attribute mt7621_pci_quirks_match[] = {
+static const struct soc_device_attribute mt7621_pcie_quirks_match[] = {
{ .soc_id = "mt7621", .revision = "E2" }
};
-static int mt7621_pci_probe(struct platform_device *pdev)
+static int mt7621_pcie_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
const struct soc_device_attribute *attr;
@@ -535,7 +504,7 @@ static int mt7621_pci_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, pcie);
INIT_LIST_HEAD(&pcie->ports);
- attr = soc_device_match(mt7621_pci_quirks_match);
+ attr = soc_device_match(mt7621_pcie_quirks_match);
if (attr)
pcie->resets_inverted = true;
@@ -557,12 +526,6 @@ static int mt7621_pci_probe(struct platform_device *pdev)
goto remove_resets;
}
- err = setup_cm_memory_region(bridge);
- if (err) {
- dev_err(dev, "error setting up iocu mem regions\n");
- goto remove_resets;
- }
-
return mt7621_pcie_register_host(bridge);
remove_resets:
@@ -572,7 +535,7 @@ remove_resets:
return err;
}
-static int mt7621_pci_remove(struct platform_device *pdev)
+static int mt7621_pcie_remove(struct platform_device *pdev)
{
struct mt7621_pcie *pcie = platform_get_drvdata(pdev);
struct mt7621_pcie_port *port;
@@ -583,18 +546,20 @@ static int mt7621_pci_remove(struct platform_device *pdev)
return 0;
}
-static const struct of_device_id mt7621_pci_ids[] = {
+static const struct of_device_id mt7621_pcie_ids[] = {
{ .compatible = "mediatek,mt7621-pci" },
{},
};
-MODULE_DEVICE_TABLE(of, mt7621_pci_ids);
+MODULE_DEVICE_TABLE(of, mt7621_pcie_ids);
-static struct platform_driver mt7621_pci_driver = {
- .probe = mt7621_pci_probe,
- .remove = mt7621_pci_remove,
+static struct platform_driver mt7621_pcie_driver = {
+ .probe = mt7621_pcie_probe,
+ .remove = mt7621_pcie_remove,
.driver = {
.name = "mt7621-pci",
- .of_match_table = of_match_ptr(mt7621_pci_ids),
+ .of_match_table = of_match_ptr(mt7621_pcie_ids),
},
};
-builtin_platform_driver(mt7621_pci_driver);
+builtin_platform_driver(mt7621_pcie_driver);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-rcar-host.c b/drivers/pci/controller/pcie-rcar-host.c
index e12c2d8be05a..38b6e02edfa9 100644
--- a/drivers/pci/controller/pcie-rcar-host.c
+++ b/drivers/pci/controller/pcie-rcar-host.c
@@ -50,10 +50,10 @@ struct rcar_msi {
*/
static void __iomem *pcie_base;
/*
- * Static copy of bus clock pointer, so we can check whether the clock
- * is enabled or not.
+ * Static copy of PCIe device pointer, so we can check whether the
+ * device is runtime suspended or not.
*/
-static struct clk *pcie_bus_clk;
+static struct device *pcie_dev;
#endif
/* Structure representing the PCIe interface */
@@ -159,10 +159,8 @@ static int rcar_pcie_read_conf(struct pci_bus *bus, unsigned int devfn,
ret = rcar_pcie_config_access(host, RCAR_PCI_ACCESS_READ,
bus, devfn, where, val);
- if (ret != PCIBIOS_SUCCESSFUL) {
- *val = 0xffffffff;
+ if (ret != PCIBIOS_SUCCESSFUL)
return ret;
- }
if (size == 1)
*val = (*val >> (BITS_PER_BYTE * (where & 3))) & 0xff;
@@ -792,7 +790,7 @@ static int rcar_pcie_get_resources(struct rcar_pcie_host *host)
#ifdef CONFIG_ARM
/* Cache static copy for L1 link state fixup hook on aarch32 */
pcie_base = pcie->base;
- pcie_bus_clk = host->bus_clk;
+ pcie_dev = pcie->dev;
#endif
return 0;
@@ -1062,7 +1060,7 @@ static int rcar_pcie_aarch32_abort_handler(unsigned long addr,
spin_lock_irqsave(&pmsr_lock, flags);
- if (!pcie_base || !__clk_is_enabled(pcie_bus_clk)) {
+ if (!pcie_base || pm_runtime_suspended(pcie_dev)) {
ret = 1;
goto unlock_exit;
}
diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c
index c52316d0bfd2..45a28880f322 100644
--- a/drivers/pci/controller/pcie-rockchip-host.c
+++ b/drivers/pci/controller/pcie-rockchip-host.c
@@ -221,10 +221,8 @@ static int rockchip_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
{
struct rockchip_pcie *rockchip = bus->sysdata;
- if (!rockchip_pcie_valid_device(rockchip, bus, PCI_SLOT(devfn))) {
- *val = 0xffffffff;
+ if (!rockchip_pcie_valid_device(rockchip, bus, PCI_SLOT(devfn)))
return PCIBIOS_DEVICE_NOT_FOUND;
- }
if (pci_is_root_bus(bus))
return rockchip_pcie_rd_own_conf(rockchip, where, size, val);
diff --git a/drivers/pci/controller/pcie-xilinx-cpm.c b/drivers/pci/controller/pcie-xilinx-cpm.c
index 95426df03200..c7cd44ed4dfc 100644
--- a/drivers/pci/controller/pcie-xilinx-cpm.c
+++ b/drivers/pci/controller/pcie-xilinx-cpm.c
@@ -99,10 +99,10 @@
#define XILINX_CPM_PCIE_REG_PSCR_LNKUP BIT(11)
/**
- * struct xilinx_cpm_pcie_port - PCIe port information
+ * struct xilinx_cpm_pcie - PCIe port information
+ * @dev: Device pointer
* @reg_base: Bridge Register Base
* @cpm_base: CPM System Level Control and Status Register(SLCR) Base
- * @dev: Device pointer
* @intx_domain: Legacy IRQ domain pointer
* @cpm_domain: CPM IRQ domain pointer
* @cfg: Holds mappings of config space window
@@ -110,10 +110,10 @@
* @irq: Error interrupt number
* @lock: lock protecting shared register access
*/
-struct xilinx_cpm_pcie_port {
+struct xilinx_cpm_pcie {
+ struct device *dev;
void __iomem *reg_base;
void __iomem *cpm_base;
- struct device *dev;
struct irq_domain *intx_domain;
struct irq_domain *cpm_domain;
struct pci_config_window *cfg;
@@ -122,24 +122,24 @@ struct xilinx_cpm_pcie_port {
raw_spinlock_t lock;
};
-static u32 pcie_read(struct xilinx_cpm_pcie_port *port, u32 reg)
+static u32 pcie_read(struct xilinx_cpm_pcie *port, u32 reg)
{
return readl_relaxed(port->reg_base + reg);
}
-static void pcie_write(struct xilinx_cpm_pcie_port *port,
+static void pcie_write(struct xilinx_cpm_pcie *port,
u32 val, u32 reg)
{
writel_relaxed(val, port->reg_base + reg);
}
-static bool cpm_pcie_link_up(struct xilinx_cpm_pcie_port *port)
+static bool cpm_pcie_link_up(struct xilinx_cpm_pcie *port)
{
return (pcie_read(port, XILINX_CPM_PCIE_REG_PSCR) &
XILINX_CPM_PCIE_REG_PSCR_LNKUP);
}
-static void cpm_pcie_clear_err_interrupts(struct xilinx_cpm_pcie_port *port)
+static void cpm_pcie_clear_err_interrupts(struct xilinx_cpm_pcie *port)
{
unsigned long val = pcie_read(port, XILINX_CPM_PCIE_REG_RPEFR);
@@ -153,7 +153,7 @@ static void cpm_pcie_clear_err_interrupts(struct xilinx_cpm_pcie_port *port)
static void xilinx_cpm_mask_leg_irq(struct irq_data *data)
{
- struct xilinx_cpm_pcie_port *port = irq_data_get_irq_chip_data(data);
+ struct xilinx_cpm_pcie *port = irq_data_get_irq_chip_data(data);
unsigned long flags;
u32 mask;
u32 val;
@@ -167,7 +167,7 @@ static void xilinx_cpm_mask_leg_irq(struct irq_data *data)
static void xilinx_cpm_unmask_leg_irq(struct irq_data *data)
{
- struct xilinx_cpm_pcie_port *port = irq_data_get_irq_chip_data(data);
+ struct xilinx_cpm_pcie *port = irq_data_get_irq_chip_data(data);
unsigned long flags;
u32 mask;
u32 val;
@@ -211,7 +211,7 @@ static const struct irq_domain_ops intx_domain_ops = {
static void xilinx_cpm_pcie_intx_flow(struct irq_desc *desc)
{
- struct xilinx_cpm_pcie_port *port = irq_desc_get_handler_data(desc);
+ struct xilinx_cpm_pcie *port = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned long val;
int i;
@@ -229,7 +229,7 @@ static void xilinx_cpm_pcie_intx_flow(struct irq_desc *desc)
static void xilinx_cpm_mask_event_irq(struct irq_data *d)
{
- struct xilinx_cpm_pcie_port *port = irq_data_get_irq_chip_data(d);
+ struct xilinx_cpm_pcie *port = irq_data_get_irq_chip_data(d);
u32 val;
raw_spin_lock(&port->lock);
@@ -241,7 +241,7 @@ static void xilinx_cpm_mask_event_irq(struct irq_data *d)
static void xilinx_cpm_unmask_event_irq(struct irq_data *d)
{
- struct xilinx_cpm_pcie_port *port = irq_data_get_irq_chip_data(d);
+ struct xilinx_cpm_pcie *port = irq_data_get_irq_chip_data(d);
u32 val;
raw_spin_lock(&port->lock);
@@ -273,7 +273,7 @@ static const struct irq_domain_ops event_domain_ops = {
static void xilinx_cpm_pcie_event_flow(struct irq_desc *desc)
{
- struct xilinx_cpm_pcie_port *port = irq_desc_get_handler_data(desc);
+ struct xilinx_cpm_pcie *port = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned long val;
int i;
@@ -327,7 +327,7 @@ static const struct {
static irqreturn_t xilinx_cpm_pcie_intr_handler(int irq, void *dev_id)
{
- struct xilinx_cpm_pcie_port *port = dev_id;
+ struct xilinx_cpm_pcie *port = dev_id;
struct device *dev = port->dev;
struct irq_data *d;
@@ -350,7 +350,7 @@ static irqreturn_t xilinx_cpm_pcie_intr_handler(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void xilinx_cpm_free_irq_domains(struct xilinx_cpm_pcie_port *port)
+static void xilinx_cpm_free_irq_domains(struct xilinx_cpm_pcie *port)
{
if (port->intx_domain) {
irq_domain_remove(port->intx_domain);
@@ -369,7 +369,7 @@ static void xilinx_cpm_free_irq_domains(struct xilinx_cpm_pcie_port *port)
*
* Return: '0' on success and error value on failure
*/
-static int xilinx_cpm_pcie_init_irq_domain(struct xilinx_cpm_pcie_port *port)
+static int xilinx_cpm_pcie_init_irq_domain(struct xilinx_cpm_pcie *port)
{
struct device *dev = port->dev;
struct device_node *node = dev->of_node;
@@ -410,7 +410,7 @@ out:
return -ENOMEM;
}
-static int xilinx_cpm_setup_irq(struct xilinx_cpm_pcie_port *port)
+static int xilinx_cpm_setup_irq(struct xilinx_cpm_pcie *port)
{
struct device *dev = port->dev;
struct platform_device *pdev = to_platform_device(dev);
@@ -462,7 +462,7 @@ static int xilinx_cpm_setup_irq(struct xilinx_cpm_pcie_port *port)
* xilinx_cpm_pcie_init_port - Initialize hardware
* @port: PCIe port information
*/
-static void xilinx_cpm_pcie_init_port(struct xilinx_cpm_pcie_port *port)
+static void xilinx_cpm_pcie_init_port(struct xilinx_cpm_pcie *port)
{
if (cpm_pcie_link_up(port))
dev_info(port->dev, "PCIe Link is UP\n");
@@ -497,7 +497,7 @@ static void xilinx_cpm_pcie_init_port(struct xilinx_cpm_pcie_port *port)
*
* Return: '0' on success and error value on failure
*/
-static int xilinx_cpm_pcie_parse_dt(struct xilinx_cpm_pcie_port *port,
+static int xilinx_cpm_pcie_parse_dt(struct xilinx_cpm_pcie *port,
struct resource *bus_range)
{
struct device *dev = port->dev;
@@ -523,7 +523,7 @@ static int xilinx_cpm_pcie_parse_dt(struct xilinx_cpm_pcie_port *port,
return 0;
}
-static void xilinx_cpm_free_interrupts(struct xilinx_cpm_pcie_port *port)
+static void xilinx_cpm_free_interrupts(struct xilinx_cpm_pcie *port)
{
irq_set_chained_handler_and_data(port->intx_irq, NULL, NULL);
irq_set_chained_handler_and_data(port->irq, NULL, NULL);
@@ -537,7 +537,7 @@ static void xilinx_cpm_free_interrupts(struct xilinx_cpm_pcie_port *port)
*/
static int xilinx_cpm_pcie_probe(struct platform_device *pdev)
{
- struct xilinx_cpm_pcie_port *port;
+ struct xilinx_cpm_pcie *port;
struct device *dev = &pdev->dev;
struct pci_host_bridge *bridge;
struct resource_entry *bus;
diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
index a72b4f9a2b00..40d070e54ad2 100644
--- a/drivers/pci/controller/pcie-xilinx-nwl.c
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -146,7 +146,7 @@
struct nwl_msi { /* MSI information */
struct irq_domain *msi_domain;
- unsigned long *bitmap;
+ DECLARE_BITMAP(bitmap, INT_PCI_MSI_NR);
struct irq_domain *dev_domain;
struct mutex lock; /* protect bitmap variable */
int irq_msi0;
@@ -335,12 +335,10 @@ static void nwl_pcie_leg_handler(struct irq_desc *desc)
static void nwl_pcie_handle_msi_irq(struct nwl_pcie *pcie, u32 status_reg)
{
- struct nwl_msi *msi;
+ struct nwl_msi *msi = &pcie->msi;
unsigned long status;
u32 bit;
- msi = &pcie->msi;
-
while ((status = nwl_bridge_readl(pcie, status_reg)) != 0) {
for_each_set_bit(bit, &status, 32) {
nwl_bridge_writel(pcie, 1 << bit, status_reg);
@@ -560,30 +558,21 @@ static int nwl_pcie_enable_msi(struct nwl_pcie *pcie)
struct nwl_msi *msi = &pcie->msi;
unsigned long base;
int ret;
- int size = BITS_TO_LONGS(INT_PCI_MSI_NR) * sizeof(long);
mutex_init(&msi->lock);
- msi->bitmap = kzalloc(size, GFP_KERNEL);
- if (!msi->bitmap)
- return -ENOMEM;
-
/* Get msi_1 IRQ number */
msi->irq_msi1 = platform_get_irq_byname(pdev, "msi1");
- if (msi->irq_msi1 < 0) {
- ret = -EINVAL;
- goto err;
- }
+ if (msi->irq_msi1 < 0)
+ return -EINVAL;
irq_set_chained_handler_and_data(msi->irq_msi1,
nwl_pcie_msi_handler_high, pcie);
/* Get msi_0 IRQ number */
msi->irq_msi0 = platform_get_irq_byname(pdev, "msi0");
- if (msi->irq_msi0 < 0) {
- ret = -EINVAL;
- goto err;
- }
+ if (msi->irq_msi0 < 0)
+ return -EINVAL;
irq_set_chained_handler_and_data(msi->irq_msi0,
nwl_pcie_msi_handler_low, pcie);
@@ -592,8 +581,7 @@ static int nwl_pcie_enable_msi(struct nwl_pcie *pcie)
ret = nwl_bridge_readl(pcie, I_MSII_CAPABILITIES) & MSII_PRESENT;
if (!ret) {
dev_err(dev, "MSI not present\n");
- ret = -EIO;
- goto err;
+ return -EIO;
}
/* Enable MSII */
@@ -632,10 +620,6 @@ static int nwl_pcie_enable_msi(struct nwl_pcie *pcie)
nwl_bridge_writel(pcie, MSGF_MSI_SR_LO_MASK, MSGF_MSI_MASK_LO);
return 0;
-err:
- kfree(msi->bitmap);
- msi->bitmap = NULL;
- return ret;
}
static int nwl_pcie_bridge_init(struct nwl_pcie *pcie)
diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c
index aa9bdcebc838..cb6e9f7b0152 100644
--- a/drivers/pci/controller/pcie-xilinx.c
+++ b/drivers/pci/controller/pcie-xilinx.c
@@ -91,18 +91,18 @@
#define XILINX_NUM_MSI_IRQS 128
/**
- * struct xilinx_pcie_port - PCIe port information
- * @reg_base: IO Mapped Register Base
+ * struct xilinx_pcie - PCIe port information
* @dev: Device pointer
+ * @reg_base: IO Mapped Register Base
* @msi_map: Bitmap of allocated MSIs
* @map_lock: Mutex protecting the MSI allocation
* @msi_domain: MSI IRQ domain pointer
* @leg_domain: Legacy IRQ domain pointer
* @resources: Bus Resources
*/
-struct xilinx_pcie_port {
- void __iomem *reg_base;
+struct xilinx_pcie {
struct device *dev;
+ void __iomem *reg_base;
unsigned long msi_map[BITS_TO_LONGS(XILINX_NUM_MSI_IRQS)];
struct mutex map_lock;
struct irq_domain *msi_domain;
@@ -110,35 +110,35 @@ struct xilinx_pcie_port {
struct list_head resources;
};
-static inline u32 pcie_read(struct xilinx_pcie_port *port, u32 reg)
+static inline u32 pcie_read(struct xilinx_pcie *pcie, u32 reg)
{
- return readl(port->reg_base + reg);
+ return readl(pcie->reg_base + reg);
}
-static inline void pcie_write(struct xilinx_pcie_port *port, u32 val, u32 reg)
+static inline void pcie_write(struct xilinx_pcie *pcie, u32 val, u32 reg)
{
- writel(val, port->reg_base + reg);
+ writel(val, pcie->reg_base + reg);
}
-static inline bool xilinx_pcie_link_up(struct xilinx_pcie_port *port)
+static inline bool xilinx_pcie_link_up(struct xilinx_pcie *pcie)
{
- return (pcie_read(port, XILINX_PCIE_REG_PSCR) &
+ return (pcie_read(pcie, XILINX_PCIE_REG_PSCR) &
XILINX_PCIE_REG_PSCR_LNKUP) ? 1 : 0;
}
/**
* xilinx_pcie_clear_err_interrupts - Clear Error Interrupts
- * @port: PCIe port information
+ * @pcie: PCIe port information
*/
-static void xilinx_pcie_clear_err_interrupts(struct xilinx_pcie_port *port)
+static void xilinx_pcie_clear_err_interrupts(struct xilinx_pcie *pcie)
{
- struct device *dev = port->dev;
- unsigned long val = pcie_read(port, XILINX_PCIE_REG_RPEFR);
+ struct device *dev = pcie->dev;
+ unsigned long val = pcie_read(pcie, XILINX_PCIE_REG_RPEFR);
if (val & XILINX_PCIE_RPEFR_ERR_VALID) {
dev_dbg(dev, "Requester ID %lu\n",
val & XILINX_PCIE_RPEFR_REQ_ID);
- pcie_write(port, XILINX_PCIE_RPEFR_ALL_MASK,
+ pcie_write(pcie, XILINX_PCIE_RPEFR_ALL_MASK,
XILINX_PCIE_REG_RPEFR);
}
}
@@ -152,11 +152,11 @@ static void xilinx_pcie_clear_err_interrupts(struct xilinx_pcie_port *port)
*/
static bool xilinx_pcie_valid_device(struct pci_bus *bus, unsigned int devfn)
{
- struct xilinx_pcie_port *port = bus->sysdata;
+ struct xilinx_pcie *pcie = bus->sysdata;
- /* Check if link is up when trying to access downstream ports */
+ /* Check if link is up when trying to access downstream pcie ports */
if (!pci_is_root_bus(bus)) {
- if (!xilinx_pcie_link_up(port))
+ if (!xilinx_pcie_link_up(pcie))
return false;
} else if (devfn > 0) {
/* Only one device down on each root port */
@@ -177,12 +177,12 @@ static bool xilinx_pcie_valid_device(struct pci_bus *bus, unsigned int devfn)
static void __iomem *xilinx_pcie_map_bus(struct pci_bus *bus,
unsigned int devfn, int where)
{
- struct xilinx_pcie_port *port = bus->sysdata;
+ struct xilinx_pcie *pcie = bus->sysdata;
if (!xilinx_pcie_valid_device(bus, devfn))
return NULL;
- return port->reg_base + PCIE_ECAM_OFFSET(bus->number, devfn, where);
+ return pcie->reg_base + PCIE_ECAM_OFFSET(bus->number, devfn, where);
}
/* PCIe operations */
@@ -215,7 +215,7 @@ static int xilinx_msi_set_affinity(struct irq_data *d, const struct cpumask *mas
static void xilinx_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
- struct xilinx_pcie_port *pcie = irq_data_get_irq_chip_data(data);
+ struct xilinx_pcie *pcie = irq_data_get_irq_chip_data(data);
phys_addr_t pa = ALIGN_DOWN(virt_to_phys(pcie), SZ_4K);
msg->address_lo = lower_32_bits(pa);
@@ -232,14 +232,14 @@ static struct irq_chip xilinx_msi_bottom_chip = {
static int xilinx_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *args)
{
- struct xilinx_pcie_port *port = domain->host_data;
+ struct xilinx_pcie *pcie = domain->host_data;
int hwirq, i;
- mutex_lock(&port->map_lock);
+ mutex_lock(&pcie->map_lock);
- hwirq = bitmap_find_free_region(port->msi_map, XILINX_NUM_MSI_IRQS, order_base_2(nr_irqs));
+ hwirq = bitmap_find_free_region(pcie->msi_map, XILINX_NUM_MSI_IRQS, order_base_2(nr_irqs));
- mutex_unlock(&port->map_lock);
+ mutex_unlock(&pcie->map_lock);
if (hwirq < 0)
return -ENOSPC;
@@ -256,13 +256,13 @@ static void xilinx_msi_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs)
{
struct irq_data *d = irq_domain_get_irq_data(domain, virq);
- struct xilinx_pcie_port *port = domain->host_data;
+ struct xilinx_pcie *pcie = domain->host_data;
- mutex_lock(&port->map_lock);
+ mutex_lock(&pcie->map_lock);
- bitmap_release_region(port->msi_map, d->hwirq, order_base_2(nr_irqs));
+ bitmap_release_region(pcie->msi_map, d->hwirq, order_base_2(nr_irqs));
- mutex_unlock(&port->map_lock);
+ mutex_unlock(&pcie->map_lock);
}
static const struct irq_domain_ops xilinx_msi_domain_ops = {
@@ -275,7 +275,7 @@ static struct msi_domain_info xilinx_msi_info = {
.chip = &xilinx_msi_top_chip,
};
-static int xilinx_allocate_msi_domains(struct xilinx_pcie_port *pcie)
+static int xilinx_allocate_msi_domains(struct xilinx_pcie *pcie)
{
struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
struct irq_domain *parent;
@@ -298,7 +298,7 @@ static int xilinx_allocate_msi_domains(struct xilinx_pcie_port *pcie)
return 0;
}
-static void xilinx_free_msi_domains(struct xilinx_pcie_port *pcie)
+static void xilinx_free_msi_domains(struct xilinx_pcie *pcie)
{
struct irq_domain *parent = pcie->msi_domain->parent;
@@ -342,13 +342,13 @@ static const struct irq_domain_ops intx_domain_ops = {
*/
static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
{
- struct xilinx_pcie_port *port = (struct xilinx_pcie_port *)data;
- struct device *dev = port->dev;
+ struct xilinx_pcie *pcie = (struct xilinx_pcie *)data;
+ struct device *dev = pcie->dev;
u32 val, mask, status;
/* Read interrupt decode and mask registers */
- val = pcie_read(port, XILINX_PCIE_REG_IDR);
- mask = pcie_read(port, XILINX_PCIE_REG_IMR);
+ val = pcie_read(pcie, XILINX_PCIE_REG_IDR);
+ mask = pcie_read(pcie, XILINX_PCIE_REG_IMR);
status = val & mask;
if (!status)
@@ -371,23 +371,23 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
if (status & XILINX_PCIE_INTR_CORRECTABLE) {
dev_warn(dev, "Correctable error message\n");
- xilinx_pcie_clear_err_interrupts(port);
+ xilinx_pcie_clear_err_interrupts(pcie);
}
if (status & XILINX_PCIE_INTR_NONFATAL) {
dev_warn(dev, "Non fatal error message\n");
- xilinx_pcie_clear_err_interrupts(port);
+ xilinx_pcie_clear_err_interrupts(pcie);
}
if (status & XILINX_PCIE_INTR_FATAL) {
dev_warn(dev, "Fatal error message\n");
- xilinx_pcie_clear_err_interrupts(port);
+ xilinx_pcie_clear_err_interrupts(pcie);
}
if (status & (XILINX_PCIE_INTR_INTX | XILINX_PCIE_INTR_MSI)) {
struct irq_domain *domain;
- val = pcie_read(port, XILINX_PCIE_REG_RPIFR1);
+ val = pcie_read(pcie, XILINX_PCIE_REG_RPIFR1);
/* Check whether interrupt valid */
if (!(val & XILINX_PCIE_RPIFR1_INTR_VALID)) {
@@ -397,17 +397,17 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
/* Decode the IRQ number */
if (val & XILINX_PCIE_RPIFR1_MSI_INTR) {
- val = pcie_read(port, XILINX_PCIE_REG_RPIFR2) &
+ val = pcie_read(pcie, XILINX_PCIE_REG_RPIFR2) &
XILINX_PCIE_RPIFR2_MSG_DATA;
- domain = port->msi_domain->parent;
+ domain = pcie->msi_domain->parent;
} else {
val = (val & XILINX_PCIE_RPIFR1_INTR_MASK) >>
XILINX_PCIE_RPIFR1_INTR_SHIFT;
- domain = port->leg_domain;
+ domain = pcie->leg_domain;
}
/* Clear interrupt FIFO register 1 */
- pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK,
+ pcie_write(pcie, XILINX_PCIE_RPIFR1_ALL_MASK,
XILINX_PCIE_REG_RPIFR1);
generic_handle_domain_irq(domain, val);
@@ -442,20 +442,20 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
error:
/* Clear the Interrupt Decode register */
- pcie_write(port, status, XILINX_PCIE_REG_IDR);
+ pcie_write(pcie, status, XILINX_PCIE_REG_IDR);
return IRQ_HANDLED;
}
/**
* xilinx_pcie_init_irq_domain - Initialize IRQ domain
- * @port: PCIe port information
+ * @pcie: PCIe port information
*
* Return: '0' on success and error value on failure
*/
-static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port)
+static int xilinx_pcie_init_irq_domain(struct xilinx_pcie *pcie)
{
- struct device *dev = port->dev;
+ struct device *dev = pcie->dev;
struct device_node *pcie_intc_node;
int ret;
@@ -466,25 +466,25 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port)
return -ENODEV;
}
- port->leg_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+ pcie->leg_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
&intx_domain_ops,
- port);
+ pcie);
of_node_put(pcie_intc_node);
- if (!port->leg_domain) {
+ if (!pcie->leg_domain) {
dev_err(dev, "Failed to get a INTx IRQ domain\n");
return -ENODEV;
}
/* Setup MSI */
if (IS_ENABLED(CONFIG_PCI_MSI)) {
- phys_addr_t pa = ALIGN_DOWN(virt_to_phys(port), SZ_4K);
+ phys_addr_t pa = ALIGN_DOWN(virt_to_phys(pcie), SZ_4K);
- ret = xilinx_allocate_msi_domains(port);
+ ret = xilinx_allocate_msi_domains(pcie);
if (ret)
return ret;
- pcie_write(port, upper_32_bits(pa), XILINX_PCIE_REG_MSIBASE1);
- pcie_write(port, lower_32_bits(pa), XILINX_PCIE_REG_MSIBASE2);
+ pcie_write(pcie, upper_32_bits(pa), XILINX_PCIE_REG_MSIBASE1);
+ pcie_write(pcie, lower_32_bits(pa), XILINX_PCIE_REG_MSIBASE2);
}
return 0;
@@ -492,44 +492,44 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port)
/**
* xilinx_pcie_init_port - Initialize hardware
- * @port: PCIe port information
+ * @pcie: PCIe port information
*/
-static void xilinx_pcie_init_port(struct xilinx_pcie_port *port)
+static void xilinx_pcie_init_port(struct xilinx_pcie *pcie)
{
- struct device *dev = port->dev;
+ struct device *dev = pcie->dev;
- if (xilinx_pcie_link_up(port))
+ if (xilinx_pcie_link_up(pcie))
dev_info(dev, "PCIe Link is UP\n");
else
dev_info(dev, "PCIe Link is DOWN\n");
/* Disable all interrupts */
- pcie_write(port, ~XILINX_PCIE_IDR_ALL_MASK,
+ pcie_write(pcie, ~XILINX_PCIE_IDR_ALL_MASK,
XILINX_PCIE_REG_IMR);
/* Clear pending interrupts */
- pcie_write(port, pcie_read(port, XILINX_PCIE_REG_IDR) &
+ pcie_write(pcie, pcie_read(pcie, XILINX_PCIE_REG_IDR) &
XILINX_PCIE_IMR_ALL_MASK,
XILINX_PCIE_REG_IDR);
/* Enable all interrupts we handle */
- pcie_write(port, XILINX_PCIE_IMR_ENABLE_MASK, XILINX_PCIE_REG_IMR);
+ pcie_write(pcie, XILINX_PCIE_IMR_ENABLE_MASK, XILINX_PCIE_REG_IMR);
/* Enable the Bridge enable bit */
- pcie_write(port, pcie_read(port, XILINX_PCIE_REG_RPSC) |
+ pcie_write(pcie, pcie_read(pcie, XILINX_PCIE_REG_RPSC) |
XILINX_PCIE_REG_RPSC_BEN,
XILINX_PCIE_REG_RPSC);
}
/**
* xilinx_pcie_parse_dt - Parse Device tree
- * @port: PCIe port information
+ * @pcie: PCIe port information
*
* Return: '0' on success and error value on failure
*/
-static int xilinx_pcie_parse_dt(struct xilinx_pcie_port *port)
+static int xilinx_pcie_parse_dt(struct xilinx_pcie *pcie)
{
- struct device *dev = port->dev;
+ struct device *dev = pcie->dev;
struct device_node *node = dev->of_node;
struct resource regs;
unsigned int irq;
@@ -541,14 +541,14 @@ static int xilinx_pcie_parse_dt(struct xilinx_pcie_port *port)
return err;
}
- port->reg_base = devm_pci_remap_cfg_resource(dev, &regs);
- if (IS_ERR(port->reg_base))
- return PTR_ERR(port->reg_base);
+ pcie->reg_base = devm_pci_remap_cfg_resource(dev, &regs);
+ if (IS_ERR(pcie->reg_base))
+ return PTR_ERR(pcie->reg_base);
irq = irq_of_parse_and_map(node, 0);
err = devm_request_irq(dev, irq, xilinx_pcie_intr_handler,
IRQF_SHARED | IRQF_NO_THREAD,
- "xilinx-pcie", port);
+ "xilinx-pcie", pcie);
if (err) {
dev_err(dev, "unable to request irq %d\n", irq);
return err;
@@ -566,41 +566,41 @@ static int xilinx_pcie_parse_dt(struct xilinx_pcie_port *port)
static int xilinx_pcie_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct xilinx_pcie_port *port;
+ struct xilinx_pcie *pcie;
struct pci_host_bridge *bridge;
int err;
if (!dev->of_node)
return -ENODEV;
- bridge = devm_pci_alloc_host_bridge(dev, sizeof(*port));
+ bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
if (!bridge)
return -ENODEV;
- port = pci_host_bridge_priv(bridge);
- mutex_init(&port->map_lock);
- port->dev = dev;
+ pcie = pci_host_bridge_priv(bridge);
+ mutex_init(&pcie->map_lock);
+ pcie->dev = dev;
- err = xilinx_pcie_parse_dt(port);
+ err = xilinx_pcie_parse_dt(pcie);
if (err) {
dev_err(dev, "Parsing DT failed\n");
return err;
}
- xilinx_pcie_init_port(port);
+ xilinx_pcie_init_port(pcie);
- err = xilinx_pcie_init_irq_domain(port);
+ err = xilinx_pcie_init_irq_domain(pcie);
if (err) {
dev_err(dev, "Failed creating IRQ Domain\n");
return err;
}
- bridge->sysdata = port;
+ bridge->sysdata = pcie;
bridge->ops = &xilinx_pcie_ops;
err = pci_host_probe(bridge);
if (err)
- xilinx_free_msi_domains(port);
+ xilinx_free_msi_domains(pcie);
return err;
}
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index a45e8e59d3d4..cc166c683638 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -501,6 +501,40 @@ static inline void vmd_acpi_begin(void) { }
static inline void vmd_acpi_end(void) { }
#endif /* CONFIG_ACPI */
+static void vmd_domain_reset(struct vmd_dev *vmd)
+{
+ u16 bus, max_buses = resource_size(&vmd->resources[0]);
+ u8 dev, functions, fn, hdr_type;
+ char __iomem *base;
+
+ for (bus = 0; bus < max_buses; bus++) {
+ for (dev = 0; dev < 32; dev++) {
+ base = vmd->cfgbar + PCIE_ECAM_OFFSET(bus,
+ PCI_DEVFN(dev, 0), 0);
+
+ hdr_type = readb(base + PCI_HEADER_TYPE) &
+ PCI_HEADER_TYPE_MASK;
+
+ functions = (hdr_type & 0x80) ? 8 : 1;
+ for (fn = 0; fn < functions; fn++) {
+ base = vmd->cfgbar + PCIE_ECAM_OFFSET(bus,
+ PCI_DEVFN(dev, fn), 0);
+
+ hdr_type = readb(base + PCI_HEADER_TYPE) &
+ PCI_HEADER_TYPE_MASK;
+
+ if (hdr_type != PCI_HEADER_TYPE_BRIDGE ||
+ (readw(base + PCI_CLASS_DEVICE) !=
+ PCI_CLASS_BRIDGE_PCI))
+ continue;
+
+ memset_io(base + PCI_IO_BASE, 0,
+ PCI_ROM_ADDRESS1 - PCI_IO_BASE);
+ }
+ }
+ }
+}
+
static void vmd_attach_resources(struct vmd_dev *vmd)
{
vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
@@ -541,7 +575,7 @@ static int vmd_get_phys_offsets(struct vmd_dev *vmd, bool native_hint,
int ret;
ret = pci_read_config_dword(dev, PCI_REG_VMLOCK, &vmlock);
- if (ret || vmlock == ~0)
+ if (ret || PCI_POSSIBLE_ERROR(vmlock))
return -ENODEV;
if (MB2_SHADOW_EN(vmlock)) {
@@ -661,6 +695,21 @@ static int vmd_alloc_irqs(struct vmd_dev *vmd)
return 0;
}
+/*
+ * Since VMD is an aperture to regular PCIe root ports, only allow it to
+ * control features that the OS is allowed to control on the physical PCI bus.
+ */
+static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
+ struct pci_host_bridge *vmd_bridge)
+{
+ vmd_bridge->native_pcie_hotplug = root_bridge->native_pcie_hotplug;
+ vmd_bridge->native_shpc_hotplug = root_bridge->native_shpc_hotplug;
+ vmd_bridge->native_aer = root_bridge->native_aer;
+ vmd_bridge->native_pme = root_bridge->native_pme;
+ vmd_bridge->native_ltr = root_bridge->native_ltr;
+ vmd_bridge->native_dpc = root_bridge->native_dpc;
+}
+
static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
{
struct pci_sysdata *sd = &vmd->sysdata;
@@ -798,6 +847,9 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
return -ENODEV;
}
+ vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
+ to_pci_host_bridge(vmd->bus->bridge));
+
vmd_attach_resources(vmd);
if (vmd->irq_domain)
dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);
@@ -805,6 +857,9 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
vmd_acpi_begin();
pci_scan_child_bus(vmd->bus);
+ vmd_domain_reset(vmd);
+ list_for_each_entry(child, &vmd->bus->children, node)
+ pci_reset_bus(child->self);
pci_assign_unassigned_bus_resources(vmd->bus);
/*
@@ -953,6 +1008,10 @@ static const struct pci_device_id vmd_ids[] = {
.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
VMD_FEAT_HAS_BUS_RESTRICTIONS |
VMD_FEAT_OFFSET_FIRST_VECTOR,},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa77f),
+ .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
+ VMD_FEAT_HAS_BUS_RESTRICTIONS |
+ VMD_FEAT_OFFSET_FIRST_VECTOR,},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_9A0B),
.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
VMD_FEAT_HAS_BUS_RESTRICTIONS |
diff --git a/drivers/pci/endpoint/functions/pci-epf-ntb.c b/drivers/pci/endpoint/functions/pci-epf-ntb.c
index 5a03401f4571..9a00448c7e61 100644
--- a/drivers/pci/endpoint/functions/pci-epf-ntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-ntb.c
@@ -1262,7 +1262,7 @@ static void epf_ntb_db_mw_bar_cleanup(struct epf_ntb *ntb,
}
/**
- * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capaiblity
+ * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capability
* @ntb: NTB device that facilitates communication between HOST1 and HOST2
* @type: PRIMARY interface or SECONDARY interface
*
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index 38621558d397..3bc9273d0a08 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -334,7 +334,7 @@ int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 interrupts)
u8 encode_int;
if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
- interrupts > 32)
+ interrupts < 1 || interrupts > 32)
return -EINVAL;
if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
diff --git a/drivers/pci/hotplug/TODO b/drivers/pci/hotplug/TODO
index cc6194aa24c1..88f217c82b4f 100644
--- a/drivers/pci/hotplug/TODO
+++ b/drivers/pci/hotplug/TODO
@@ -30,11 +30,6 @@ ibmphp:
or ibmphp should store a pointer to its bus in struct slot. Probably the
former.
-* The functions get_max_adapter_speed() and get_bus_name() are commented out.
- Can they be deleted? There are also forward declarations at the top of
- ibmphp_core.c as well as pointers in ibmphp_hotplug_slot_ops, likewise
- commented out.
-
* ibmphp_init_devno() takes a struct slot **, it could instead take a
struct slot *.
diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c
index ed7b58eb64d2..93fd2a621822 100644
--- a/drivers/pci/hotplug/cpqphp_ctrl.c
+++ b/drivers/pci/hotplug/cpqphp_ctrl.c
@@ -2273,7 +2273,7 @@ static u32 configure_new_device(struct controller *ctrl, struct pci_func *func
while ((function < max_functions) && (!stop_it)) {
pci_bus_read_config_dword(ctrl->pci_bus, PCI_DEVFN(func->device, function), 0x00, &ID);
- if (ID == 0xFFFFFFFF) {
+ if (PCI_POSSIBLE_ERROR(ID)) {
function++;
} else {
/* Setup slot structure. */
@@ -2517,7 +2517,7 @@ static int configure_new_function(struct controller *ctrl, struct pci_func *func
pci_bus_read_config_dword(pci_bus, PCI_DEVFN(device, 0), 0x00, &ID);
pci_bus->number = func->bus;
- if (ID != 0xFFFFFFFF) { /* device present */
+ if (!PCI_POSSIBLE_ERROR(ID)) { /* device present */
/* Setup slot structure. */
new_slot = cpqhp_slot_create(hold_bus_node->base);
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 17124254d897..197997e264a2 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -50,14 +50,6 @@ static int irqs[16]; /* PIC mode IRQs we're using so far (in case MPS
static int init_flag;
-/*
-static int get_max_adapter_speed_1 (struct hotplug_slot *, u8 *, u8);
-
-static inline int get_max_adapter_speed (struct hotplug_slot *hs, u8 *value)
-{
- return get_max_adapter_speed_1 (hs, value, 1);
-}
-*/
static inline int get_cur_bus_info(struct slot **sl)
{
int rc = 1;
@@ -401,69 +393,6 @@ static int get_max_bus_speed(struct slot *slot)
return rc;
}
-/*
-static int get_max_adapter_speed_1(struct hotplug_slot *hotplug_slot, u8 *value, u8 flag)
-{
- int rc = -ENODEV;
- struct slot *pslot;
- struct slot myslot;
-
- debug("get_max_adapter_speed_1 - Entry hotplug_slot[%lx] pvalue[%lx]\n",
- (ulong)hotplug_slot, (ulong) value);
-
- if (flag)
- ibmphp_lock_operations();
-
- if (hotplug_slot && value) {
- pslot = hotplug_slot->private;
- if (pslot) {
- memcpy(&myslot, pslot, sizeof(struct slot));
- rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
- &(myslot.status));
-
- if (!(SLOT_LATCH (myslot.status)) &&
- (SLOT_PRESENT (myslot.status))) {
- rc = ibmphp_hpc_readslot(pslot,
- READ_EXTSLOTSTATUS,
- &(myslot.ext_status));
- if (!rc)
- *value = SLOT_SPEED(myslot.ext_status);
- } else
- *value = MAX_ADAPTER_NONE;
- }
- }
-
- if (flag)
- ibmphp_unlock_operations();
-
- debug("get_max_adapter_speed_1 - Exit rc[%d] value[%x]\n", rc, *value);
- return rc;
-}
-
-static int get_bus_name(struct hotplug_slot *hotplug_slot, char *value)
-{
- int rc = -ENODEV;
- struct slot *pslot = NULL;
-
- debug("get_bus_name - Entry hotplug_slot[%lx]\n", (ulong)hotplug_slot);
-
- ibmphp_lock_operations();
-
- if (hotplug_slot) {
- pslot = hotplug_slot->private;
- if (pslot) {
- rc = 0;
- snprintf(value, 100, "Bus %x", pslot->bus);
- }
- } else
- rc = -ENODEV;
-
- ibmphp_unlock_operations();
- debug("get_bus_name - Exit rc[%d] value[%x]\n", rc, *value);
- return rc;
-}
-*/
-
/****************************************************************************
* This routine will initialize the ops data structure used in the validate
* function. It will also power off empty slots that are powered on since BIOS
@@ -1231,9 +1160,6 @@ const struct hotplug_slot_ops ibmphp_hotplug_slot_ops = {
.get_attention_status = get_attention_status,
.get_latch_status = get_latch_status,
.get_adapter_status = get_adapter_present,
-/* .get_max_adapter_speed = get_max_adapter_speed,
- .get_bus_name_status = get_bus_name,
-*/
};
static void ibmphp_unload(void)
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 918dccbc74b6..e0a614acee05 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -75,6 +75,8 @@ extern int pciehp_poll_time;
* @reset_lock: prevents access to the Data Link Layer Link Active bit in the
* Link Status register and to the Presence Detect State bit in the Slot
* Status register during a slot reset which may cause them to flap
+ * @depth: Number of additional hotplug ports in the path to the root bus,
+ * used as lock subclass for @reset_lock
* @ist_running: flag to keep user request waiting while IRQ thread is running
* @request_result: result of last user request submitted to the IRQ thread
* @requester: wait queue to wake up on completion of user request,
@@ -106,6 +108,7 @@ struct controller {
struct hotplug_slot hotplug_slot; /* hotplug core interface */
struct rw_semaphore reset_lock;
+ unsigned int depth;
unsigned int ist_running;
int request_result;
wait_queue_head_t requester;
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index f34114d45259..4042d87d539d 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -166,7 +166,7 @@ static void pciehp_check_presence(struct controller *ctrl)
{
int occupied;
- down_read(&ctrl->reset_lock);
+ down_read_nested(&ctrl->reset_lock, ctrl->depth);
mutex_lock(&ctrl->state_lock);
occupied = pciehp_card_present_or_link_active(ctrl);
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 83a0fa119cae..1c1ebf3dad43 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -89,7 +89,7 @@ static int pcie_poll_cmd(struct controller *ctrl, int timeout)
do {
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
- if (slot_status == (u16) ~0) {
+ if (PCI_POSSIBLE_ERROR(slot_status)) {
ctrl_info(ctrl, "%s: no response from device\n",
__func__);
return 0;
@@ -165,7 +165,7 @@ static void pcie_do_write_cmd(struct controller *ctrl, u16 cmd,
pcie_wait_cmd(ctrl);
pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
- if (slot_ctrl == (u16) ~0) {
+ if (PCI_POSSIBLE_ERROR(slot_ctrl)) {
ctrl_info(ctrl, "%s: no response from device\n", __func__);
goto out;
}
@@ -236,7 +236,7 @@ int pciehp_check_link_active(struct controller *ctrl)
int ret;
ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
- if (ret == PCIBIOS_DEVICE_NOT_FOUND || lnk_status == (u16)~0)
+ if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status))
return -ENODEV;
ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
@@ -443,7 +443,7 @@ int pciehp_card_present(struct controller *ctrl)
int ret;
ret = pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
- if (ret == PCIBIOS_DEVICE_NOT_FOUND || slot_status == (u16)~0)
+ if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(slot_status))
return -ENODEV;
return !!(slot_status & PCI_EXP_SLTSTA_PDS);
@@ -583,7 +583,7 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl,
* the corresponding link change may have been ignored above.
* Synthesize it to ensure that it is acted on.
*/
- down_read(&ctrl->reset_lock);
+ down_read_nested(&ctrl->reset_lock, ctrl->depth);
if (!pciehp_check_link_active(ctrl))
pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC);
up_read(&ctrl->reset_lock);
@@ -621,7 +621,7 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id)
read_status:
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &status);
- if (status == (u16) ~0) {
+ if (PCI_POSSIBLE_ERROR(status)) {
ctrl_info(ctrl, "%s: no response from device\n", __func__);
if (parent)
pm_runtime_put(parent);
@@ -642,6 +642,8 @@ read_status:
*/
if (ctrl->power_fault_detected)
status &= ~PCI_EXP_SLTSTA_PFD;
+ else if (status & PCI_EXP_SLTSTA_PFD)
+ ctrl->power_fault_detected = true;
events |= status;
if (!events) {
@@ -651,7 +653,7 @@ read_status:
}
if (status) {
- pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events);
+ pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status);
/*
* In MSI mode, all event bits must be zero before the port
@@ -725,8 +727,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
}
/* Check Power Fault Detected */
- if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
- ctrl->power_fault_detected = 1;
+ if (events & PCI_EXP_SLTSTA_PFD) {
ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
PCI_EXP_SLTCTL_ATTN_IND_ON);
@@ -746,7 +747,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
* Disable requests have higher priority than Presence Detect Changed
* or Data Link Layer State Changed events.
*/
- down_read(&ctrl->reset_lock);
+ down_read_nested(&ctrl->reset_lock, ctrl->depth);
if (events & DISABLE_SLOT)
pciehp_handle_disable_request(ctrl);
else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC))
@@ -906,7 +907,7 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
if (probe)
return 0;
- down_write(&ctrl->reset_lock);
+ down_write_nested(&ctrl->reset_lock, ctrl->depth);
if (!ATTN_BUTTN(ctrl)) {
ctrl_mask |= PCI_EXP_SLTCTL_PDCE;
@@ -962,6 +963,20 @@ static inline void dbg_ctrl(struct controller *ctrl)
#define FLAG(x, y) (((x) & (y)) ? '+' : '-')
+static inline int pcie_hotplug_depth(struct pci_dev *dev)
+{
+ struct pci_bus *bus = dev->bus;
+ int depth = 0;
+
+ while (bus->parent) {
+ bus = bus->parent;
+ if (bus->self && bus->self->is_hotplug_bridge)
+ depth++;
+ }
+
+ return depth;
+}
+
struct controller *pcie_init(struct pcie_device *dev)
{
struct controller *ctrl;
@@ -975,6 +990,7 @@ struct controller *pcie_init(struct pcie_device *dev)
return NULL;
ctrl->pcie = dev;
+ ctrl->depth = pcie_hotplug_depth(dev->port);
pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
if (pdev->hotplug_user_indicators)
diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c
index 0d63541c4052..e9cf318e6670 100644
--- a/drivers/pci/msi/irqdomain.c
+++ b/drivers/pci/msi/irqdomain.c
@@ -28,6 +28,7 @@ void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
msi_domain_free_irqs_descs_locked(domain, &dev->dev);
else
pci_msi_legacy_teardown_msi_irqs(dev);
+ msi_free_msi_descs(&dev->dev);
}
/**
@@ -171,8 +172,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
pci_msi_domain_update_chip_ops(info);
- info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS |
- MSI_FLAG_FREE_MSI_DESCS;
+ info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS;
if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
info->flags |= MSI_FLAG_MUST_REACTIVATE;
diff --git a/drivers/pci/msi/legacy.c b/drivers/pci/msi/legacy.c
index cdbb4689db78..db761adef652 100644
--- a/drivers/pci/msi/legacy.c
+++ b/drivers/pci/msi/legacy.c
@@ -77,5 +77,4 @@ void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev)
{
msi_device_destroy_sysfs(&dev->dev);
arch_teardown_msi_irqs(dev);
- msi_free_msi_descs(&dev->dev);
}
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index 0b1237cff239..cb2e8351c2cc 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -247,7 +247,7 @@ void of_pci_check_probe_only(void)
else
pci_clear_flags(PCI_PROBE_ONLY);
- pr_info("PROBE_ONLY %sabled\n", val ? "en" : "dis");
+ pr_info("PROBE_ONLY %s\n", val ? "enabled" : "disabled");
}
EXPORT_SYMBOL_GPL(of_pci_check_probe_only);
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 454d5f6f16ff..1015274bd2fe 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -710,7 +710,7 @@ void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
if (!ret)
goto out;
- if (unlikely(!percpu_ref_tryget_live(ref))) {
+ if (unlikely(!percpu_ref_tryget_live_rcu(ref))) {
gen_pool_free(p2pdma->pool, (unsigned long) ret, size);
ret = NULL;
goto out;
diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c
index db97cddfc85e..c994ebec2360 100644
--- a/drivers/pci/pci-bridge-emul.c
+++ b/drivers/pci/pci-bridge-emul.c
@@ -139,8 +139,13 @@ struct pci_bridge_reg_behavior pci_regs_behavior[PCI_STD_HEADER_SIZEOF / 4] = {
.ro = GENMASK(7, 0),
},
+ /*
+ * If expansion ROM is unsupported then ROM Base Address register must
+ * be implemented as read-only register that return 0 when read, same
+ * as for unused Base Address registers.
+ */
[PCI_ROM_ADDRESS1 / 4] = {
- .rw = GENMASK(31, 11) | BIT(0),
+ .ro = ~0,
},
/*
@@ -171,41 +176,55 @@ struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] =
[PCI_CAP_LIST_ID / 4] = {
/*
* Capability ID, Next Capability Pointer and
- * Capabilities register are all read-only.
+ * bits [14:0] of Capabilities register are all read-only.
+ * Bit 15 of Capabilities register is reserved.
*/
- .ro = ~0,
+ .ro = GENMASK(30, 0),
},
[PCI_EXP_DEVCAP / 4] = {
- .ro = ~0,
+ /*
+ * Bits [31:29] and [17:16] are reserved.
+ * Bits [27:18] are reserved for non-upstream ports.
+ * Bits 28 and [14:6] are reserved for non-endpoint devices.
+ * Other bits are read-only.
+ */
+ .ro = BIT(15) | GENMASK(5, 0),
},
[PCI_EXP_DEVCTL / 4] = {
- /* Device control register is RW */
- .rw = GENMASK(15, 0),
+ /*
+ * Device control register is RW, except bit 15 which is
+ * reserved for non-endpoints or non-PCIe-to-PCI/X bridges.
+ */
+ .rw = GENMASK(14, 0),
/*
* Device status register has bits 6 and [3:0] W1C, [5:4] RO,
- * the rest is reserved
+ * the rest is reserved. Also bit 6 is reserved for non-upstream
+ * ports.
*/
- .w1c = (BIT(6) | GENMASK(3, 0)) << 16,
+ .w1c = GENMASK(3, 0) << 16,
.ro = GENMASK(5, 4) << 16,
},
[PCI_EXP_LNKCAP / 4] = {
- /* All bits are RO, except bit 23 which is reserved */
- .ro = lower_32_bits(~BIT(23)),
+ /*
+ * All bits are RO, except bit 23 which is reserved and
+ * bit 18 which is reserved for non-upstream ports.
+ */
+ .ro = lower_32_bits(~(BIT(23) | PCI_EXP_LNKCAP_CLKPM)),
},
[PCI_EXP_LNKCTL / 4] = {
/*
* Link control has bits [15:14], [11:3] and [1:0] RW, the
- * rest is reserved.
+ * rest is reserved. Bit 8 is reserved for non-upstream ports.
*
* Link status has bits [13:0] RO, and bits [15:14]
* W1C.
*/
- .rw = GENMASK(15, 14) | GENMASK(11, 3) | GENMASK(1, 0),
+ .rw = GENMASK(15, 14) | GENMASK(11, 9) | GENMASK(7, 3) | GENMASK(1, 0),
.ro = GENMASK(13, 0) << 16,
.w1c = GENMASK(15, 14) << 16,
},
@@ -251,6 +270,49 @@ struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] =
.ro = GENMASK(15, 0) | PCI_EXP_RTSTA_PENDING,
.w1c = PCI_EXP_RTSTA_PME,
},
+
+ [PCI_EXP_DEVCAP2 / 4] = {
+ /*
+ * Device capabilities 2 register has reserved bits [30:27].
+ * Also bits [26:24] are reserved for non-upstream ports.
+ */
+ .ro = BIT(31) | GENMASK(23, 0),
+ },
+
+ [PCI_EXP_DEVCTL2 / 4] = {
+ /*
+ * Device control 2 register is RW. Bit 11 is reserved for
+ * non-upstream ports.
+ *
+ * Device status 2 register is reserved.
+ */
+ .rw = GENMASK(15, 12) | GENMASK(10, 0),
+ },
+
+ [PCI_EXP_LNKCAP2 / 4] = {
+ /* Link capabilities 2 register has reserved bits [30:25] and 0. */
+ .ro = BIT(31) | GENMASK(24, 1),
+ },
+
+ [PCI_EXP_LNKCTL2 / 4] = {
+ /*
+ * Link control 2 register is RW.
+ *
+ * Link status 2 register has bits 5, 15 W1C;
+ * bits 10, 11 reserved and others are RO.
+ */
+ .rw = GENMASK(15, 0),
+ .w1c = (BIT(15) | BIT(5)) << 16,
+ .ro = (GENMASK(14, 12) | GENMASK(9, 6) | GENMASK(4, 0)) << 16,
+ },
+
+ [PCI_EXP_SLTCAP2 / 4] = {
+ /* Slot capabilities 2 register is reserved. */
+ },
+
+ [PCI_EXP_SLTCTL2 / 4] = {
+ /* Both Slot control 2 and Slot status 2 registers are reserved. */
+ },
};
/*
@@ -265,7 +327,11 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
{
BUILD_BUG_ON(sizeof(bridge->conf) != PCI_BRIDGE_CONF_END);
- bridge->conf.class_revision |= cpu_to_le32(PCI_CLASS_BRIDGE_PCI << 16);
+ /*
+ * class_revision: Class is high 24 bits and revision is low 8 bit of this member,
+ * while class for PCI Bridge Normal Decode has the 24-bit value: PCI_CLASS_BRIDGE_PCI << 8
+ */
+ bridge->conf.class_revision |= cpu_to_le32((PCI_CLASS_BRIDGE_PCI << 8) << 8);
bridge->conf.header_type = PCI_HEADER_TYPE_BRIDGE;
bridge->conf.cache_line_size = 0x10;
bridge->conf.status = cpu_to_le16(PCI_STATUS_CAP_LIST);
@@ -277,11 +343,9 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
if (bridge->has_pcie) {
bridge->conf.capabilities_pointer = PCI_CAP_PCIE_START;
+ bridge->conf.status |= cpu_to_le16(PCI_STATUS_CAP_LIST);
bridge->pcie_conf.cap_id = PCI_CAP_ID_EXP;
- /* Set PCIe v2, root port, slot support */
- bridge->pcie_conf.cap =
- cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4 | 2 |
- PCI_EXP_FLAGS_SLOT);
+ bridge->pcie_conf.cap |= cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4);
bridge->pcie_cap_regs_behavior =
kmemdup(pcie_cap_regs_behavior,
sizeof(pcie_cap_regs_behavior),
@@ -290,6 +354,27 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
kfree(bridge->pci_regs_behavior);
return -ENOMEM;
}
+ /* These bits are applicable only for PCI and reserved on PCIe */
+ bridge->pci_regs_behavior[PCI_CACHE_LINE_SIZE / 4].ro &=
+ ~GENMASK(15, 8);
+ bridge->pci_regs_behavior[PCI_COMMAND / 4].ro &=
+ ~((PCI_COMMAND_SPECIAL | PCI_COMMAND_INVALIDATE |
+ PCI_COMMAND_VGA_PALETTE | PCI_COMMAND_WAIT |
+ PCI_COMMAND_FAST_BACK) |
+ (PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK |
+ PCI_STATUS_DEVSEL_MASK) << 16);
+ bridge->pci_regs_behavior[PCI_PRIMARY_BUS / 4].ro &=
+ ~GENMASK(31, 24);
+ bridge->pci_regs_behavior[PCI_IO_BASE / 4].ro &=
+ ~((PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK |
+ PCI_STATUS_DEVSEL_MASK) << 16);
+ bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].rw &=
+ ~((PCI_BRIDGE_CTL_MASTER_ABORT |
+ BIT(8) | BIT(9) | BIT(11)) << 16);
+ bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].ro &=
+ ~((PCI_BRIDGE_CTL_FAST_BACK) << 16);
+ bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].w1c &=
+ ~(BIT(10) << 16);
}
if (flags & PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR) {
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index f3f606c232a8..9ecce435fb3f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1115,7 +1115,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
return -EIO;
pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
- if (pmcsr == (u16) ~0) {
+ if (PCI_POSSIBLE_ERROR(pmcsr)) {
pci_err(dev, "can't change power state from %s to %s (config space inaccessible)\n",
pci_power_name(dev->current_state),
pci_power_name(state));
@@ -1271,16 +1271,16 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
* After reset, the device should not silently discard config
* requests, but it may still indicate that it needs more time by
* responding to them with CRS completions. The Root Port will
- * generally synthesize ~0 data to complete the read (except when
- * CRS SV is enabled and the read was for the Vendor ID; in that
- * case it synthesizes 0x0001 data).
+ * generally synthesize ~0 (PCI_ERROR_RESPONSE) data to complete
+ * the read (except when CRS SV is enabled and the read was for the
+ * Vendor ID; in that case it synthesizes 0x0001 data).
*
* Wait for the device to return a non-CRS completion. Read the
* Command register instead of Vendor ID so we don't have to
* contend with the CRS SV value.
*/
pci_read_config_dword(dev, PCI_COMMAND, &id);
- while (id == ~0) {
+ while (PCI_POSSIBLE_ERROR(id)) {
if (delay > timeout) {
pci_warn(dev, "not ready %dms after %s; giving up\n",
delay - 1, reset_type);
@@ -1556,7 +1556,7 @@ static void pci_save_ltr_state(struct pci_dev *dev)
{
int ltr;
struct pci_cap_saved_state *save_state;
- u16 *cap;
+ u32 *cap;
if (!pci_is_pcie(dev))
return;
@@ -1571,25 +1571,25 @@ static void pci_save_ltr_state(struct pci_dev *dev)
return;
}
- cap = (u16 *)&save_state->cap.data[0];
- pci_read_config_word(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, cap++);
- pci_read_config_word(dev, ltr + PCI_LTR_MAX_NOSNOOP_LAT, cap++);
+ /* Some broken devices only support dword access to LTR */
+ cap = &save_state->cap.data[0];
+ pci_read_config_dword(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, cap);
}
static void pci_restore_ltr_state(struct pci_dev *dev)
{
struct pci_cap_saved_state *save_state;
int ltr;
- u16 *cap;
+ u32 *cap;
save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_LTR);
ltr = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR);
if (!save_state || !ltr)
return;
- cap = (u16 *)&save_state->cap.data[0];
- pci_write_config_word(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, *cap++);
- pci_write_config_word(dev, ltr + PCI_LTR_MAX_NOSNOOP_LAT, *cap++);
+ /* Some broken devices only support dword access to LTR */
+ cap = &save_state->cap.data[0];
+ pci_write_config_dword(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, *cap);
}
/**
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 52c74682601a..a96b7424c9bc 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -41,11 +41,6 @@
#define ASPM_STATE_ALL (ASPM_STATE_L0S | ASPM_STATE_L1 | \
ASPM_STATE_L1SS)
-struct aspm_latency {
- u32 l0s; /* L0s latency (nsec) */
- u32 l1; /* L1 latency (nsec) */
-};
-
struct pcie_link_state {
struct pci_dev *pdev; /* Upstream component of the Link */
struct pci_dev *downstream; /* Downstream component, function 0 */
@@ -65,15 +60,6 @@ struct pcie_link_state {
u32 clkpm_enabled:1; /* Current Clock PM state */
u32 clkpm_default:1; /* Default Clock PM state by BIOS */
u32 clkpm_disable:1; /* Clock PM disabled */
-
- /* Exit latencies */
- struct aspm_latency latency_up; /* Upstream direction exit latency */
- struct aspm_latency latency_dw; /* Downstream direction exit latency */
- /*
- * Endpoint acceptable latencies. A pcie downstream port only
- * has one slot under it, so at most there are 8 functions.
- */
- struct aspm_latency acceptable[8];
};
static int aspm_disabled, aspm_force;
@@ -105,6 +91,20 @@ static const char *policy_str[] = {
#define LINK_RETRAIN_TIMEOUT HZ
+/*
+ * The L1 PM substate capability is only implemented in function 0 in a
+ * multi function device.
+ */
+static struct pci_dev *pci_function_0(struct pci_bus *linkbus)
+{
+ struct pci_dev *child;
+
+ list_for_each_entry(child, &linkbus->devices, bus_list)
+ if (PCI_FUNC(child->devfn) == 0)
+ return child;
+ return NULL;
+}
+
static int policy_to_aspm_state(struct pcie_link_state *link)
{
switch (aspm_policy) {
@@ -378,8 +378,10 @@ static void encode_l12_threshold(u32 threshold_us, u32 *scale, u32 *value)
static void pcie_aspm_check_latency(struct pci_dev *endpoint)
{
- u32 latency, l1_switch_latency = 0;
- struct aspm_latency *acceptable;
+ u32 latency, encoding, lnkcap_up, lnkcap_dw;
+ u32 l1_switch_latency = 0, latency_up_l0s;
+ u32 latency_up_l1, latency_dw_l0s, latency_dw_l1;
+ u32 acceptable_l0s, acceptable_l1;
struct pcie_link_state *link;
/* Device not in D0 doesn't need latency check */
@@ -388,17 +390,36 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
return;
link = endpoint->bus->self->link_state;
- acceptable = &link->acceptable[PCI_FUNC(endpoint->devfn)];
+
+ /* Calculate endpoint L0s acceptable latency */
+ encoding = (endpoint->devcap & PCI_EXP_DEVCAP_L0S) >> 6;
+ acceptable_l0s = calc_l0s_acceptable(encoding);
+
+ /* Calculate endpoint L1 acceptable latency */
+ encoding = (endpoint->devcap & PCI_EXP_DEVCAP_L1) >> 9;
+ acceptable_l1 = calc_l1_acceptable(encoding);
while (link) {
+ struct pci_dev *dev = pci_function_0(link->pdev->subordinate);
+
+ /* Read direction exit latencies */
+ pcie_capability_read_dword(link->pdev, PCI_EXP_LNKCAP,
+ &lnkcap_up);
+ pcie_capability_read_dword(dev, PCI_EXP_LNKCAP,
+ &lnkcap_dw);
+ latency_up_l0s = calc_l0s_latency(lnkcap_up);
+ latency_up_l1 = calc_l1_latency(lnkcap_up);
+ latency_dw_l0s = calc_l0s_latency(lnkcap_dw);
+ latency_dw_l1 = calc_l1_latency(lnkcap_dw);
+
/* Check upstream direction L0s latency */
if ((link->aspm_capable & ASPM_STATE_L0S_UP) &&
- (link->latency_up.l0s > acceptable->l0s))
+ (latency_up_l0s > acceptable_l0s))
link->aspm_capable &= ~ASPM_STATE_L0S_UP;
/* Check downstream direction L0s latency */
if ((link->aspm_capable & ASPM_STATE_L0S_DW) &&
- (link->latency_dw.l0s > acceptable->l0s))
+ (latency_dw_l0s > acceptable_l0s))
link->aspm_capable &= ~ASPM_STATE_L0S_DW;
/*
* Check L1 latency.
@@ -413,9 +434,9 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
* L1 exit latencies advertised by a device include L1
* substate latencies (and hence do not do any check).
*/
- latency = max_t(u32, link->latency_up.l1, link->latency_dw.l1);
+ latency = max_t(u32, latency_up_l1, latency_dw_l1);
if ((link->aspm_capable & ASPM_STATE_L1) &&
- (latency + l1_switch_latency > acceptable->l1))
+ (latency + l1_switch_latency > acceptable_l1))
link->aspm_capable &= ~ASPM_STATE_L1;
l1_switch_latency += 1000;
@@ -423,20 +444,6 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
}
}
-/*
- * The L1 PM substate capability is only implemented in function 0 in a
- * multi function device.
- */
-static struct pci_dev *pci_function_0(struct pci_bus *linkbus)
-{
- struct pci_dev *child;
-
- list_for_each_entry(child, &linkbus->devices, bus_list)
- if (PCI_FUNC(child->devfn) == 0)
- return child;
- return NULL;
-}
-
static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos,
u32 clear, u32 set)
{
@@ -496,6 +503,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
encode_l12_threshold(l1_2_threshold, &scale, &value);
ctl1 |= t_common_mode << 8 | scale << 29 | value << 16;
+ /* Some broken devices only support dword access to L1 SS */
pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, &pctl1);
pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, &pctl2);
pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, &cctl1);
@@ -593,8 +601,6 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
link->aspm_enabled |= ASPM_STATE_L0S_UP;
if (parent_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S)
link->aspm_enabled |= ASPM_STATE_L0S_DW;
- link->latency_up.l0s = calc_l0s_latency(parent_lnkcap);
- link->latency_dw.l0s = calc_l0s_latency(child_lnkcap);
/* Setup L1 state */
if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1)
@@ -602,8 +608,6 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1)
link->aspm_enabled |= ASPM_STATE_L1;
- link->latency_up.l1 = calc_l1_latency(parent_lnkcap);
- link->latency_dw.l1 = calc_l1_latency(child_lnkcap);
/* Setup L1 substate */
pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CAP,
@@ -660,22 +664,10 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
/* Get and check endpoint acceptable latencies */
list_for_each_entry(child, &linkbus->devices, bus_list) {
- u32 reg32, encoding;
- struct aspm_latency *acceptable =
- &link->acceptable[PCI_FUNC(child->devfn)];
-
if (pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT &&
pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END)
continue;
- pcie_capability_read_dword(child, PCI_EXP_DEVCAP, &reg32);
- /* Calculate endpoint L0s acceptable latency */
- encoding = (reg32 & PCI_EXP_DEVCAP_L0S) >> 6;
- acceptable->l0s = calc_l0s_acceptable(encoding);
- /* Calculate endpoint L1 acceptable latency */
- encoding = (reg32 & PCI_EXP_DEVCAP_L1) >> 9;
- acceptable->l1 = calc_l1_acceptable(encoding);
-
pcie_aspm_check_latency(child);
}
}
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index c556e7beafe3..3e9afee02e8d 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -79,7 +79,7 @@ static bool dpc_completed(struct pci_dev *pdev)
u16 status;
pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, &status);
- if ((status != 0xffff) && (status & PCI_EXP_DPC_STATUS_TRIGGER))
+ if ((!PCI_POSSIBLE_ERROR(status)) && (status & PCI_EXP_DPC_STATUS_TRIGGER))
return false;
if (test_bit(PCI_DPC_RECOVERING, &pdev->priv_flags))
@@ -312,7 +312,7 @@ static irqreturn_t dpc_irq(int irq, void *context)
pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
- if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || status == (u16)(~0))
+ if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || PCI_POSSIBLE_ERROR(status))
return IRQ_NONE;
pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index 1d0dd77fed3a..ef8ce436ead9 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -224,7 +224,7 @@ static void pcie_pme_work_fn(struct work_struct *work)
break;
pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
- if (rtsta == (u32) ~0)
+ if (PCI_POSSIBLE_ERROR(rtsta))
break;
if (rtsta & PCI_EXP_RTSTA_PME) {
@@ -274,7 +274,7 @@ static irqreturn_t pcie_pme_irq(int irq, void *context)
spin_lock_irqsave(&data->lock, flags);
pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
- if (rtsta == (u32) ~0 || !(rtsta & PCI_EXP_RTSTA_PME)) {
+ if (PCI_POSSIBLE_ERROR(rtsta) || !(rtsta & PCI_EXP_RTSTA_PME)) {
spin_unlock_irqrestore(&data->lock, flags);
return IRQ_NONE;
}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 443efb00e219..17a969942d37 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -206,14 +206,14 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
* memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
* 1 must be clear.
*/
- if (sz == 0xffffffff)
+ if (PCI_POSSIBLE_ERROR(sz))
sz = 0;
/*
* I don't know how l can have all bits set. Copied from old code.
* Maybe it fixes a bug on some ancient platform.
*/
- if (l == 0xffffffff)
+ if (PCI_POSSIBLE_ERROR(l))
l = 0;
if (type == pci_bar_unknown) {
@@ -898,8 +898,6 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
bridge->bus = bus;
- /* Temporarily move resources off the list */
- list_splice_init(&bridge->windows, &resources);
bus->sysdata = bridge->sysdata;
bus->ops = bridge->ops;
bus->number = bus->busn_res.start = bridge->busnr;
@@ -925,6 +923,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
if (err)
goto free;
+ /* Temporarily move resources off the list */
+ list_splice_init(&bridge->windows, &resources);
err = device_add(&bridge->dev);
if (err) {
put_device(&bridge->dev);
@@ -1579,20 +1579,12 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev)
static void set_pcie_thunderbolt(struct pci_dev *dev)
{
- int vsec = 0;
- u32 header;
+ u16 vsec;
- while ((vsec = pci_find_next_ext_capability(dev, vsec,
- PCI_EXT_CAP_ID_VNDR))) {
- pci_read_config_dword(dev, vsec + PCI_VNDR_HEADER, &header);
-
- /* Is the device part of a Thunderbolt controller? */
- if (dev->vendor == PCI_VENDOR_ID_INTEL &&
- PCI_VNDR_HEADER_ID(header) == PCI_VSEC_ID_INTEL_TBT) {
- dev->is_thunderbolt = 1;
- return;
- }
- }
+ /* Is the device part of a Thunderbolt controller? */
+ vsec = pci_find_vsec_capability(dev, PCI_VENDOR_ID_INTEL, PCI_VSEC_ID_INTEL_TBT);
+ if (vsec)
+ dev->is_thunderbolt = 1;
}
static void set_pcie_untrusted(struct pci_dev *dev)
@@ -1683,7 +1675,7 @@ static int pci_cfg_space_size_ext(struct pci_dev *dev)
if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL)
return PCI_CFG_SPACE_SIZE;
- if (status == 0xffffffff || pci_ext_cfg_is_aliased(dev))
+ if (PCI_POSSIBLE_ERROR(status) || pci_ext_cfg_is_aliased(dev))
return PCI_CFG_SPACE_SIZE;
return PCI_CFG_SPACE_EXP_SIZE;
@@ -2373,8 +2365,8 @@ bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
return false;
- /* Some broken boards return 0 or ~0 if a slot is empty: */
- if (*l == 0xffffffff || *l == 0x00000000 ||
+ /* Some broken boards return 0 or ~0 (PCI_ERROR_RESPONSE) if a slot is empty: */
+ if (PCI_POSSIBLE_ERROR(*l) || *l == 0x00000000 ||
*l == 0x0000ffff || *l == 0xffff0000)
return false;
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index cb18f8a13ab6..9c7edec64f7e 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -21,14 +21,14 @@ static int proc_initialized; /* = 0 */
static loff_t proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
{
- struct pci_dev *dev = PDE_DATA(file_inode(file));
+ struct pci_dev *dev = pde_data(file_inode(file));
return fixed_size_llseek(file, off, whence, dev->cfg_size);
}
static ssize_t proc_bus_pci_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
- struct pci_dev *dev = PDE_DATA(file_inode(file));
+ struct pci_dev *dev = pde_data(file_inode(file));
unsigned int pos = *ppos;
unsigned int cnt, size;
@@ -114,7 +114,7 @@ static ssize_t proc_bus_pci_write(struct file *file, const char __user *buf,
size_t nbytes, loff_t *ppos)
{
struct inode *ino = file_inode(file);
- struct pci_dev *dev = PDE_DATA(ino);
+ struct pci_dev *dev = pde_data(ino);
int pos = *ppos;
int size = dev->cfg_size;
int cnt, ret;
@@ -196,7 +196,7 @@ struct pci_filp_private {
static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- struct pci_dev *dev = PDE_DATA(file_inode(file));
+ struct pci_dev *dev = pde_data(file_inode(file));
#ifdef HAVE_PCI_MMAP
struct pci_filp_private *fpriv = file->private_data;
#endif /* HAVE_PCI_MMAP */
@@ -244,7 +244,7 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
#ifdef HAVE_PCI_MMAP
static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
{
- struct pci_dev *dev = PDE_DATA(file_inode(file));
+ struct pci_dev *dev = pde_data(file_inode(file));
struct pci_filp_private *fpriv = file->private_data;
int i, ret, write_combine = 0, res_bit = IORESOURCE_MEM;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index b4cb658cce2b..d2dd6a6cda60 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -980,8 +980,8 @@ static void quirk_via_ioapic(struct pci_dev *dev)
else
tmp = 0x1f; /* all known bits (4-0) routed to external APIC */
- pci_info(dev, "%sbling VIA external APIC routing\n",
- tmp == 0 ? "Disa" : "Ena");
+ pci_info(dev, "%s VIA external APIC routing\n",
+ tmp ? "Enabling" : "Disabling");
/* Offset 0x58: External APIC IRQ output control */
pci_write_config_byte(dev, 0x58, tmp);
@@ -4103,6 +4103,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9120,
quirk_dma_func1_alias);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9123,
quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c136 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9125,
+ quirk_dma_func1_alias);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9128,
quirk_dma_func1_alias);
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */
@@ -5683,6 +5686,15 @@ SWITCHTEC_QUIRK(0x4268); /* PAX 68XG4 */
SWITCHTEC_QUIRK(0x4252); /* PAX 52XG4 */
SWITCHTEC_QUIRK(0x4236); /* PAX 36XG4 */
SWITCHTEC_QUIRK(0x4228); /* PAX 28XG4 */
+SWITCHTEC_QUIRK(0x4352); /* PFXA 52XG4 */
+SWITCHTEC_QUIRK(0x4336); /* PFXA 36XG4 */
+SWITCHTEC_QUIRK(0x4328); /* PFXA 28XG4 */
+SWITCHTEC_QUIRK(0x4452); /* PSXA 52XG4 */
+SWITCHTEC_QUIRK(0x4436); /* PSXA 36XG4 */
+SWITCHTEC_QUIRK(0x4428); /* PSXA 28XG4 */
+SWITCHTEC_QUIRK(0x4552); /* PAXA 52XG4 */
+SWITCHTEC_QUIRK(0x4536); /* PAXA 36XG4 */
+SWITCHTEC_QUIRK(0x4528); /* PAXA 28XG4 */
/*
* The PLX NTB uses devfn proxy IDs to move TLPs between NT endpoints.
@@ -5857,3 +5869,13 @@ static void nvidia_ion_ahci_fixup(struct pci_dev *pdev)
pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING;
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup);
+
+static void rom_bar_overlap_defect(struct pci_dev *dev)
+{
+ pci_info(dev, "working around ROM BAR overlap defect\n");
+ dev->rom_bar_overlap = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1533, rom_bar_overlap_defect);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1536, rom_bar_overlap_defect);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1537, rom_bar_overlap_defect);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1538, rom_bar_overlap_defect);
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 7f1acb3918d0..439ac5f5907a 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -75,12 +75,16 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno)
* as zero when disabled, so don't update ROM BARs unless
* they're enabled. See
* https://lore.kernel.org/r/43147B3D.1030309@vc.cvut.cz/
+ * But we must update ROM BAR for buggy devices where even a
+ * disabled ROM can conflict with other BARs.
*/
- if (!(res->flags & IORESOURCE_ROM_ENABLE))
+ if (!(res->flags & IORESOURCE_ROM_ENABLE) &&
+ !dev->rom_bar_overlap)
return;
reg = dev->rom_base_reg;
- new |= PCI_ROM_ADDRESS_ENABLE;
+ if (res->flags & IORESOURCE_ROM_ENABLE)
+ new |= PCI_ROM_ADDRESS_ENABLE;
} else
return;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 751a26668e3a..a0c67191a8b9 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -96,11 +96,12 @@ static struct attribute *pci_slot_default_attrs[] = {
&pci_slot_attr_cur_speed.attr,
NULL,
};
+ATTRIBUTE_GROUPS(pci_slot_default);
static struct kobj_type pci_slot_ktype = {
.sysfs_ops = &pci_slot_sysfs_ops,
.release = &pci_slot_release,
- .default_attrs = pci_slot_default_attrs,
+ .default_groups = pci_slot_default_groups,
};
static char *make_slot_name(const char *name)
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 38c2b036fb8e..c36c1238c604 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -122,7 +122,7 @@ static void stuser_set_state(struct switchtec_user *stuser,
{
/* requires the mrpc_mutex to already be held when called */
- const char * const state_names[] = {
+ static const char * const state_names[] = {
[MRPC_IDLE] = "IDLE",
[MRPC_QUEUED] = "QUEUED",
[MRPC_RUNNING] = "RUNNING",
@@ -1779,6 +1779,15 @@ static const struct pci_device_id switchtec_pci_tbl[] = {
SWITCHTEC_PCI_DEVICE(0x4252, SWITCHTEC_GEN4), //PAX 52XG4
SWITCHTEC_PCI_DEVICE(0x4236, SWITCHTEC_GEN4), //PAX 36XG4
SWITCHTEC_PCI_DEVICE(0x4228, SWITCHTEC_GEN4), //PAX 28XG4
+ SWITCHTEC_PCI_DEVICE(0x4352, SWITCHTEC_GEN4), //PFXA 52XG4
+ SWITCHTEC_PCI_DEVICE(0x4336, SWITCHTEC_GEN4), //PFXA 36XG4
+ SWITCHTEC_PCI_DEVICE(0x4328, SWITCHTEC_GEN4), //PFXA 28XG4
+ SWITCHTEC_PCI_DEVICE(0x4452, SWITCHTEC_GEN4), //PSXA 52XG4
+ SWITCHTEC_PCI_DEVICE(0x4436, SWITCHTEC_GEN4), //PSXA 36XG4
+ SWITCHTEC_PCI_DEVICE(0x4428, SWITCHTEC_GEN4), //PSXA 28XG4
+ SWITCHTEC_PCI_DEVICE(0x4552, SWITCHTEC_GEN4), //PAXA 52XG4
+ SWITCHTEC_PCI_DEVICE(0x4536, SWITCHTEC_GEN4), //PAXA 36XG4
+ SWITCHTEC_PCI_DEVICE(0x4528, SWITCHTEC_GEN4), //PAXA 28XG4
{0}
};
MODULE_DEVICE_TABLE(pci, switchtec_pci_tbl);
diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c
index 6b1edfc890a3..92df2c2c5d07 100644
--- a/drivers/pcmcia/at91_cf.c
+++ b/drivers/pcmcia/at91_cf.c
@@ -20,6 +20,7 @@
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_gpio.h>
+#include <linux/pci.h>
#include <linux/regmap.h>
#include <pcmcia/ss.h>
@@ -230,6 +231,7 @@ static int at91_cf_probe(struct platform_device *pdev)
struct at91_cf_socket *cf;
struct at91_cf_data *board;
struct resource *io;
+ struct resource realio;
int status;
board = devm_kzalloc(&pdev->dev, sizeof(*board), GFP_KERNEL);
@@ -307,7 +309,9 @@ static int at91_cf_probe(struct platform_device *pdev)
* io_offset is set to 0x10000 to avoid the check in static_find_io().
* */
cf->socket.io_offset = 0x10000;
- status = pci_ioremap_io(0x10000, cf->phys_baseaddr + CF_IO_PHYS);
+ realio.start = cf->socket.io_offset;
+ realio.end = realio.start + SZ_64K - 1;
+ status = pci_remap_iospace(&realio, cf->phys_baseaddr + CF_IO_PHYS);
if (status)
goto fail0a;
diff --git a/drivers/phy/amlogic/Kconfig b/drivers/phy/amlogic/Kconfig
index db5d0cd757e3..486ca23aba32 100644
--- a/drivers/phy/amlogic/Kconfig
+++ b/drivers/phy/amlogic/Kconfig
@@ -2,6 +2,16 @@
#
# Phy drivers for Amlogic platforms
#
+config PHY_MESON8_HDMI_TX
+ tristate "Meson8, Meson8b and Meson8m2 HDMI TX PHY driver"
+ depends on (ARCH_MESON && ARM) || COMPILE_TEST
+ depends on OF
+ select MFD_SYSCON
+ help
+ Enable this to support the HDMI TX PHYs found in Meson8,
+ Meson8b and Meson8m2 SoCs.
+ If unsure, say N.
+
config PHY_MESON8B_USB2
tristate "Meson8, Meson8b, Meson8m2 and GXBB USB2 PHY driver"
default ARCH_MESON
diff --git a/drivers/phy/amlogic/Makefile b/drivers/phy/amlogic/Makefile
index 8fa07fbd0d92..c0886c850bb0 100644
--- a/drivers/phy/amlogic/Makefile
+++ b/drivers/phy/amlogic/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PHY_MESON8_HDMI_TX) += phy-meson8-hdmi-tx.o
obj-$(CONFIG_PHY_MESON8B_USB2) += phy-meson8b-usb2.o
obj-$(CONFIG_PHY_MESON_GXL_USB2) += phy-meson-gxl-usb2.o
obj-$(CONFIG_PHY_MESON_G12A_USB2) += phy-meson-g12a-usb2.o
diff --git a/drivers/phy/amlogic/phy-meson8-hdmi-tx.c b/drivers/phy/amlogic/phy-meson8-hdmi-tx.c
new file mode 100644
index 000000000000..f9a6572c27d8
--- /dev/null
+++ b/drivers/phy/amlogic/phy-meson8-hdmi-tx.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Meson8, Meson8b and Meson8m2 HDMI TX PHY.
+ *
+ * Copyright (C) 2021 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+
+/*
+ * Unfortunately there is no detailed documentation available for the
+ * HHI_HDMI_PHY_CNTL0 register. CTL0 and CTL1 is all we know about.
+ * Magic register values in the driver below are taken from the vendor
+ * BSP / kernel.
+ */
+#define HHI_HDMI_PHY_CNTL0 0x3a0
+ #define HHI_HDMI_PHY_CNTL0_HDMI_CTL1 GENMASK(31, 16)
+ #define HHI_HDMI_PHY_CNTL0_HDMI_CTL0 GENMASK(15, 0)
+
+#define HHI_HDMI_PHY_CNTL1 0x3a4
+ #define HHI_HDMI_PHY_CNTL1_CLOCK_ENABLE BIT(1)
+ #define HHI_HDMI_PHY_CNTL1_SOFT_RESET BIT(0)
+
+#define HHI_HDMI_PHY_CNTL2 0x3a8
+
+struct phy_meson8_hdmi_tx_priv {
+ struct regmap *hhi;
+ struct clk *tmds_clk;
+};
+
+static int phy_meson8_hdmi_tx_init(struct phy *phy)
+{
+ struct phy_meson8_hdmi_tx_priv *priv = phy_get_drvdata(phy);
+
+ return clk_prepare_enable(priv->tmds_clk);
+}
+
+static int phy_meson8_hdmi_tx_exit(struct phy *phy)
+{
+ struct phy_meson8_hdmi_tx_priv *priv = phy_get_drvdata(phy);
+
+ clk_disable_unprepare(priv->tmds_clk);
+
+ return 0;
+}
+
+static int phy_meson8_hdmi_tx_power_on(struct phy *phy)
+{
+ struct phy_meson8_hdmi_tx_priv *priv = phy_get_drvdata(phy);
+ unsigned int i;
+ u16 hdmi_ctl0;
+
+ if (clk_get_rate(priv->tmds_clk) >= 2970UL * 1000 * 1000)
+ hdmi_ctl0 = 0x1e8b;
+ else
+ hdmi_ctl0 = 0x4d0b;
+
+ regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0,
+ FIELD_PREP(HHI_HDMI_PHY_CNTL0_HDMI_CTL1, 0x08c3) |
+ FIELD_PREP(HHI_HDMI_PHY_CNTL0_HDMI_CTL0, hdmi_ctl0));
+
+ regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL1, 0x0);
+
+ /* Reset three times, just like the vendor driver does */
+ for (i = 0; i < 3; i++) {
+ regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL1,
+ HHI_HDMI_PHY_CNTL1_CLOCK_ENABLE |
+ HHI_HDMI_PHY_CNTL1_SOFT_RESET);
+ usleep_range(1000, 2000);
+
+ regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL1,
+ HHI_HDMI_PHY_CNTL1_CLOCK_ENABLE);
+ usleep_range(1000, 2000);
+ }
+
+ return 0;
+}
+
+static int phy_meson8_hdmi_tx_power_off(struct phy *phy)
+{
+ struct phy_meson8_hdmi_tx_priv *priv = phy_get_drvdata(phy);
+
+ regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0,
+ FIELD_PREP(HHI_HDMI_PHY_CNTL0_HDMI_CTL1, 0x0841) |
+ FIELD_PREP(HHI_HDMI_PHY_CNTL0_HDMI_CTL0, 0x8d00));
+
+ return 0;
+}
+
+static const struct phy_ops phy_meson8_hdmi_tx_ops = {
+ .init = phy_meson8_hdmi_tx_init,
+ .exit = phy_meson8_hdmi_tx_exit,
+ .power_on = phy_meson8_hdmi_tx_power_on,
+ .power_off = phy_meson8_hdmi_tx_power_off,
+ .owner = THIS_MODULE,
+};
+
+static int phy_meson8_hdmi_tx_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct phy_meson8_hdmi_tx_priv *priv;
+ struct phy_provider *phy_provider;
+ struct resource *res;
+ struct phy *phy;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -EINVAL;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->hhi = syscon_node_to_regmap(np->parent);
+ if (IS_ERR(priv->hhi))
+ return PTR_ERR(priv->hhi);
+
+ priv->tmds_clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(priv->tmds_clk))
+ return PTR_ERR(priv->tmds_clk);
+
+ phy = devm_phy_create(&pdev->dev, np, &phy_meson8_hdmi_tx_ops);
+ if (IS_ERR(phy))
+ return PTR_ERR(phy);
+
+ phy_set_drvdata(phy, priv);
+
+ phy_provider = devm_of_phy_provider_register(&pdev->dev,
+ of_phy_simple_xlate);
+
+ return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static const struct of_device_id phy_meson8_hdmi_tx_of_match[] = {
+ { .compatible = "amlogic,meson8-hdmi-tx-phy" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, phy_meson8_hdmi_tx_of_match);
+
+static struct platform_driver phy_meson8_hdmi_tx_driver = {
+ .probe = phy_meson8_hdmi_tx_probe,
+ .driver = {
+ .name = "phy-meson8-hdmi-tx",
+ .of_match_table = phy_meson8_hdmi_tx_of_match,
+ },
+};
+module_platform_driver(phy_meson8_hdmi_tx_driver);
+
+MODULE_AUTHOR("Martin Blumenstingl <martin.blumenstingl@googlemail.com>");
+MODULE_DESCRIPTION("Meson8, Meson8b and Meson8m2 HDMI TX PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/broadcom/phy-bcm-ns-usb2.c b/drivers/phy/broadcom/phy-bcm-ns-usb2.c
index 4b015b8a71c3..6a36e187d100 100644
--- a/drivers/phy/broadcom/phy-bcm-ns-usb2.c
+++ b/drivers/phy/broadcom/phy-bcm-ns-usb2.c
@@ -9,17 +9,23 @@
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/err.h>
+#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/phy/phy.h>
#include <linux/platform_device.h>
+#include <linux/regmap.h>
#include <linux/slab.h>
struct bcm_ns_usb2 {
struct device *dev;
struct clk *ref_clk;
struct phy *phy;
+ struct regmap *clkset;
+ void __iomem *base;
+
+ /* Deprecated binding */
void __iomem *dmu;
};
@@ -27,7 +33,6 @@ static int bcm_ns_usb2_phy_init(struct phy *phy)
{
struct bcm_ns_usb2 *usb2 = phy_get_drvdata(phy);
struct device *dev = usb2->dev;
- void __iomem *dmu = usb2->dmu;
u32 ref_clk_rate, usb2ctl, usb_pll_ndiv, usb_pll_pdiv;
int err = 0;
@@ -44,7 +49,10 @@ static int bcm_ns_usb2_phy_init(struct phy *phy)
goto err_clk_off;
}
- usb2ctl = readl(dmu + BCMA_DMU_CRU_USB2_CONTROL);
+ if (usb2->base)
+ usb2ctl = readl(usb2->base);
+ else
+ usb2ctl = readl(usb2->dmu + BCMA_DMU_CRU_USB2_CONTROL);
if (usb2ctl & BCMA_DMU_CRU_USB2_CONTROL_USB_PLL_PDIV_MASK) {
usb_pll_pdiv = usb2ctl;
@@ -58,15 +66,24 @@ static int bcm_ns_usb2_phy_init(struct phy *phy)
usb_pll_ndiv = (1920000000 * usb_pll_pdiv) / ref_clk_rate;
/* Unlock DMU PLL settings with some magic value */
- writel(0x0000ea68, dmu + BCMA_DMU_CRU_CLKSET_KEY);
+ if (usb2->clkset)
+ regmap_write(usb2->clkset, 0, 0x0000ea68);
+ else
+ writel(0x0000ea68, usb2->dmu + BCMA_DMU_CRU_CLKSET_KEY);
/* Write USB 2.0 PLL control setting */
usb2ctl &= ~BCMA_DMU_CRU_USB2_CONTROL_USB_PLL_NDIV_MASK;
usb2ctl |= usb_pll_ndiv << BCMA_DMU_CRU_USB2_CONTROL_USB_PLL_NDIV_SHIFT;
- writel(usb2ctl, dmu + BCMA_DMU_CRU_USB2_CONTROL);
+ if (usb2->base)
+ writel(usb2ctl, usb2->base);
+ else
+ writel(usb2ctl, usb2->dmu + BCMA_DMU_CRU_USB2_CONTROL);
/* Lock DMU PLL settings */
- writel(0x00000000, dmu + BCMA_DMU_CRU_CLKSET_KEY);
+ if (usb2->clkset)
+ regmap_write(usb2->clkset, 0, 0x00000000);
+ else
+ writel(0x00000000, usb2->dmu + BCMA_DMU_CRU_CLKSET_KEY);
err_clk_off:
clk_disable_unprepare(usb2->ref_clk);
@@ -90,15 +107,32 @@ static int bcm_ns_usb2_probe(struct platform_device *pdev)
return -ENOMEM;
usb2->dev = dev;
- usb2->dmu = devm_platform_ioremap_resource_byname(pdev, "dmu");
- if (IS_ERR(usb2->dmu)) {
- dev_err(dev, "Failed to map DMU regs\n");
- return PTR_ERR(usb2->dmu);
+ if (of_find_property(dev->of_node, "brcm,syscon-clkset", NULL)) {
+ usb2->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(usb2->base)) {
+ dev_err(dev, "Failed to map control reg\n");
+ return PTR_ERR(usb2->base);
+ }
+
+ usb2->clkset = syscon_regmap_lookup_by_phandle(dev->of_node,
+ "brcm,syscon-clkset");
+ if (IS_ERR(usb2->clkset)) {
+ dev_err(dev, "Failed to lookup clkset regmap\n");
+ return PTR_ERR(usb2->clkset);
+ }
+ } else {
+ usb2->dmu = devm_platform_ioremap_resource_byname(pdev, "dmu");
+ if (IS_ERR(usb2->dmu)) {
+ dev_err(dev, "Failed to map DMU regs\n");
+ return PTR_ERR(usb2->dmu);
+ }
+
+ dev_warn(dev, "using deprecated DT binding\n");
}
usb2->ref_clk = devm_clk_get(dev, "phy-ref-clk");
if (IS_ERR(usb2->ref_clk)) {
- dev_err(dev, "Clock not defined\n");
+ dev_err_probe(dev, PTR_ERR(usb2->ref_clk), "failed to get ref clk\n");
return PTR_ERR(usb2->ref_clk);
}
diff --git a/drivers/phy/cadence/phy-cadence-sierra.c b/drivers/phy/cadence/phy-cadence-sierra.c
index e93818e3991f..da24acd26666 100644
--- a/drivers/phy/cadence/phy-cadence-sierra.c
+++ b/drivers/phy/cadence/phy-cadence-sierra.c
@@ -23,6 +23,9 @@
#include <dt-bindings/phy/phy.h>
#include <dt-bindings/phy/phy-cadence.h>
+#define NUM_SSC_MODE 3
+#define NUM_PHY_TYPE 4
+
/* PHY register offsets */
#define SIERRA_COMMON_CDB_OFFSET 0x0
#define SIERRA_MACRO_ID_REG 0x0
@@ -31,12 +34,21 @@
#define SIERRA_CMN_PLLLC_LF_COEFF_MODE1_PREG 0x49
#define SIERRA_CMN_PLLLC_LF_COEFF_MODE0_PREG 0x4A
#define SIERRA_CMN_PLLLC_LOCK_CNTSTART_PREG 0x4B
+#define SIERRA_CMN_PLLLC_CLK1_PREG 0x4D
#define SIERRA_CMN_PLLLC_BWCAL_MODE1_PREG 0x4F
#define SIERRA_CMN_PLLLC_BWCAL_MODE0_PREG 0x50
+#define SIERRA_CMN_PLLLC_DSMCORR_PREG 0x51
+#define SIERRA_CMN_PLLLC_SS_PREG 0x52
+#define SIERRA_CMN_PLLLC_SS_AMP_STEP_SIZE_PREG 0x53
+#define SIERRA_CMN_PLLLC_SSTWOPT_PREG 0x54
#define SIERRA_CMN_PLLLC_SS_TIME_STEPSIZE_MODE_PREG 0x62
+#define SIERRA_CMN_PLLLC_LOCK_DELAY_CTRL_PREG 0x63
#define SIERRA_CMN_REFRCV_PREG 0x98
#define SIERRA_CMN_REFRCV1_PREG 0xB8
#define SIERRA_CMN_PLLLC1_GEN_PREG 0xC2
+#define SIERRA_CMN_PLLLC1_LF_COEFF_MODE0_PREG 0xCA
+#define SIERRA_CMN_PLLLC1_BWCAL_MODE0_PREG 0xD0
+#define SIERRA_CMN_PLLLC1_SS_TIME_STEPSIZE_MODE_PREG 0xE2
#define SIERRA_LANE_CDB_OFFSET(ln, block_offset, reg_offset) \
((0x4000 << (block_offset)) + \
@@ -49,7 +61,11 @@
#define SIERRA_DET_STANDEC_E_PREG 0x004
#define SIERRA_PSM_LANECAL_DLY_A1_RESETS_PREG 0x008
#define SIERRA_PSM_A0IN_TMR_PREG 0x009
+#define SIERRA_PSM_A3IN_TMR_PREG 0x00C
#define SIERRA_PSM_DIAG_PREG 0x015
+#define SIERRA_PSC_LN_A3_PREG 0x023
+#define SIERRA_PSC_LN_A4_PREG 0x024
+#define SIERRA_PSC_LN_IDLE_PREG 0x026
#define SIERRA_PSC_TX_A0_PREG 0x028
#define SIERRA_PSC_TX_A1_PREG 0x029
#define SIERRA_PSC_TX_A2_PREG 0x02A
@@ -59,18 +75,22 @@
#define SIERRA_PSC_RX_A2_PREG 0x032
#define SIERRA_PSC_RX_A3_PREG 0x033
#define SIERRA_PLLCTRL_SUBRATE_PREG 0x03A
+#define SIERRA_PLLCTRL_GEN_A_PREG 0x03B
#define SIERRA_PLLCTRL_GEN_D_PREG 0x03E
#define SIERRA_PLLCTRL_CPGAIN_MODE_PREG 0x03F
#define SIERRA_PLLCTRL_STATUS_PREG 0x044
#define SIERRA_CLKPATH_BIASTRIM_PREG 0x04B
#define SIERRA_DFE_BIASTRIM_PREG 0x04C
#define SIERRA_DRVCTRL_ATTEN_PREG 0x06A
+#define SIERRA_DRVCTRL_BOOST_PREG 0x06F
#define SIERRA_CLKPATHCTRL_TMR_PREG 0x081
#define SIERRA_RX_CREQ_FLTR_A_MODE3_PREG 0x085
#define SIERRA_RX_CREQ_FLTR_A_MODE2_PREG 0x086
#define SIERRA_RX_CREQ_FLTR_A_MODE1_PREG 0x087
#define SIERRA_RX_CREQ_FLTR_A_MODE0_PREG 0x088
+#define SIERRA_CREQ_DCBIASATTEN_OVR_PREG 0x08C
#define SIERRA_CREQ_CCLKDET_MODE01_PREG 0x08E
+#define SIERRA_RX_CTLE_CAL_PREG 0x08F
#define SIERRA_RX_CTLE_MAINTENANCE_PREG 0x091
#define SIERRA_CREQ_FSMCLK_SEL_PREG 0x092
#define SIERRA_CREQ_EQ_CTRL_PREG 0x093
@@ -120,15 +140,28 @@
#define SIERRA_DEQ_ALUT12 0x114
#define SIERRA_DEQ_ALUT13 0x115
#define SIERRA_DEQ_DFETAP_CTRL_PREG 0x128
+#define SIERRA_DEQ_DFETAP0 0x129
+#define SIERRA_DEQ_DFETAP1 0x12B
+#define SIERRA_DEQ_DFETAP2 0x12D
+#define SIERRA_DEQ_DFETAP3 0x12F
+#define SIERRA_DEQ_DFETAP4 0x131
#define SIERRA_DFE_EN_1010_IGNORE_PREG 0x134
+#define SIERRA_DEQ_PRECUR_PREG 0x138
+#define SIERRA_DEQ_POSTCUR_PREG 0x140
+#define SIERRA_DEQ_POSTCUR_DECR_PREG 0x142
#define SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG 0x150
#define SIERRA_DEQ_TAU_CTRL2_PREG 0x151
+#define SIERRA_DEQ_TAU_CTRL3_PREG 0x152
+#define SIERRA_DEQ_OPENEYE_CTRL_PREG 0x158
#define SIERRA_DEQ_PICTRL_PREG 0x161
#define SIERRA_CPICAL_TMRVAL_MODE1_PREG 0x170
#define SIERRA_CPICAL_TMRVAL_MODE0_PREG 0x171
#define SIERRA_CPICAL_PICNT_MODE1_PREG 0x174
#define SIERRA_CPI_OUTBUF_RATESEL_PREG 0x17C
+#define SIERRA_CPI_RESBIAS_BIN_PREG 0x17E
+#define SIERRA_CPI_TRIM_PREG 0x17F
#define SIERRA_CPICAL_RES_STARTCODE_MODE23_PREG 0x183
+#define SIERRA_EPI_CTRL_PREG 0x187
#define SIERRA_LFPSDET_SUPPORT_PREG 0x188
#define SIERRA_LFPSFILT_NS_PREG 0x18A
#define SIERRA_LFPSFILT_RD_PREG 0x18B
@@ -142,15 +175,36 @@
#define SIERRA_DEQ_TAU_CTRL1_FAST_MAINT_PREG 0x14F
#define SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG 0x150
-#define SIERRA_PHY_CONFIG_CTRL_OFFSET(block_offset) \
- (0xc000 << (block_offset))
+/* PHY PCS common registers */
+#define SIERRA_PHY_PCS_COMMON_OFFSET(block_offset) \
+ (0xc000 << (block_offset))
+#define SIERRA_PHY_PIPE_CMN_CTRL1 0x0
#define SIERRA_PHY_PLL_CFG 0xe
+/* PHY PCS lane registers */
+#define SIERRA_PHY_PCS_LANE_CDB_OFFSET(ln, block_offset, reg_offset) \
+ ((0xD000 << (block_offset)) + \
+ (((ln) << 8) << (reg_offset)))
+
+#define SIERRA_PHY_ISO_LINK_CTRL 0xB
+
+/* PHY PMA common registers */
+#define SIERRA_PHY_PMA_COMMON_OFFSET(block_offset) \
+ (0xE000 << (block_offset))
+#define SIERRA_PHY_PMA_CMN_CTRL 0x000
+
+/* PHY PMA lane registers */
+#define SIERRA_PHY_PMA_LANE_CDB_OFFSET(ln, block_offset, reg_offset) \
+ ((0xF000 << (block_offset)) + \
+ (((ln) << 8) << (reg_offset)))
+
+#define SIERRA_PHY_PMA_XCVR_CTRL 0x000
+
#define SIERRA_MACRO_ID 0x00007364
#define SIERRA_MAX_LANES 16
#define PLL_LOCK_TIME 100000
-#define CDNS_SIERRA_OUTPUT_CLOCKS 2
+#define CDNS_SIERRA_OUTPUT_CLOCKS 3
#define CDNS_SIERRA_INPUT_CLOCKS 5
enum cdns_sierra_clock_input {
PHY_CLK,
@@ -167,12 +221,21 @@ static const struct reg_field macro_id_type =
REG_FIELD(SIERRA_MACRO_ID_REG, 0, 15);
static const struct reg_field phy_pll_cfg_1 =
REG_FIELD(SIERRA_PHY_PLL_CFG, 1, 1);
+static const struct reg_field pma_cmn_ready =
+ REG_FIELD(SIERRA_PHY_PMA_CMN_CTRL, 0, 0);
static const struct reg_field pllctrl_lock =
REG_FIELD(SIERRA_PLLCTRL_STATUS_PREG, 0, 0);
+static const struct reg_field phy_iso_link_ctrl_1 =
+ REG_FIELD(SIERRA_PHY_ISO_LINK_CTRL, 1, 1);
+static const struct reg_field cmn_plllc_clk1outdiv_preg =
+ REG_FIELD(SIERRA_CMN_PLLLC_CLK1_PREG, 0, 6);
+static const struct reg_field cmn_plllc_clk1_en_preg =
+ REG_FIELD(SIERRA_CMN_PLLLC_CLK1_PREG, 12, 12);
static const char * const clk_names[] = {
[CDNS_SIERRA_PLL_CMNLC] = "pll_cmnlc",
[CDNS_SIERRA_PLL_CMNLC1] = "pll_cmnlc1",
+ [CDNS_SIERRA_DERIVED_REFCLK] = "refclk_der",
};
enum cdns_sierra_cmn_plllc {
@@ -215,14 +278,41 @@ static const int pll_mux_parent_index[][SIERRA_NUM_CMN_PLLC_PARENTS] = {
[CMN_PLLLC1] = { PLL1_REFCLK, PLL0_REFCLK },
};
-static u32 cdns_sierra_pll_mux_table[] = { 0, 1 };
+static u32 cdns_sierra_pll_mux_table[][SIERRA_NUM_CMN_PLLC_PARENTS] = {
+ [CMN_PLLLC] = { 0, 1 },
+ [CMN_PLLLC1] = { 1, 0 },
+};
+
+struct cdns_sierra_derived_refclk {
+ struct clk_hw hw;
+ struct regmap_field *cmn_plllc_clk1outdiv_preg;
+ struct regmap_field *cmn_plllc_clk1_en_preg;
+ struct clk_init_data clk_data;
+};
+
+#define to_cdns_sierra_derived_refclk(_hw) \
+ container_of(_hw, struct cdns_sierra_derived_refclk, hw)
+
+enum cdns_sierra_phy_type {
+ TYPE_NONE,
+ TYPE_PCIE,
+ TYPE_USB,
+ TYPE_QSGMII
+};
+
+enum cdns_sierra_ssc_mode {
+ NO_SSC,
+ EXTERNAL_SSC,
+ INTERNAL_SSC
+};
struct cdns_sierra_inst {
struct phy *phy;
- u32 phy_type;
+ enum cdns_sierra_phy_type phy_type;
u32 num_lanes;
u32 mlane;
struct reset_control *lnk_rst;
+ enum cdns_sierra_ssc_mode ssc_mode;
};
struct cdns_reg_pairs {
@@ -230,18 +320,23 @@ struct cdns_reg_pairs {
u32 off;
};
+struct cdns_sierra_vals {
+ const struct cdns_reg_pairs *reg_pairs;
+ u32 num_regs;
+};
+
struct cdns_sierra_data {
- u32 id_value;
- u8 block_offset_shift;
- u8 reg_offset_shift;
- u32 pcie_cmn_regs;
- u32 pcie_ln_regs;
- u32 usb_cmn_regs;
- u32 usb_ln_regs;
- const struct cdns_reg_pairs *pcie_cmn_vals;
- const struct cdns_reg_pairs *pcie_ln_vals;
- const struct cdns_reg_pairs *usb_cmn_vals;
- const struct cdns_reg_pairs *usb_ln_vals;
+ u32 id_value;
+ u8 block_offset_shift;
+ u8 reg_offset_shift;
+ struct cdns_sierra_vals *pcs_cmn_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
+ [NUM_SSC_MODE];
+ struct cdns_sierra_vals *phy_pma_ln_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
+ [NUM_SSC_MODE];
+ struct cdns_sierra_vals *pma_cmn_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
+ [NUM_SSC_MODE];
+ struct cdns_sierra_vals *pma_ln_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
+ [NUM_SSC_MODE];
};
struct cdns_regmap_cdb_context {
@@ -253,16 +348,21 @@ struct cdns_regmap_cdb_context {
struct cdns_sierra_phy {
struct device *dev;
struct regmap *regmap;
- struct cdns_sierra_data *init_data;
+ const struct cdns_sierra_data *init_data;
struct cdns_sierra_inst phys[SIERRA_MAX_LANES];
struct reset_control *phy_rst;
struct reset_control *apb_rst;
struct regmap *regmap_lane_cdb[SIERRA_MAX_LANES];
- struct regmap *regmap_phy_config_ctrl;
+ struct regmap *regmap_phy_pcs_common_cdb;
+ struct regmap *regmap_phy_pcs_lane_cdb[SIERRA_MAX_LANES];
+ struct regmap *regmap_phy_pma_common_cdb;
+ struct regmap *regmap_phy_pma_lane_cdb[SIERRA_MAX_LANES];
struct regmap *regmap_common_cdb;
struct regmap_field *macro_id_type;
struct regmap_field *phy_pll_cfg_1;
+ struct regmap_field *pma_cmn_ready;
struct regmap_field *pllctrl_lock[SIERRA_MAX_LANES];
+ struct regmap_field *phy_iso_link_ctrl_1[SIERRA_MAX_LANES];
struct regmap_field *cmn_refrcv_refclk_plllc1en_preg[SIERRA_NUM_CMN_PLLC];
struct regmap_field *cmn_refrcv_refclk_termen_preg[SIERRA_NUM_CMN_PLLC];
struct regmap_field *cmn_plllc_pfdclk1_sel_preg[SIERRA_NUM_CMN_PLLC];
@@ -329,51 +429,141 @@ static const struct regmap_config cdns_sierra_common_cdb_config = {
.reg_read = cdns_regmap_read,
};
-static const struct regmap_config cdns_sierra_phy_config_ctrl_config = {
- .name = "sierra_phy_config_ctrl",
+static const struct regmap_config cdns_sierra_phy_pcs_cmn_cdb_config = {
+ .name = "sierra_phy_pcs_cmn_cdb",
.reg_stride = 1,
.fast_io = true,
.reg_write = cdns_regmap_write,
.reg_read = cdns_regmap_read,
};
+#define SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF(n) \
+{ \
+ .name = "sierra_phy_pcs_lane" n "_cdb", \
+ .reg_stride = 1, \
+ .fast_io = true, \
+ .reg_write = cdns_regmap_write, \
+ .reg_read = cdns_regmap_read, \
+}
+
+static const struct regmap_config cdns_sierra_phy_pcs_lane_cdb_config[] = {
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("0"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("1"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("2"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("3"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("4"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("5"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("6"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("7"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("8"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("9"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("10"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("11"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("12"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("13"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("14"),
+ SIERRA_PHY_PCS_LANE_CDB_REGMAP_CONF("15"),
+};
+
+static const struct regmap_config cdns_sierra_phy_pma_cmn_cdb_config = {
+ .name = "sierra_phy_pma_cmn_cdb",
+ .reg_stride = 1,
+ .fast_io = true,
+ .reg_write = cdns_regmap_write,
+ .reg_read = cdns_regmap_read,
+};
+
+#define SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF(n) \
+{ \
+ .name = "sierra_phy_pma_lane" n "_cdb", \
+ .reg_stride = 1, \
+ .fast_io = true, \
+ .reg_write = cdns_regmap_write, \
+ .reg_read = cdns_regmap_read, \
+}
+
+static const struct regmap_config cdns_sierra_phy_pma_lane_cdb_config[] = {
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("0"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("1"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("2"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("3"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("4"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("5"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("6"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("7"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("8"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("9"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("10"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("11"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("12"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("13"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("14"),
+ SIERRA_PHY_PMA_LANE_CDB_REGMAP_CONF("15"),
+};
+
static int cdns_sierra_phy_init(struct phy *gphy)
{
struct cdns_sierra_inst *ins = phy_get_drvdata(gphy);
struct cdns_sierra_phy *phy = dev_get_drvdata(gphy->dev.parent);
+ const struct cdns_sierra_data *init_data = phy->init_data;
+ struct cdns_sierra_vals *pma_cmn_vals, *pma_ln_vals;
+ enum cdns_sierra_phy_type phy_type = ins->phy_type;
+ enum cdns_sierra_ssc_mode ssc = ins->ssc_mode;
+ struct cdns_sierra_vals *phy_pma_ln_vals;
+ const struct cdns_reg_pairs *reg_pairs;
+ struct cdns_sierra_vals *pcs_cmn_vals;
struct regmap *regmap;
+ u32 num_regs;
int i, j;
- const struct cdns_reg_pairs *cmn_vals, *ln_vals;
- u32 num_cmn_regs, num_ln_regs;
/* Initialise the PHY registers, unless auto configured */
- if (phy->autoconf)
+ if (phy->autoconf || phy->nsubnodes > 1)
return 0;
clk_set_rate(phy->input_clks[CMN_REFCLK_DIG_DIV], 25000000);
clk_set_rate(phy->input_clks[CMN_REFCLK1_DIG_DIV], 25000000);
- if (ins->phy_type == PHY_TYPE_PCIE) {
- num_cmn_regs = phy->init_data->pcie_cmn_regs;
- num_ln_regs = phy->init_data->pcie_ln_regs;
- cmn_vals = phy->init_data->pcie_cmn_vals;
- ln_vals = phy->init_data->pcie_ln_vals;
- } else if (ins->phy_type == PHY_TYPE_USB3) {
- num_cmn_regs = phy->init_data->usb_cmn_regs;
- num_ln_regs = phy->init_data->usb_ln_regs;
- cmn_vals = phy->init_data->usb_cmn_vals;
- ln_vals = phy->init_data->usb_ln_vals;
- } else {
- return -EINVAL;
+
+ /* PHY PCS common registers configurations */
+ pcs_cmn_vals = init_data->pcs_cmn_vals[phy_type][TYPE_NONE][ssc];
+ if (pcs_cmn_vals) {
+ reg_pairs = pcs_cmn_vals->reg_pairs;
+ num_regs = pcs_cmn_vals->num_regs;
+ regmap = phy->regmap_phy_pcs_common_cdb;
+ for (i = 0; i < num_regs; i++)
+ regmap_write(regmap, reg_pairs[i].off, reg_pairs[i].val);
+ }
+
+ /* PHY PMA lane registers configurations */
+ phy_pma_ln_vals = init_data->phy_pma_ln_vals[phy_type][TYPE_NONE][ssc];
+ if (phy_pma_ln_vals) {
+ reg_pairs = phy_pma_ln_vals->reg_pairs;
+ num_regs = phy_pma_ln_vals->num_regs;
+ for (i = 0; i < ins->num_lanes; i++) {
+ regmap = phy->regmap_phy_pma_lane_cdb[i + ins->mlane];
+ for (j = 0; j < num_regs; j++)
+ regmap_write(regmap, reg_pairs[j].off, reg_pairs[j].val);
+ }
}
- regmap = phy->regmap_common_cdb;
- for (j = 0; j < num_cmn_regs ; j++)
- regmap_write(regmap, cmn_vals[j].off, cmn_vals[j].val);
+ /* PMA common registers configurations */
+ pma_cmn_vals = init_data->pma_cmn_vals[phy_type][TYPE_NONE][ssc];
+ if (pma_cmn_vals) {
+ reg_pairs = pma_cmn_vals->reg_pairs;
+ num_regs = pma_cmn_vals->num_regs;
+ regmap = phy->regmap_common_cdb;
+ for (i = 0; i < num_regs; i++)
+ regmap_write(regmap, reg_pairs[i].off, reg_pairs[i].val);
+ }
- for (i = 0; i < ins->num_lanes; i++) {
- for (j = 0; j < num_ln_regs ; j++) {
+ /* PMA lane registers configurations */
+ pma_ln_vals = init_data->pma_ln_vals[phy_type][TYPE_NONE][ssc];
+ if (pma_ln_vals) {
+ reg_pairs = pma_ln_vals->reg_pairs;
+ num_regs = pma_ln_vals->num_regs;
+ for (i = 0; i < ins->num_lanes; i++) {
regmap = phy->regmap_lane_cdb[i + ins->mlane];
- regmap_write(regmap, ln_vals[j].off, ln_vals[j].val);
+ for (j = 0; j < num_regs; j++)
+ regmap_write(regmap, reg_pairs[j].off, reg_pairs[j].val);
}
}
@@ -388,10 +578,13 @@ static int cdns_sierra_phy_on(struct phy *gphy)
u32 val;
int ret;
- ret = reset_control_deassert(sp->phy_rst);
- if (ret) {
- dev_err(dev, "Failed to take the PHY out of reset\n");
- return ret;
+ if (sp->nsubnodes == 1) {
+ /* Take the PHY out of reset */
+ ret = reset_control_deassert(sp->phy_rst);
+ if (ret) {
+ dev_err(dev, "Failed to take the PHY out of reset\n");
+ return ret;
+ }
}
/* Take the PHY lane group out of reset */
@@ -401,6 +594,26 @@ static int cdns_sierra_phy_on(struct phy *gphy)
return ret;
}
+ if (ins->phy_type == TYPE_PCIE || ins->phy_type == TYPE_USB) {
+ ret = regmap_field_read_poll_timeout(sp->phy_iso_link_ctrl_1[ins->mlane],
+ val, !val, 1000, PLL_LOCK_TIME);
+ if (ret) {
+ dev_err(dev, "Timeout waiting for PHY status ready\n");
+ return ret;
+ }
+ }
+
+ /*
+ * Wait for cmn_ready assertion
+ * PHY_PMA_CMN_CTRL[0] == 1
+ */
+ ret = regmap_field_read_poll_timeout(sp->pma_cmn_ready, val, val,
+ 1000, PLL_LOCK_TIME);
+ if (ret) {
+ dev_err(dev, "Timeout waiting for CMN ready\n");
+ return ret;
+ }
+
ret = regmap_field_read_poll_timeout(sp->pllctrl_lock[ins->mlane],
val, val, 1000, PLL_LOCK_TIME);
if (ret < 0)
@@ -436,11 +649,25 @@ static const struct phy_ops ops = {
static u8 cdns_sierra_pll_mux_get_parent(struct clk_hw *hw)
{
struct cdns_sierra_pll_mux *mux = to_cdns_sierra_pll_mux(hw);
+ struct regmap_field *plllc1en_field = mux->plllc1en_field;
+ struct regmap_field *termen_field = mux->termen_field;
struct regmap_field *field = mux->pfdclk_sel_preg;
unsigned int val;
+ int index;
regmap_field_read(field, &val);
- return clk_mux_val_to_index(hw, cdns_sierra_pll_mux_table, 0, val);
+
+ if (strstr(clk_hw_get_name(hw), clk_names[CDNS_SIERRA_PLL_CMNLC1])) {
+ index = clk_mux_val_to_index(hw, cdns_sierra_pll_mux_table[CMN_PLLLC1], 0, val);
+ if (index == 1) {
+ regmap_field_write(plllc1en_field, 1);
+ regmap_field_write(termen_field, 1);
+ }
+ } else {
+ index = clk_mux_val_to_index(hw, cdns_sierra_pll_mux_table[CMN_PLLLC], 0, val);
+ }
+
+ return index;
}
static int cdns_sierra_pll_mux_set_parent(struct clk_hw *hw, u8 index)
@@ -458,7 +685,11 @@ static int cdns_sierra_pll_mux_set_parent(struct clk_hw *hw, u8 index)
ret |= regmap_field_write(termen_field, 1);
}
- val = cdns_sierra_pll_mux_table[index];
+ if (strstr(clk_hw_get_name(hw), clk_names[CDNS_SIERRA_PLL_CMNLC1]))
+ val = cdns_sierra_pll_mux_table[CMN_PLLLC1][index];
+ else
+ val = cdns_sierra_pll_mux_table[CMN_PLLLC][index];
+
ret |= regmap_field_write(field, val);
return ret;
@@ -496,8 +727,8 @@ static int cdns_sierra_pll_mux_register(struct cdns_sierra_phy *sp,
for (i = 0; i < num_parents; i++) {
clk = sp->input_clks[pll_mux_parent_index[clk_index][i]];
if (IS_ERR_OR_NULL(clk)) {
- dev_err(dev, "No parent clock for derived_refclk\n");
- return PTR_ERR(clk);
+ dev_err(dev, "No parent clock for PLL mux clocks\n");
+ return IS_ERR(clk) ? PTR_ERR(clk) : -ENOENT;
}
parent_names[i] = __clk_get_name(clk);
}
@@ -551,6 +782,91 @@ static int cdns_sierra_phy_register_pll_mux(struct cdns_sierra_phy *sp)
return 0;
}
+static int cdns_sierra_derived_refclk_enable(struct clk_hw *hw)
+{
+ struct cdns_sierra_derived_refclk *derived_refclk = to_cdns_sierra_derived_refclk(hw);
+
+ regmap_field_write(derived_refclk->cmn_plllc_clk1_en_preg, 0x1);
+
+ /* Programming to get 100Mhz clock output in ref_der_clk_out 5GHz VCO/50 = 100MHz */
+ regmap_field_write(derived_refclk->cmn_plllc_clk1outdiv_preg, 0x2E);
+
+ return 0;
+}
+
+static void cdns_sierra_derived_refclk_disable(struct clk_hw *hw)
+{
+ struct cdns_sierra_derived_refclk *derived_refclk = to_cdns_sierra_derived_refclk(hw);
+
+ regmap_field_write(derived_refclk->cmn_plllc_clk1_en_preg, 0);
+}
+
+static int cdns_sierra_derived_refclk_is_enabled(struct clk_hw *hw)
+{
+ struct cdns_sierra_derived_refclk *derived_refclk = to_cdns_sierra_derived_refclk(hw);
+ int val;
+
+ regmap_field_read(derived_refclk->cmn_plllc_clk1_en_preg, &val);
+
+ return !!val;
+}
+
+static const struct clk_ops cdns_sierra_derived_refclk_ops = {
+ .enable = cdns_sierra_derived_refclk_enable,
+ .disable = cdns_sierra_derived_refclk_disable,
+ .is_enabled = cdns_sierra_derived_refclk_is_enabled,
+};
+
+static int cdns_sierra_derived_refclk_register(struct cdns_sierra_phy *sp)
+{
+ struct cdns_sierra_derived_refclk *derived_refclk;
+ struct device *dev = sp->dev;
+ struct regmap_field *field;
+ struct clk_init_data *init;
+ struct regmap *regmap;
+ char clk_name[100];
+ struct clk *clk;
+
+ derived_refclk = devm_kzalloc(dev, sizeof(*derived_refclk), GFP_KERNEL);
+ if (!derived_refclk)
+ return -ENOMEM;
+
+ snprintf(clk_name, sizeof(clk_name), "%s_%s", dev_name(dev),
+ clk_names[CDNS_SIERRA_DERIVED_REFCLK]);
+
+ init = &derived_refclk->clk_data;
+
+ init->ops = &cdns_sierra_derived_refclk_ops;
+ init->flags = 0;
+ init->name = clk_name;
+
+ regmap = sp->regmap_common_cdb;
+
+ field = devm_regmap_field_alloc(dev, regmap, cmn_plllc_clk1outdiv_preg);
+ if (IS_ERR(field)) {
+ dev_err(dev, "cmn_plllc_clk1outdiv_preg reg field init failed\n");
+ return PTR_ERR(field);
+ }
+ derived_refclk->cmn_plllc_clk1outdiv_preg = field;
+
+ field = devm_regmap_field_alloc(dev, regmap, cmn_plllc_clk1_en_preg);
+ if (IS_ERR(field)) {
+ dev_err(dev, "cmn_plllc_clk1_en_preg reg field init failed\n");
+ return PTR_ERR(field);
+ }
+ derived_refclk->cmn_plllc_clk1_en_preg = field;
+
+ derived_refclk->hw.init = init;
+
+ clk = devm_clk_register(dev, &derived_refclk->hw);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ sp->output_clks[CDNS_SIERRA_DERIVED_REFCLK] = clk;
+
+ return 0;
+}
+
static void cdns_sierra_clk_unregister(struct cdns_sierra_phy *sp)
{
struct device *dev = sp->dev;
@@ -571,6 +887,12 @@ static int cdns_sierra_clk_register(struct cdns_sierra_phy *sp)
return ret;
}
+ ret = cdns_sierra_derived_refclk_register(sp);
+ if (ret) {
+ dev_err(dev, "Failed to register derived refclk\n");
+ return ret;
+ }
+
sp->clk_data.clks = sp->output_clks;
sp->clk_data.clk_num = CDNS_SIERRA_OUTPUT_CLOCKS;
ret = of_clk_add_provider(node, of_clk_src_onecell_get, &sp->clk_data);
@@ -583,20 +905,37 @@ static int cdns_sierra_clk_register(struct cdns_sierra_phy *sp)
static int cdns_sierra_get_optional(struct cdns_sierra_inst *inst,
struct device_node *child)
{
+ u32 phy_type;
+
if (of_property_read_u32(child, "reg", &inst->mlane))
return -EINVAL;
if (of_property_read_u32(child, "cdns,num-lanes", &inst->num_lanes))
return -EINVAL;
- if (of_property_read_u32(child, "cdns,phy-type", &inst->phy_type))
+ if (of_property_read_u32(child, "cdns,phy-type", &phy_type))
return -EINVAL;
+ switch (phy_type) {
+ case PHY_TYPE_PCIE:
+ inst->phy_type = TYPE_PCIE;
+ break;
+ case PHY_TYPE_USB3:
+ inst->phy_type = TYPE_USB;
+ break;
+ case PHY_TYPE_QSGMII:
+ inst->phy_type = TYPE_QSGMII;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ inst->ssc_mode = EXTERNAL_SSC;
+ of_property_read_u32(child, "cdns,ssc-mode", &inst->ssc_mode);
+
return 0;
}
-static const struct of_device_id cdns_sierra_id_table[];
-
static struct regmap *cdns_regmap_init(struct device *dev, void __iomem *base,
u32 block_offset, u8 reg_offset_shift,
const struct regmap_config *config)
@@ -656,7 +995,7 @@ static int cdns_regfield_init(struct cdns_sierra_phy *sp)
sp->cmn_refrcv_refclk_termen_preg[i] = field;
}
- regmap = sp->regmap_phy_config_ctrl;
+ regmap = sp->regmap_phy_pcs_common_cdb;
field = devm_regmap_field_alloc(dev, regmap, phy_pll_cfg_1);
if (IS_ERR(field)) {
dev_err(dev, "PHY_PLL_CFG_1 reg field init failed\n");
@@ -664,6 +1003,14 @@ static int cdns_regfield_init(struct cdns_sierra_phy *sp)
}
sp->phy_pll_cfg_1 = field;
+ regmap = sp->regmap_phy_pma_common_cdb;
+ field = devm_regmap_field_alloc(dev, regmap, pma_cmn_ready);
+ if (IS_ERR(field)) {
+ dev_err(dev, "PHY_PMA_CMN_CTRL reg field init failed\n");
+ return PTR_ERR(field);
+ }
+ sp->pma_cmn_ready = field;
+
for (i = 0; i < SIERRA_MAX_LANES; i++) {
regmap = sp->regmap_lane_cdb[i];
field = devm_regmap_field_alloc(dev, regmap, pllctrl_lock);
@@ -671,7 +1018,17 @@ static int cdns_regfield_init(struct cdns_sierra_phy *sp)
dev_err(dev, "P%d_ENABLE reg field init failed\n", i);
return PTR_ERR(field);
}
- sp->pllctrl_lock[i] = field;
+ sp->pllctrl_lock[i] = field;
+ }
+
+ for (i = 0; i < SIERRA_MAX_LANES; i++) {
+ regmap = sp->regmap_phy_pcs_lane_cdb[i];
+ field = devm_regmap_field_alloc(dev, regmap, phy_iso_link_ctrl_1);
+ if (IS_ERR(field)) {
+ dev_err(dev, "PHY_ISO_LINK_CTRL reg field init for lane %d failed\n", i);
+ return PTR_ERR(field);
+ }
+ sp->phy_iso_link_ctrl_1[i] = field;
}
return 0;
@@ -708,14 +1065,49 @@ static int cdns_regmap_init_blocks(struct cdns_sierra_phy *sp,
}
sp->regmap_common_cdb = regmap;
- block_offset = SIERRA_PHY_CONFIG_CTRL_OFFSET(block_offset_shift);
+ block_offset = SIERRA_PHY_PCS_COMMON_OFFSET(block_offset_shift);
+ regmap = cdns_regmap_init(dev, base, block_offset, reg_offset_shift,
+ &cdns_sierra_phy_pcs_cmn_cdb_config);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "Failed to init PHY PCS common CDB regmap\n");
+ return PTR_ERR(regmap);
+ }
+ sp->regmap_phy_pcs_common_cdb = regmap;
+
+ for (i = 0; i < SIERRA_MAX_LANES; i++) {
+ block_offset = SIERRA_PHY_PCS_LANE_CDB_OFFSET(i, block_offset_shift,
+ reg_offset_shift);
+ regmap = cdns_regmap_init(dev, base, block_offset,
+ reg_offset_shift,
+ &cdns_sierra_phy_pcs_lane_cdb_config[i]);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "Failed to init PHY PCS lane CDB regmap\n");
+ return PTR_ERR(regmap);
+ }
+ sp->regmap_phy_pcs_lane_cdb[i] = regmap;
+ }
+
+ block_offset = SIERRA_PHY_PMA_COMMON_OFFSET(block_offset_shift);
regmap = cdns_regmap_init(dev, base, block_offset, reg_offset_shift,
- &cdns_sierra_phy_config_ctrl_config);
+ &cdns_sierra_phy_pma_cmn_cdb_config);
if (IS_ERR(regmap)) {
- dev_err(dev, "Failed to init PHY config and control regmap\n");
+ dev_err(dev, "Failed to init PHY PMA common CDB regmap\n");
return PTR_ERR(regmap);
}
- sp->regmap_phy_config_ctrl = regmap;
+ sp->regmap_phy_pma_common_cdb = regmap;
+
+ for (i = 0; i < SIERRA_MAX_LANES; i++) {
+ block_offset = SIERRA_PHY_PMA_LANE_CDB_OFFSET(i, block_offset_shift,
+ reg_offset_shift);
+ regmap = cdns_regmap_init(dev, base, block_offset,
+ reg_offset_shift,
+ &cdns_sierra_phy_pma_lane_cdb_config[i]);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "Failed to init PHY PMA lane CDB regmap\n");
+ return PTR_ERR(regmap);
+ }
+ sp->regmap_phy_pma_lane_cdb[i] = regmap;
+ }
return 0;
}
@@ -824,13 +1216,127 @@ static int cdns_sierra_phy_get_resets(struct cdns_sierra_phy *sp,
return 0;
}
+static int cdns_sierra_phy_configure_multilink(struct cdns_sierra_phy *sp)
+{
+ const struct cdns_sierra_data *init_data = sp->init_data;
+ struct cdns_sierra_vals *pma_cmn_vals, *pma_ln_vals;
+ enum cdns_sierra_phy_type phy_t1, phy_t2;
+ struct cdns_sierra_vals *phy_pma_ln_vals;
+ const struct cdns_reg_pairs *reg_pairs;
+ struct cdns_sierra_vals *pcs_cmn_vals;
+ int i, j, node, mlane, num_lanes, ret;
+ enum cdns_sierra_ssc_mode ssc;
+ struct regmap *regmap;
+ u32 num_regs;
+
+ /* Maximum 2 links (subnodes) are supported */
+ if (sp->nsubnodes != 2)
+ return -EINVAL;
+
+ clk_set_rate(sp->input_clks[CMN_REFCLK_DIG_DIV], 25000000);
+ clk_set_rate(sp->input_clks[CMN_REFCLK1_DIG_DIV], 25000000);
+
+ /* PHY configured to use both PLL LC and LC1 */
+ regmap_field_write(sp->phy_pll_cfg_1, 0x1);
+
+ phy_t1 = sp->phys[0].phy_type;
+ phy_t2 = sp->phys[1].phy_type;
+
+ /*
+ * PHY configuration for multi-link operation is done in two steps.
+ * e.g. Consider a case for a 4 lane PHY with PCIe using 2 lanes and QSGMII other 2 lanes.
+ * Sierra PHY has 2 PLLs, viz. PLLLC and PLLLC1. So in this case, PLLLC is used for PCIe
+ * and PLLLC1 is used for QSGMII. PHY is configured in two steps as described below.
+ *
+ * [1] For first step, phy_t1 = TYPE_PCIE and phy_t2 = TYPE_QSGMII
+ * So the register values are selected as [TYPE_PCIE][TYPE_QSGMII][ssc].
+ * This will configure PHY registers associated for PCIe (i.e. first protocol)
+ * involving PLLLC registers and registers for first 2 lanes of PHY.
+ * [2] In second step, the variables phy_t1 and phy_t2 are swapped. So now,
+ * phy_t1 = TYPE_QSGMII and phy_t2 = TYPE_PCIE. And the register values are selected as
+ * [TYPE_QSGMII][TYPE_PCIE][ssc].
+ * This will configure PHY registers associated for QSGMII (i.e. second protocol)
+ * involving PLLLC1 registers and registers for other 2 lanes of PHY.
+ *
+ * This completes the PHY configuration for multilink operation. This approach enables
+ * dividing the large number of PHY register configurations into protocol specific
+ * smaller groups.
+ */
+ for (node = 0; node < sp->nsubnodes; node++) {
+ if (node == 1) {
+ /*
+ * If first link with phy_t1 is configured, then configure the PHY for
+ * second link with phy_t2. Get the array values as [phy_t2][phy_t1][ssc].
+ */
+ swap(phy_t1, phy_t2);
+ }
+
+ mlane = sp->phys[node].mlane;
+ ssc = sp->phys[node].ssc_mode;
+ num_lanes = sp->phys[node].num_lanes;
+
+ /* PHY PCS common registers configurations */
+ pcs_cmn_vals = init_data->pcs_cmn_vals[phy_t1][phy_t2][ssc];
+ if (pcs_cmn_vals) {
+ reg_pairs = pcs_cmn_vals->reg_pairs;
+ num_regs = pcs_cmn_vals->num_regs;
+ regmap = sp->regmap_phy_pcs_common_cdb;
+ for (i = 0; i < num_regs; i++)
+ regmap_write(regmap, reg_pairs[i].off, reg_pairs[i].val);
+ }
+
+ /* PHY PMA lane registers configurations */
+ phy_pma_ln_vals = init_data->phy_pma_ln_vals[phy_t1][phy_t2][ssc];
+ if (phy_pma_ln_vals) {
+ reg_pairs = phy_pma_ln_vals->reg_pairs;
+ num_regs = phy_pma_ln_vals->num_regs;
+ for (i = 0; i < num_lanes; i++) {
+ regmap = sp->regmap_phy_pma_lane_cdb[i + mlane];
+ for (j = 0; j < num_regs; j++)
+ regmap_write(regmap, reg_pairs[j].off, reg_pairs[j].val);
+ }
+ }
+
+ /* PMA common registers configurations */
+ pma_cmn_vals = init_data->pma_cmn_vals[phy_t1][phy_t2][ssc];
+ if (pma_cmn_vals) {
+ reg_pairs = pma_cmn_vals->reg_pairs;
+ num_regs = pma_cmn_vals->num_regs;
+ regmap = sp->regmap_common_cdb;
+ for (i = 0; i < num_regs; i++)
+ regmap_write(regmap, reg_pairs[i].off, reg_pairs[i].val);
+ }
+
+ /* PMA lane registers configurations */
+ pma_ln_vals = init_data->pma_ln_vals[phy_t1][phy_t2][ssc];
+ if (pma_ln_vals) {
+ reg_pairs = pma_ln_vals->reg_pairs;
+ num_regs = pma_ln_vals->num_regs;
+ for (i = 0; i < num_lanes; i++) {
+ regmap = sp->regmap_lane_cdb[i + mlane];
+ for (j = 0; j < num_regs; j++)
+ regmap_write(regmap, reg_pairs[j].off, reg_pairs[j].val);
+ }
+ }
+
+ if (phy_t1 == TYPE_QSGMII)
+ reset_control_deassert(sp->phys[node].lnk_rst);
+ }
+
+ /* Take the PHY out of reset */
+ ret = reset_control_deassert(sp->phy_rst);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
static int cdns_sierra_phy_probe(struct platform_device *pdev)
{
struct cdns_sierra_phy *sp;
struct phy_provider *phy_provider;
struct device *dev = &pdev->dev;
- const struct of_device_id *match;
- struct cdns_sierra_data *data;
+ const struct cdns_sierra_data *data;
unsigned int id_value;
int i, ret, node = 0;
void __iomem *base;
@@ -840,12 +1346,10 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev)
return -ENODEV;
/* Get init data for this PHY */
- match = of_match_device(cdns_sierra_id_table, dev);
- if (!match)
+ data = of_device_get_match_data(dev);
+ if (!data)
return -EINVAL;
- data = (struct cdns_sierra_data *)match->data;
-
sp = devm_kzalloc(dev, sizeof(*sp), GFP_KERNEL);
if (!sp)
return -ENOMEM;
@@ -946,8 +1450,11 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev)
}
/* If more than one subnode, configure the PHY as multilink */
- if (!sp->autoconf && sp->nsubnodes > 1)
- regmap_field_write(sp->phy_pll_cfg_1, 0x1);
+ if (!sp->autoconf && sp->nsubnodes > 1) {
+ ret = cdns_sierra_phy_configure_multilink(sp);
+ if (ret)
+ goto put_child2;
+ }
pm_runtime_enable(dev);
phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
@@ -991,6 +1498,449 @@ static int cdns_sierra_phy_remove(struct platform_device *pdev)
return 0;
}
+/* QSGMII PHY PMA lane configuration */
+static struct cdns_reg_pairs qsgmii_phy_pma_ln_regs[] = {
+ {0x9010, SIERRA_PHY_PMA_XCVR_CTRL}
+};
+
+static struct cdns_sierra_vals qsgmii_phy_pma_ln_vals = {
+ .reg_pairs = qsgmii_phy_pma_ln_regs,
+ .num_regs = ARRAY_SIZE(qsgmii_phy_pma_ln_regs),
+};
+
+/* QSGMII refclk 100MHz, 20b, opt1, No BW cal, no ssc, PLL LC1 */
+static const struct cdns_reg_pairs qsgmii_100_no_ssc_plllc1_cmn_regs[] = {
+ {0x2085, SIERRA_CMN_PLLLC1_LF_COEFF_MODE0_PREG},
+ {0x0000, SIERRA_CMN_PLLLC1_BWCAL_MODE0_PREG},
+ {0x0000, SIERRA_CMN_PLLLC1_SS_TIME_STEPSIZE_MODE_PREG}
+};
+
+static const struct cdns_reg_pairs qsgmii_100_no_ssc_plllc1_ln_regs[] = {
+ {0xFC08, SIERRA_DET_STANDEC_A_PREG},
+ {0x0252, SIERRA_DET_STANDEC_E_PREG},
+ {0x0004, SIERRA_PSC_LN_IDLE_PREG},
+ {0x0FFE, SIERRA_PSC_RX_A0_PREG},
+ {0x0011, SIERRA_PLLCTRL_SUBRATE_PREG},
+ {0x0001, SIERRA_PLLCTRL_GEN_A_PREG},
+ {0x5233, SIERRA_PLLCTRL_CPGAIN_MODE_PREG},
+ {0x0000, SIERRA_DRVCTRL_ATTEN_PREG},
+ {0x0089, SIERRA_RX_CREQ_FLTR_A_MODE0_PREG},
+ {0x3C3C, SIERRA_CREQ_CCLKDET_MODE01_PREG},
+ {0x3222, SIERRA_CREQ_FSMCLK_SEL_PREG},
+ {0x0000, SIERRA_CREQ_EQ_CTRL_PREG},
+ {0x8422, SIERRA_CTLELUT_CTRL_PREG},
+ {0x4111, SIERRA_DFE_ECMP_RATESEL_PREG},
+ {0x4111, SIERRA_DFE_SMP_RATESEL_PREG},
+ {0x0002, SIERRA_DEQ_PHALIGN_CTRL},
+ {0x9595, SIERRA_DEQ_VGATUNE_CTRL_PREG},
+ {0x0186, SIERRA_DEQ_GLUT0},
+ {0x0186, SIERRA_DEQ_GLUT1},
+ {0x0186, SIERRA_DEQ_GLUT2},
+ {0x0186, SIERRA_DEQ_GLUT3},
+ {0x0186, SIERRA_DEQ_GLUT4},
+ {0x0861, SIERRA_DEQ_ALUT0},
+ {0x07E0, SIERRA_DEQ_ALUT1},
+ {0x079E, SIERRA_DEQ_ALUT2},
+ {0x071D, SIERRA_DEQ_ALUT3},
+ {0x03F5, SIERRA_DEQ_DFETAP_CTRL_PREG},
+ {0x0C01, SIERRA_DEQ_TAU_CTRL1_FAST_MAINT_PREG},
+ {0x3C40, SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG},
+ {0x1C04, SIERRA_DEQ_TAU_CTRL2_PREG},
+ {0x0033, SIERRA_DEQ_PICTRL_PREG},
+ {0x0660, SIERRA_CPICAL_TMRVAL_MODE0_PREG},
+ {0x00D5, SIERRA_CPI_OUTBUF_RATESEL_PREG},
+ {0x0B6D, SIERRA_CPI_RESBIAS_BIN_PREG},
+ {0x0102, SIERRA_RXBUFFER_CTLECTRL_PREG},
+ {0x0002, SIERRA_RXBUFFER_RCDFECTRL_PREG}
+};
+
+static struct cdns_sierra_vals qsgmii_100_no_ssc_plllc1_cmn_vals = {
+ .reg_pairs = qsgmii_100_no_ssc_plllc1_cmn_regs,
+ .num_regs = ARRAY_SIZE(qsgmii_100_no_ssc_plllc1_cmn_regs),
+};
+
+static struct cdns_sierra_vals qsgmii_100_no_ssc_plllc1_ln_vals = {
+ .reg_pairs = qsgmii_100_no_ssc_plllc1_ln_regs,
+ .num_regs = ARRAY_SIZE(qsgmii_100_no_ssc_plllc1_ln_regs),
+};
+
+/* PCIE PHY PCS common configuration */
+static struct cdns_reg_pairs pcie_phy_pcs_cmn_regs[] = {
+ {0x0430, SIERRA_PHY_PIPE_CMN_CTRL1}
+};
+
+static struct cdns_sierra_vals pcie_phy_pcs_cmn_vals = {
+ .reg_pairs = pcie_phy_pcs_cmn_regs,
+ .num_regs = ARRAY_SIZE(pcie_phy_pcs_cmn_regs),
+};
+
+/* refclk100MHz_32b_PCIe_cmn_pll_no_ssc, pcie_links_using_plllc, pipe_bw_3 */
+static const struct cdns_reg_pairs pcie_100_no_ssc_plllc_cmn_regs[] = {
+ {0x2105, SIERRA_CMN_PLLLC_LF_COEFF_MODE1_PREG},
+ {0x2105, SIERRA_CMN_PLLLC_LF_COEFF_MODE0_PREG},
+ {0x8A06, SIERRA_CMN_PLLLC_BWCAL_MODE1_PREG},
+ {0x8A06, SIERRA_CMN_PLLLC_BWCAL_MODE0_PREG}
+};
+
+/*
+ * refclk100MHz_32b_PCIe_ln_no_ssc, multilink, using_plllc,
+ * cmn_pllcy_anaclk0_1Ghz, xcvr_pllclk_fullrt_500mhz
+ */
+static const struct cdns_reg_pairs ml_pcie_100_no_ssc_ln_regs[] = {
+ {0xFC08, SIERRA_DET_STANDEC_A_PREG},
+ {0x001D, SIERRA_PSM_A3IN_TMR_PREG},
+ {0x0004, SIERRA_PSC_LN_A3_PREG},
+ {0x0004, SIERRA_PSC_LN_A4_PREG},
+ {0x0004, SIERRA_PSC_LN_IDLE_PREG},
+ {0x1555, SIERRA_DFE_BIASTRIM_PREG},
+ {0x9703, SIERRA_DRVCTRL_BOOST_PREG},
+ {0x8055, SIERRA_RX_CREQ_FLTR_A_MODE3_PREG},
+ {0x80BB, SIERRA_RX_CREQ_FLTR_A_MODE2_PREG},
+ {0x8351, SIERRA_RX_CREQ_FLTR_A_MODE1_PREG},
+ {0x8349, SIERRA_RX_CREQ_FLTR_A_MODE0_PREG},
+ {0x0002, SIERRA_CREQ_DCBIASATTEN_OVR_PREG},
+ {0x9800, SIERRA_RX_CTLE_CAL_PREG},
+ {0x5624, SIERRA_DEQ_CONCUR_CTRL2_PREG},
+ {0x000F, SIERRA_DEQ_EPIPWR_CTRL2_PREG},
+ {0x00FF, SIERRA_DEQ_FAST_MAINT_CYCLES_PREG},
+ {0x4C4C, SIERRA_DEQ_ERRCMP_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_OFFSET_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_GAIN_CTRL_PREG},
+ {0x0041, SIERRA_DEQ_GLUT0},
+ {0x0082, SIERRA_DEQ_GLUT1},
+ {0x00C3, SIERRA_DEQ_GLUT2},
+ {0x0145, SIERRA_DEQ_GLUT3},
+ {0x0186, SIERRA_DEQ_GLUT4},
+ {0x09E7, SIERRA_DEQ_ALUT0},
+ {0x09A6, SIERRA_DEQ_ALUT1},
+ {0x0965, SIERRA_DEQ_ALUT2},
+ {0x08E3, SIERRA_DEQ_ALUT3},
+ {0x00FA, SIERRA_DEQ_DFETAP0},
+ {0x00FA, SIERRA_DEQ_DFETAP1},
+ {0x00FA, SIERRA_DEQ_DFETAP2},
+ {0x00FA, SIERRA_DEQ_DFETAP3},
+ {0x00FA, SIERRA_DEQ_DFETAP4},
+ {0x000F, SIERRA_DEQ_PRECUR_PREG},
+ {0x0280, SIERRA_DEQ_POSTCUR_PREG},
+ {0x8F00, SIERRA_DEQ_POSTCUR_DECR_PREG},
+ {0x3C0F, SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG},
+ {0x1C0C, SIERRA_DEQ_TAU_CTRL2_PREG},
+ {0x0100, SIERRA_DEQ_TAU_CTRL3_PREG},
+ {0x5E82, SIERRA_DEQ_OPENEYE_CTRL_PREG},
+ {0x002B, SIERRA_CPI_TRIM_PREG},
+ {0x0003, SIERRA_EPI_CTRL_PREG},
+ {0x803F, SIERRA_SDFILT_H2L_A_PREG},
+ {0x0004, SIERRA_RXBUFFER_CTLECTRL_PREG},
+ {0x2010, SIERRA_RXBUFFER_RCDFECTRL_PREG},
+ {0x4432, SIERRA_RXBUFFER_DFECTRL_PREG}
+};
+
+static struct cdns_sierra_vals pcie_100_no_ssc_plllc_cmn_vals = {
+ .reg_pairs = pcie_100_no_ssc_plllc_cmn_regs,
+ .num_regs = ARRAY_SIZE(pcie_100_no_ssc_plllc_cmn_regs),
+};
+
+static struct cdns_sierra_vals ml_pcie_100_no_ssc_ln_vals = {
+ .reg_pairs = ml_pcie_100_no_ssc_ln_regs,
+ .num_regs = ARRAY_SIZE(ml_pcie_100_no_ssc_ln_regs),
+};
+
+/* refclk100MHz_32b_PCIe_cmn_pll_int_ssc, pcie_links_using_plllc, pipe_bw_3 */
+static const struct cdns_reg_pairs pcie_100_int_ssc_plllc_cmn_regs[] = {
+ {0x000E, SIERRA_CMN_PLLLC_MODE_PREG},
+ {0x4006, SIERRA_CMN_PLLLC_LF_COEFF_MODE1_PREG},
+ {0x4006, SIERRA_CMN_PLLLC_LF_COEFF_MODE0_PREG},
+ {0x0000, SIERRA_CMN_PLLLC_BWCAL_MODE1_PREG},
+ {0x0000, SIERRA_CMN_PLLLC_BWCAL_MODE0_PREG},
+ {0x0581, SIERRA_CMN_PLLLC_DSMCORR_PREG},
+ {0x7F80, SIERRA_CMN_PLLLC_SS_PREG},
+ {0x0041, SIERRA_CMN_PLLLC_SS_AMP_STEP_SIZE_PREG},
+ {0x0464, SIERRA_CMN_PLLLC_SSTWOPT_PREG},
+ {0x0D0D, SIERRA_CMN_PLLLC_SS_TIME_STEPSIZE_MODE_PREG},
+ {0x0060, SIERRA_CMN_PLLLC_LOCK_DELAY_CTRL_PREG}
+};
+
+/*
+ * refclk100MHz_32b_PCIe_ln_int_ssc, multilink, using_plllc,
+ * cmn_pllcy_anaclk0_1Ghz, xcvr_pllclk_fullrt_500mhz
+ */
+static const struct cdns_reg_pairs ml_pcie_100_int_ssc_ln_regs[] = {
+ {0xFC08, SIERRA_DET_STANDEC_A_PREG},
+ {0x001D, SIERRA_PSM_A3IN_TMR_PREG},
+ {0x0004, SIERRA_PSC_LN_A3_PREG},
+ {0x0004, SIERRA_PSC_LN_A4_PREG},
+ {0x0004, SIERRA_PSC_LN_IDLE_PREG},
+ {0x1555, SIERRA_DFE_BIASTRIM_PREG},
+ {0x9703, SIERRA_DRVCTRL_BOOST_PREG},
+ {0x813E, SIERRA_CLKPATHCTRL_TMR_PREG},
+ {0x8047, SIERRA_RX_CREQ_FLTR_A_MODE3_PREG},
+ {0x808F, SIERRA_RX_CREQ_FLTR_A_MODE2_PREG},
+ {0x808F, SIERRA_RX_CREQ_FLTR_A_MODE1_PREG},
+ {0x808F, SIERRA_RX_CREQ_FLTR_A_MODE0_PREG},
+ {0x0002, SIERRA_CREQ_DCBIASATTEN_OVR_PREG},
+ {0x9800, SIERRA_RX_CTLE_CAL_PREG},
+ {0x033C, SIERRA_RX_CTLE_MAINTENANCE_PREG},
+ {0x44CC, SIERRA_CREQ_EQ_OPEN_EYE_THRESH_PREG},
+ {0x5624, SIERRA_DEQ_CONCUR_CTRL2_PREG},
+ {0x000F, SIERRA_DEQ_EPIPWR_CTRL2_PREG},
+ {0x00FF, SIERRA_DEQ_FAST_MAINT_CYCLES_PREG},
+ {0x4C4C, SIERRA_DEQ_ERRCMP_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_OFFSET_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_GAIN_CTRL_PREG},
+ {0x0041, SIERRA_DEQ_GLUT0},
+ {0x0082, SIERRA_DEQ_GLUT1},
+ {0x00C3, SIERRA_DEQ_GLUT2},
+ {0x0145, SIERRA_DEQ_GLUT3},
+ {0x0186, SIERRA_DEQ_GLUT4},
+ {0x09E7, SIERRA_DEQ_ALUT0},
+ {0x09A6, SIERRA_DEQ_ALUT1},
+ {0x0965, SIERRA_DEQ_ALUT2},
+ {0x08E3, SIERRA_DEQ_ALUT3},
+ {0x00FA, SIERRA_DEQ_DFETAP0},
+ {0x00FA, SIERRA_DEQ_DFETAP1},
+ {0x00FA, SIERRA_DEQ_DFETAP2},
+ {0x00FA, SIERRA_DEQ_DFETAP3},
+ {0x00FA, SIERRA_DEQ_DFETAP4},
+ {0x000F, SIERRA_DEQ_PRECUR_PREG},
+ {0x0280, SIERRA_DEQ_POSTCUR_PREG},
+ {0x8F00, SIERRA_DEQ_POSTCUR_DECR_PREG},
+ {0x3C0F, SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG},
+ {0x1C0C, SIERRA_DEQ_TAU_CTRL2_PREG},
+ {0x0100, SIERRA_DEQ_TAU_CTRL3_PREG},
+ {0x5E82, SIERRA_DEQ_OPENEYE_CTRL_PREG},
+ {0x002B, SIERRA_CPI_TRIM_PREG},
+ {0x0003, SIERRA_EPI_CTRL_PREG},
+ {0x803F, SIERRA_SDFILT_H2L_A_PREG},
+ {0x0004, SIERRA_RXBUFFER_CTLECTRL_PREG},
+ {0x2010, SIERRA_RXBUFFER_RCDFECTRL_PREG},
+ {0x4432, SIERRA_RXBUFFER_DFECTRL_PREG}
+};
+
+static struct cdns_sierra_vals pcie_100_int_ssc_plllc_cmn_vals = {
+ .reg_pairs = pcie_100_int_ssc_plllc_cmn_regs,
+ .num_regs = ARRAY_SIZE(pcie_100_int_ssc_plllc_cmn_regs),
+};
+
+static struct cdns_sierra_vals ml_pcie_100_int_ssc_ln_vals = {
+ .reg_pairs = ml_pcie_100_int_ssc_ln_regs,
+ .num_regs = ARRAY_SIZE(ml_pcie_100_int_ssc_ln_regs),
+};
+
+/* refclk100MHz_32b_PCIe_cmn_pll_ext_ssc, pcie_links_using_plllc, pipe_bw_3 */
+static const struct cdns_reg_pairs pcie_100_ext_ssc_plllc_cmn_regs[] = {
+ {0x2106, SIERRA_CMN_PLLLC_LF_COEFF_MODE1_PREG},
+ {0x2106, SIERRA_CMN_PLLLC_LF_COEFF_MODE0_PREG},
+ {0x8A06, SIERRA_CMN_PLLLC_BWCAL_MODE1_PREG},
+ {0x8A06, SIERRA_CMN_PLLLC_BWCAL_MODE0_PREG},
+ {0x1B1B, SIERRA_CMN_PLLLC_SS_TIME_STEPSIZE_MODE_PREG}
+};
+
+/*
+ * refclk100MHz_32b_PCIe_ln_ext_ssc, multilink, using_plllc,
+ * cmn_pllcy_anaclk0_1Ghz, xcvr_pllclk_fullrt_500mhz
+ */
+static const struct cdns_reg_pairs ml_pcie_100_ext_ssc_ln_regs[] = {
+ {0xFC08, SIERRA_DET_STANDEC_A_PREG},
+ {0x001D, SIERRA_PSM_A3IN_TMR_PREG},
+ {0x0004, SIERRA_PSC_LN_A3_PREG},
+ {0x0004, SIERRA_PSC_LN_A4_PREG},
+ {0x0004, SIERRA_PSC_LN_IDLE_PREG},
+ {0x1555, SIERRA_DFE_BIASTRIM_PREG},
+ {0x9703, SIERRA_DRVCTRL_BOOST_PREG},
+ {0x813E, SIERRA_CLKPATHCTRL_TMR_PREG},
+ {0x8047, SIERRA_RX_CREQ_FLTR_A_MODE3_PREG},
+ {0x808F, SIERRA_RX_CREQ_FLTR_A_MODE2_PREG},
+ {0x808F, SIERRA_RX_CREQ_FLTR_A_MODE1_PREG},
+ {0x808F, SIERRA_RX_CREQ_FLTR_A_MODE0_PREG},
+ {0x0002, SIERRA_CREQ_DCBIASATTEN_OVR_PREG},
+ {0x9800, SIERRA_RX_CTLE_CAL_PREG},
+ {0x033C, SIERRA_RX_CTLE_MAINTENANCE_PREG},
+ {0x44CC, SIERRA_CREQ_EQ_OPEN_EYE_THRESH_PREG},
+ {0x5624, SIERRA_DEQ_CONCUR_CTRL2_PREG},
+ {0x000F, SIERRA_DEQ_EPIPWR_CTRL2_PREG},
+ {0x00FF, SIERRA_DEQ_FAST_MAINT_CYCLES_PREG},
+ {0x4C4C, SIERRA_DEQ_ERRCMP_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_OFFSET_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_GAIN_CTRL_PREG},
+ {0x0041, SIERRA_DEQ_GLUT0},
+ {0x0082, SIERRA_DEQ_GLUT1},
+ {0x00C3, SIERRA_DEQ_GLUT2},
+ {0x0145, SIERRA_DEQ_GLUT3},
+ {0x0186, SIERRA_DEQ_GLUT4},
+ {0x09E7, SIERRA_DEQ_ALUT0},
+ {0x09A6, SIERRA_DEQ_ALUT1},
+ {0x0965, SIERRA_DEQ_ALUT2},
+ {0x08E3, SIERRA_DEQ_ALUT3},
+ {0x00FA, SIERRA_DEQ_DFETAP0},
+ {0x00FA, SIERRA_DEQ_DFETAP1},
+ {0x00FA, SIERRA_DEQ_DFETAP2},
+ {0x00FA, SIERRA_DEQ_DFETAP3},
+ {0x00FA, SIERRA_DEQ_DFETAP4},
+ {0x000F, SIERRA_DEQ_PRECUR_PREG},
+ {0x0280, SIERRA_DEQ_POSTCUR_PREG},
+ {0x8F00, SIERRA_DEQ_POSTCUR_DECR_PREG},
+ {0x3C0F, SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG},
+ {0x1C0C, SIERRA_DEQ_TAU_CTRL2_PREG},
+ {0x0100, SIERRA_DEQ_TAU_CTRL3_PREG},
+ {0x5E82, SIERRA_DEQ_OPENEYE_CTRL_PREG},
+ {0x002B, SIERRA_CPI_TRIM_PREG},
+ {0x0003, SIERRA_EPI_CTRL_PREG},
+ {0x803F, SIERRA_SDFILT_H2L_A_PREG},
+ {0x0004, SIERRA_RXBUFFER_CTLECTRL_PREG},
+ {0x2010, SIERRA_RXBUFFER_RCDFECTRL_PREG},
+ {0x4432, SIERRA_RXBUFFER_DFECTRL_PREG}
+};
+
+static struct cdns_sierra_vals pcie_100_ext_ssc_plllc_cmn_vals = {
+ .reg_pairs = pcie_100_ext_ssc_plllc_cmn_regs,
+ .num_regs = ARRAY_SIZE(pcie_100_ext_ssc_plllc_cmn_regs),
+};
+
+static struct cdns_sierra_vals ml_pcie_100_ext_ssc_ln_vals = {
+ .reg_pairs = ml_pcie_100_ext_ssc_ln_regs,
+ .num_regs = ARRAY_SIZE(ml_pcie_100_ext_ssc_ln_regs),
+};
+
+/* refclk100MHz_32b_PCIe_cmn_pll_no_ssc */
+static const struct cdns_reg_pairs cdns_pcie_cmn_regs_no_ssc[] = {
+ {0x2105, SIERRA_CMN_PLLLC_LF_COEFF_MODE1_PREG},
+ {0x2105, SIERRA_CMN_PLLLC_LF_COEFF_MODE0_PREG},
+ {0x8A06, SIERRA_CMN_PLLLC_BWCAL_MODE1_PREG},
+ {0x8A06, SIERRA_CMN_PLLLC_BWCAL_MODE0_PREG}
+};
+
+/* refclk100MHz_32b_PCIe_ln_no_ssc */
+static const struct cdns_reg_pairs cdns_pcie_ln_regs_no_ssc[] = {
+ {0xFC08, SIERRA_DET_STANDEC_A_PREG},
+ {0x001D, SIERRA_PSM_A3IN_TMR_PREG},
+ {0x1555, SIERRA_DFE_BIASTRIM_PREG},
+ {0x9703, SIERRA_DRVCTRL_BOOST_PREG},
+ {0x8055, SIERRA_RX_CREQ_FLTR_A_MODE3_PREG},
+ {0x80BB, SIERRA_RX_CREQ_FLTR_A_MODE2_PREG},
+ {0x8351, SIERRA_RX_CREQ_FLTR_A_MODE1_PREG},
+ {0x8349, SIERRA_RX_CREQ_FLTR_A_MODE0_PREG},
+ {0x0002, SIERRA_CREQ_DCBIASATTEN_OVR_PREG},
+ {0x9800, SIERRA_RX_CTLE_CAL_PREG},
+ {0x5624, SIERRA_DEQ_CONCUR_CTRL2_PREG},
+ {0x000F, SIERRA_DEQ_EPIPWR_CTRL2_PREG},
+ {0x00FF, SIERRA_DEQ_FAST_MAINT_CYCLES_PREG},
+ {0x4C4C, SIERRA_DEQ_ERRCMP_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_OFFSET_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_GAIN_CTRL_PREG},
+ {0x0041, SIERRA_DEQ_GLUT0},
+ {0x0082, SIERRA_DEQ_GLUT1},
+ {0x00C3, SIERRA_DEQ_GLUT2},
+ {0x0145, SIERRA_DEQ_GLUT3},
+ {0x0186, SIERRA_DEQ_GLUT4},
+ {0x09E7, SIERRA_DEQ_ALUT0},
+ {0x09A6, SIERRA_DEQ_ALUT1},
+ {0x0965, SIERRA_DEQ_ALUT2},
+ {0x08E3, SIERRA_DEQ_ALUT3},
+ {0x00FA, SIERRA_DEQ_DFETAP0},
+ {0x00FA, SIERRA_DEQ_DFETAP1},
+ {0x00FA, SIERRA_DEQ_DFETAP2},
+ {0x00FA, SIERRA_DEQ_DFETAP3},
+ {0x00FA, SIERRA_DEQ_DFETAP4},
+ {0x000F, SIERRA_DEQ_PRECUR_PREG},
+ {0x0280, SIERRA_DEQ_POSTCUR_PREG},
+ {0x8F00, SIERRA_DEQ_POSTCUR_DECR_PREG},
+ {0x3C0F, SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG},
+ {0x1C0C, SIERRA_DEQ_TAU_CTRL2_PREG},
+ {0x0100, SIERRA_DEQ_TAU_CTRL3_PREG},
+ {0x5E82, SIERRA_DEQ_OPENEYE_CTRL_PREG},
+ {0x002B, SIERRA_CPI_TRIM_PREG},
+ {0x0003, SIERRA_EPI_CTRL_PREG},
+ {0x803F, SIERRA_SDFILT_H2L_A_PREG},
+ {0x0004, SIERRA_RXBUFFER_CTLECTRL_PREG},
+ {0x2010, SIERRA_RXBUFFER_RCDFECTRL_PREG},
+ {0x4432, SIERRA_RXBUFFER_DFECTRL_PREG}
+};
+
+static struct cdns_sierra_vals pcie_100_no_ssc_cmn_vals = {
+ .reg_pairs = cdns_pcie_cmn_regs_no_ssc,
+ .num_regs = ARRAY_SIZE(cdns_pcie_cmn_regs_no_ssc),
+};
+
+static struct cdns_sierra_vals pcie_100_no_ssc_ln_vals = {
+ .reg_pairs = cdns_pcie_ln_regs_no_ssc,
+ .num_regs = ARRAY_SIZE(cdns_pcie_ln_regs_no_ssc),
+};
+
+/* refclk100MHz_32b_PCIe_cmn_pll_int_ssc */
+static const struct cdns_reg_pairs cdns_pcie_cmn_regs_int_ssc[] = {
+ {0x000E, SIERRA_CMN_PLLLC_MODE_PREG},
+ {0x4006, SIERRA_CMN_PLLLC_LF_COEFF_MODE1_PREG},
+ {0x4006, SIERRA_CMN_PLLLC_LF_COEFF_MODE0_PREG},
+ {0x0000, SIERRA_CMN_PLLLC_BWCAL_MODE1_PREG},
+ {0x0000, SIERRA_CMN_PLLLC_BWCAL_MODE0_PREG},
+ {0x0581, SIERRA_CMN_PLLLC_DSMCORR_PREG},
+ {0x7F80, SIERRA_CMN_PLLLC_SS_PREG},
+ {0x0041, SIERRA_CMN_PLLLC_SS_AMP_STEP_SIZE_PREG},
+ {0x0464, SIERRA_CMN_PLLLC_SSTWOPT_PREG},
+ {0x0D0D, SIERRA_CMN_PLLLC_SS_TIME_STEPSIZE_MODE_PREG},
+ {0x0060, SIERRA_CMN_PLLLC_LOCK_DELAY_CTRL_PREG}
+};
+
+/* refclk100MHz_32b_PCIe_ln_int_ssc */
+static const struct cdns_reg_pairs cdns_pcie_ln_regs_int_ssc[] = {
+ {0xFC08, SIERRA_DET_STANDEC_A_PREG},
+ {0x001D, SIERRA_PSM_A3IN_TMR_PREG},
+ {0x1555, SIERRA_DFE_BIASTRIM_PREG},
+ {0x9703, SIERRA_DRVCTRL_BOOST_PREG},
+ {0x813E, SIERRA_CLKPATHCTRL_TMR_PREG},
+ {0x8047, SIERRA_RX_CREQ_FLTR_A_MODE3_PREG},
+ {0x808F, SIERRA_RX_CREQ_FLTR_A_MODE2_PREG},
+ {0x808F, SIERRA_RX_CREQ_FLTR_A_MODE1_PREG},
+ {0x808F, SIERRA_RX_CREQ_FLTR_A_MODE0_PREG},
+ {0x0002, SIERRA_CREQ_DCBIASATTEN_OVR_PREG},
+ {0x9800, SIERRA_RX_CTLE_CAL_PREG},
+ {0x033C, SIERRA_RX_CTLE_MAINTENANCE_PREG},
+ {0x44CC, SIERRA_CREQ_EQ_OPEN_EYE_THRESH_PREG},
+ {0x5624, SIERRA_DEQ_CONCUR_CTRL2_PREG},
+ {0x000F, SIERRA_DEQ_EPIPWR_CTRL2_PREG},
+ {0x00FF, SIERRA_DEQ_FAST_MAINT_CYCLES_PREG},
+ {0x4C4C, SIERRA_DEQ_ERRCMP_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_OFFSET_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_GAIN_CTRL_PREG},
+ {0x0041, SIERRA_DEQ_GLUT0},
+ {0x0082, SIERRA_DEQ_GLUT1},
+ {0x00C3, SIERRA_DEQ_GLUT2},
+ {0x0145, SIERRA_DEQ_GLUT3},
+ {0x0186, SIERRA_DEQ_GLUT4},
+ {0x09E7, SIERRA_DEQ_ALUT0},
+ {0x09A6, SIERRA_DEQ_ALUT1},
+ {0x0965, SIERRA_DEQ_ALUT2},
+ {0x08E3, SIERRA_DEQ_ALUT3},
+ {0x00FA, SIERRA_DEQ_DFETAP0},
+ {0x00FA, SIERRA_DEQ_DFETAP1},
+ {0x00FA, SIERRA_DEQ_DFETAP2},
+ {0x00FA, SIERRA_DEQ_DFETAP3},
+ {0x00FA, SIERRA_DEQ_DFETAP4},
+ {0x000F, SIERRA_DEQ_PRECUR_PREG},
+ {0x0280, SIERRA_DEQ_POSTCUR_PREG},
+ {0x8F00, SIERRA_DEQ_POSTCUR_DECR_PREG},
+ {0x3C0F, SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG},
+ {0x1C0C, SIERRA_DEQ_TAU_CTRL2_PREG},
+ {0x0100, SIERRA_DEQ_TAU_CTRL3_PREG},
+ {0x5E82, SIERRA_DEQ_OPENEYE_CTRL_PREG},
+ {0x002B, SIERRA_CPI_TRIM_PREG},
+ {0x0003, SIERRA_EPI_CTRL_PREG},
+ {0x803F, SIERRA_SDFILT_H2L_A_PREG},
+ {0x0004, SIERRA_RXBUFFER_CTLECTRL_PREG},
+ {0x2010, SIERRA_RXBUFFER_RCDFECTRL_PREG},
+ {0x4432, SIERRA_RXBUFFER_DFECTRL_PREG}
+};
+
+static struct cdns_sierra_vals pcie_100_int_ssc_cmn_vals = {
+ .reg_pairs = cdns_pcie_cmn_regs_int_ssc,
+ .num_regs = ARRAY_SIZE(cdns_pcie_cmn_regs_int_ssc),
+};
+
+static struct cdns_sierra_vals pcie_100_int_ssc_ln_vals = {
+ .reg_pairs = cdns_pcie_ln_regs_int_ssc,
+ .num_regs = ARRAY_SIZE(cdns_pcie_ln_regs_int_ssc),
+};
+
/* refclk100MHz_32b_PCIe_cmn_pll_ext_ssc */
static const struct cdns_reg_pairs cdns_pcie_cmn_regs_ext_ssc[] = {
{0x2106, SIERRA_CMN_PLLLC_LF_COEFF_MODE1_PREG},
@@ -1002,13 +1952,62 @@ static const struct cdns_reg_pairs cdns_pcie_cmn_regs_ext_ssc[] = {
/* refclk100MHz_32b_PCIe_ln_ext_ssc */
static const struct cdns_reg_pairs cdns_pcie_ln_regs_ext_ssc[] = {
+ {0xFC08, SIERRA_DET_STANDEC_A_PREG},
+ {0x001D, SIERRA_PSM_A3IN_TMR_PREG},
+ {0x1555, SIERRA_DFE_BIASTRIM_PREG},
+ {0x9703, SIERRA_DRVCTRL_BOOST_PREG},
{0x813E, SIERRA_CLKPATHCTRL_TMR_PREG},
{0x8047, SIERRA_RX_CREQ_FLTR_A_MODE3_PREG},
{0x808F, SIERRA_RX_CREQ_FLTR_A_MODE2_PREG},
{0x808F, SIERRA_RX_CREQ_FLTR_A_MODE1_PREG},
{0x808F, SIERRA_RX_CREQ_FLTR_A_MODE0_PREG},
+ {0x0002, SIERRA_CREQ_DCBIASATTEN_OVR_PREG},
+ {0x9800, SIERRA_RX_CTLE_CAL_PREG},
{0x033C, SIERRA_RX_CTLE_MAINTENANCE_PREG},
- {0x44CC, SIERRA_CREQ_EQ_OPEN_EYE_THRESH_PREG}
+ {0x44CC, SIERRA_CREQ_EQ_OPEN_EYE_THRESH_PREG},
+ {0x5624, SIERRA_DEQ_CONCUR_CTRL2_PREG},
+ {0x000F, SIERRA_DEQ_EPIPWR_CTRL2_PREG},
+ {0x00FF, SIERRA_DEQ_FAST_MAINT_CYCLES_PREG},
+ {0x4C4C, SIERRA_DEQ_ERRCMP_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_OFFSET_CTRL_PREG},
+ {0x02FA, SIERRA_DEQ_GAIN_CTRL_PREG},
+ {0x0041, SIERRA_DEQ_GLUT0},
+ {0x0082, SIERRA_DEQ_GLUT1},
+ {0x00C3, SIERRA_DEQ_GLUT2},
+ {0x0145, SIERRA_DEQ_GLUT3},
+ {0x0186, SIERRA_DEQ_GLUT4},
+ {0x09E7, SIERRA_DEQ_ALUT0},
+ {0x09A6, SIERRA_DEQ_ALUT1},
+ {0x0965, SIERRA_DEQ_ALUT2},
+ {0x08E3, SIERRA_DEQ_ALUT3},
+ {0x00FA, SIERRA_DEQ_DFETAP0},
+ {0x00FA, SIERRA_DEQ_DFETAP1},
+ {0x00FA, SIERRA_DEQ_DFETAP2},
+ {0x00FA, SIERRA_DEQ_DFETAP3},
+ {0x00FA, SIERRA_DEQ_DFETAP4},
+ {0x000F, SIERRA_DEQ_PRECUR_PREG},
+ {0x0280, SIERRA_DEQ_POSTCUR_PREG},
+ {0x8F00, SIERRA_DEQ_POSTCUR_DECR_PREG},
+ {0x3C0F, SIERRA_DEQ_TAU_CTRL1_SLOW_MAINT_PREG},
+ {0x1C0C, SIERRA_DEQ_TAU_CTRL2_PREG},
+ {0x0100, SIERRA_DEQ_TAU_CTRL3_PREG},
+ {0x5E82, SIERRA_DEQ_OPENEYE_CTRL_PREG},
+ {0x002B, SIERRA_CPI_TRIM_PREG},
+ {0x0003, SIERRA_EPI_CTRL_PREG},
+ {0x803F, SIERRA_SDFILT_H2L_A_PREG},
+ {0x0004, SIERRA_RXBUFFER_CTLECTRL_PREG},
+ {0x2010, SIERRA_RXBUFFER_RCDFECTRL_PREG},
+ {0x4432, SIERRA_RXBUFFER_DFECTRL_PREG}
+};
+
+static struct cdns_sierra_vals pcie_100_ext_ssc_cmn_vals = {
+ .reg_pairs = cdns_pcie_cmn_regs_ext_ssc,
+ .num_regs = ARRAY_SIZE(cdns_pcie_cmn_regs_ext_ssc),
+};
+
+static struct cdns_sierra_vals pcie_100_ext_ssc_ln_vals = {
+ .reg_pairs = cdns_pcie_ln_regs_ext_ssc,
+ .num_regs = ARRAY_SIZE(cdns_pcie_ln_regs_ext_ssc),
};
/* refclk100MHz_20b_USB_cmn_pll_ext_ssc */
@@ -1118,32 +2117,167 @@ static const struct cdns_reg_pairs cdns_usb_ln_regs_ext_ssc[] = {
{0x4243, SIERRA_RXBUFFER_DFECTRL_PREG}
};
+static struct cdns_sierra_vals usb_100_ext_ssc_cmn_vals = {
+ .reg_pairs = cdns_usb_cmn_regs_ext_ssc,
+ .num_regs = ARRAY_SIZE(cdns_usb_cmn_regs_ext_ssc),
+};
+
+static struct cdns_sierra_vals usb_100_ext_ssc_ln_vals = {
+ .reg_pairs = cdns_usb_ln_regs_ext_ssc,
+ .num_regs = ARRAY_SIZE(cdns_usb_ln_regs_ext_ssc),
+};
+
static const struct cdns_sierra_data cdns_map_sierra = {
- SIERRA_MACRO_ID,
- 0x2,
- 0x2,
- ARRAY_SIZE(cdns_pcie_cmn_regs_ext_ssc),
- ARRAY_SIZE(cdns_pcie_ln_regs_ext_ssc),
- ARRAY_SIZE(cdns_usb_cmn_regs_ext_ssc),
- ARRAY_SIZE(cdns_usb_ln_regs_ext_ssc),
- cdns_pcie_cmn_regs_ext_ssc,
- cdns_pcie_ln_regs_ext_ssc,
- cdns_usb_cmn_regs_ext_ssc,
- cdns_usb_ln_regs_ext_ssc,
+ .id_value = SIERRA_MACRO_ID,
+ .block_offset_shift = 0x2,
+ .reg_offset_shift = 0x2,
+ .pcs_cmn_vals = {
+ [TYPE_PCIE] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &pcie_phy_pcs_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_phy_pcs_cmn_vals,
+ [INTERNAL_SSC] = &pcie_phy_pcs_cmn_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &pcie_phy_pcs_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_phy_pcs_cmn_vals,
+ [INTERNAL_SSC] = &pcie_phy_pcs_cmn_vals,
+ },
+ },
+ },
+ .pma_cmn_vals = {
+ [TYPE_PCIE] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_ext_ssc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_plllc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_ext_ssc_plllc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_plllc_cmn_vals,
+ },
+ },
+ [TYPE_USB] = {
+ [TYPE_NONE] = {
+ [EXTERNAL_SSC] = &usb_100_ext_ssc_cmn_vals,
+ },
+ },
+ [TYPE_QSGMII] = {
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_plllc1_cmn_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_plllc1_cmn_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_plllc1_cmn_vals,
+ },
+ },
+ },
+ .pma_ln_vals = {
+ [TYPE_PCIE] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &pcie_100_no_ssc_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_ext_ssc_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_ln_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &ml_pcie_100_no_ssc_ln_vals,
+ [EXTERNAL_SSC] = &ml_pcie_100_ext_ssc_ln_vals,
+ [INTERNAL_SSC] = &ml_pcie_100_int_ssc_ln_vals,
+ },
+ },
+ [TYPE_USB] = {
+ [TYPE_NONE] = {
+ [EXTERNAL_SSC] = &usb_100_ext_ssc_ln_vals,
+ },
+ },
+ [TYPE_QSGMII] = {
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_plllc1_ln_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_plllc1_ln_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_plllc1_ln_vals,
+ },
+ },
+ },
};
static const struct cdns_sierra_data cdns_ti_map_sierra = {
- SIERRA_MACRO_ID,
- 0x0,
- 0x1,
- ARRAY_SIZE(cdns_pcie_cmn_regs_ext_ssc),
- ARRAY_SIZE(cdns_pcie_ln_regs_ext_ssc),
- ARRAY_SIZE(cdns_usb_cmn_regs_ext_ssc),
- ARRAY_SIZE(cdns_usb_ln_regs_ext_ssc),
- cdns_pcie_cmn_regs_ext_ssc,
- cdns_pcie_ln_regs_ext_ssc,
- cdns_usb_cmn_regs_ext_ssc,
- cdns_usb_ln_regs_ext_ssc,
+ .id_value = SIERRA_MACRO_ID,
+ .block_offset_shift = 0x0,
+ .reg_offset_shift = 0x1,
+ .pcs_cmn_vals = {
+ [TYPE_PCIE] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &pcie_phy_pcs_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_phy_pcs_cmn_vals,
+ [INTERNAL_SSC] = &pcie_phy_pcs_cmn_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &pcie_phy_pcs_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_phy_pcs_cmn_vals,
+ [INTERNAL_SSC] = &pcie_phy_pcs_cmn_vals,
+ },
+ },
+ },
+ .phy_pma_ln_vals = {
+ [TYPE_QSGMII] = {
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_phy_pma_ln_vals,
+ [EXTERNAL_SSC] = &qsgmii_phy_pma_ln_vals,
+ [INTERNAL_SSC] = &qsgmii_phy_pma_ln_vals,
+ },
+ },
+ },
+ .pma_cmn_vals = {
+ [TYPE_PCIE] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &pcie_100_no_ssc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_ext_ssc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_cmn_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &pcie_100_no_ssc_plllc_cmn_vals,
+ [EXTERNAL_SSC] = &pcie_100_ext_ssc_plllc_cmn_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_plllc_cmn_vals,
+ },
+ },
+ [TYPE_USB] = {
+ [TYPE_NONE] = {
+ [EXTERNAL_SSC] = &usb_100_ext_ssc_cmn_vals,
+ },
+ },
+ [TYPE_QSGMII] = {
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_plllc1_cmn_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_plllc1_cmn_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_plllc1_cmn_vals,
+ },
+ },
+ },
+ .pma_ln_vals = {
+ [TYPE_PCIE] = {
+ [TYPE_NONE] = {
+ [NO_SSC] = &pcie_100_no_ssc_ln_vals,
+ [EXTERNAL_SSC] = &pcie_100_ext_ssc_ln_vals,
+ [INTERNAL_SSC] = &pcie_100_int_ssc_ln_vals,
+ },
+ [TYPE_QSGMII] = {
+ [NO_SSC] = &ml_pcie_100_no_ssc_ln_vals,
+ [EXTERNAL_SSC] = &ml_pcie_100_ext_ssc_ln_vals,
+ [INTERNAL_SSC] = &ml_pcie_100_int_ssc_ln_vals,
+ },
+ },
+ [TYPE_USB] = {
+ [TYPE_NONE] = {
+ [EXTERNAL_SSC] = &usb_100_ext_ssc_ln_vals,
+ },
+ },
+ [TYPE_QSGMII] = {
+ [TYPE_PCIE] = {
+ [NO_SSC] = &qsgmii_100_no_ssc_plllc1_ln_vals,
+ [EXTERNAL_SSC] = &qsgmii_100_no_ssc_plllc1_ln_vals,
+ [INTERNAL_SSC] = &qsgmii_100_no_ssc_plllc1_ln_vals,
+ },
+ },
+ },
};
static const struct of_device_id cdns_sierra_id_table[] = {
diff --git a/drivers/phy/cadence/phy-cadence-torrent.c b/drivers/phy/cadence/phy-cadence-torrent.c
index 5786166133d3..7c4b8050485f 100644
--- a/drivers/phy/cadence/phy-cadence-torrent.c
+++ b/drivers/phy/cadence/phy-cadence-torrent.c
@@ -2278,7 +2278,7 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
struct cdns_torrent_vals *cmn_vals, *tx_ln_vals, *rx_ln_vals;
enum cdns_torrent_ref_clk ref_clk = cdns_phy->ref_clk_rate;
struct cdns_torrent_vals *link_cmn_vals, *xcvr_diag_vals;
- enum cdns_torrent_phy_type phy_t1, phy_t2, tmp_phy_type;
+ enum cdns_torrent_phy_type phy_t1, phy_t2;
struct cdns_torrent_vals *pcs_cmn_vals;
int i, j, node, mlane, num_lanes, ret;
struct cdns_reg_pairs *reg_pairs;
@@ -2304,9 +2304,7 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
* configure the PHY for second link with phy_t2.
* Get the array values as [phy_t2][phy_t1][ssc].
*/
- tmp_phy_type = phy_t1;
- phy_t1 = phy_t2;
- phy_t2 = tmp_phy_type;
+ swap(phy_t1, phy_t2);
}
mlane = cdns_phy->phys[node].mlane;
diff --git a/drivers/phy/freescale/Kconfig b/drivers/phy/freescale/Kconfig
index 320630ffe3cd..c3669c28ea9f 100644
--- a/drivers/phy/freescale/Kconfig
+++ b/drivers/phy/freescale/Kconfig
@@ -14,3 +14,11 @@ config PHY_MIXEL_MIPI_DPHY
help
Enable this to add support for the Mixel DSI PHY as found
on NXP's i.MX8 family of SOCs.
+
+config PHY_FSL_IMX8M_PCIE
+ tristate "Freescale i.MX8M PCIE PHY"
+ depends on OF && HAS_IOMEM
+ select GENERIC_PHY
+ help
+ Enable this to add support for the PCIE PHY as found on
+ i.MX8M family of SOCs.
diff --git a/drivers/phy/freescale/Makefile b/drivers/phy/freescale/Makefile
index 1d02e3869b45..55d07c742ab0 100644
--- a/drivers/phy/freescale/Makefile
+++ b/drivers/phy/freescale/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_PHY_FSL_IMX8MQ_USB) += phy-fsl-imx8mq-usb.o
obj-$(CONFIG_PHY_MIXEL_MIPI_DPHY) += phy-fsl-imx8-mipi-dphy.o
+obj-$(CONFIG_PHY_FSL_IMX8M_PCIE) += phy-fsl-imx8m-pcie.o
diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
new file mode 100644
index 000000000000..04b1aafb29f4
--- /dev/null
+++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2021 NXP
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/delay.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/imx7-iomuxc-gpr.h>
+#include <linux/module.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <dt-bindings/phy/phy-imx8-pcie.h>
+
+#define IMX8MM_PCIE_PHY_CMN_REG061 0x184
+#define ANA_PLL_CLK_OUT_TO_EXT_IO_EN BIT(0)
+#define IMX8MM_PCIE_PHY_CMN_REG062 0x188
+#define ANA_PLL_CLK_OUT_TO_EXT_IO_SEL BIT(3)
+#define IMX8MM_PCIE_PHY_CMN_REG063 0x18C
+#define AUX_PLL_REFCLK_SEL_SYS_PLL GENMASK(7, 6)
+#define IMX8MM_PCIE_PHY_CMN_REG064 0x190
+#define ANA_AUX_RX_TX_SEL_TX BIT(7)
+#define ANA_AUX_RX_TERM_GND_EN BIT(3)
+#define ANA_AUX_TX_TERM BIT(2)
+#define IMX8MM_PCIE_PHY_CMN_REG065 0x194
+#define ANA_AUX_RX_TERM (BIT(7) | BIT(4))
+#define ANA_AUX_TX_LVL GENMASK(3, 0)
+#define IMX8MM_PCIE_PHY_CMN_REG75 0x1D4
+#define PCIE_PHY_CMN_REG75_PLL_DONE 0x3
+#define PCIE_PHY_TRSV_REG5 0x414
+#define PCIE_PHY_TRSV_REG5_GEN1_DEEMP 0x2D
+#define PCIE_PHY_TRSV_REG6 0x418
+#define PCIE_PHY_TRSV_REG6_GEN2_DEEMP 0xF
+
+#define IMX8MM_GPR_PCIE_REF_CLK_SEL GENMASK(25, 24)
+#define IMX8MM_GPR_PCIE_REF_CLK_PLL FIELD_PREP(IMX8MM_GPR_PCIE_REF_CLK_SEL, 0x3)
+#define IMX8MM_GPR_PCIE_REF_CLK_EXT FIELD_PREP(IMX8MM_GPR_PCIE_REF_CLK_SEL, 0x2)
+#define IMX8MM_GPR_PCIE_AUX_EN BIT(19)
+#define IMX8MM_GPR_PCIE_CMN_RST BIT(18)
+#define IMX8MM_GPR_PCIE_POWER_OFF BIT(17)
+#define IMX8MM_GPR_PCIE_SSC_EN BIT(16)
+#define IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE BIT(9)
+
+struct imx8_pcie_phy {
+ void __iomem *base;
+ struct clk *clk;
+ struct phy *phy;
+ struct regmap *iomuxc_gpr;
+ struct reset_control *reset;
+ u32 refclk_pad_mode;
+ u32 tx_deemph_gen1;
+ u32 tx_deemph_gen2;
+ bool clkreq_unused;
+};
+
+static int imx8_pcie_phy_init(struct phy *phy)
+{
+ int ret;
+ u32 val, pad_mode;
+ struct imx8_pcie_phy *imx8_phy = phy_get_drvdata(phy);
+
+ reset_control_assert(imx8_phy->reset);
+
+ pad_mode = imx8_phy->refclk_pad_mode;
+ /* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */
+ regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+ IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE,
+ imx8_phy->clkreq_unused ?
+ 0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE);
+ regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+ IMX8MM_GPR_PCIE_AUX_EN,
+ IMX8MM_GPR_PCIE_AUX_EN);
+ regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+ IMX8MM_GPR_PCIE_POWER_OFF, 0);
+ regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+ IMX8MM_GPR_PCIE_SSC_EN, 0);
+
+ regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+ IMX8MM_GPR_PCIE_REF_CLK_SEL,
+ pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ?
+ IMX8MM_GPR_PCIE_REF_CLK_EXT :
+ IMX8MM_GPR_PCIE_REF_CLK_PLL);
+ usleep_range(100, 200);
+
+ /* Do the PHY common block reset */
+ regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+ IMX8MM_GPR_PCIE_CMN_RST,
+ IMX8MM_GPR_PCIE_CMN_RST);
+ usleep_range(200, 500);
+
+ if (pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT) {
+ /* Configure the pad as input */
+ val = readl(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG061);
+ writel(val & ~ANA_PLL_CLK_OUT_TO_EXT_IO_EN,
+ imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG061);
+ } else if (pad_mode == IMX8_PCIE_REFCLK_PAD_OUTPUT) {
+ /* Configure the PHY to output the refclock via pad */
+ writel(ANA_PLL_CLK_OUT_TO_EXT_IO_EN,
+ imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG061);
+ writel(ANA_PLL_CLK_OUT_TO_EXT_IO_SEL,
+ imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG062);
+ writel(AUX_PLL_REFCLK_SEL_SYS_PLL,
+ imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063);
+ val = ANA_AUX_RX_TX_SEL_TX | ANA_AUX_TX_TERM;
+ writel(val | ANA_AUX_RX_TERM_GND_EN,
+ imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG064);
+ writel(ANA_AUX_RX_TERM | ANA_AUX_TX_LVL,
+ imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG065);
+ }
+
+ /* Tune PHY de-emphasis setting to pass PCIe compliance. */
+ if (imx8_phy->tx_deemph_gen1)
+ writel(imx8_phy->tx_deemph_gen1,
+ imx8_phy->base + PCIE_PHY_TRSV_REG5);
+ if (imx8_phy->tx_deemph_gen2)
+ writel(imx8_phy->tx_deemph_gen2,
+ imx8_phy->base + PCIE_PHY_TRSV_REG6);
+
+ reset_control_deassert(imx8_phy->reset);
+
+ /* Polling to check the phy is ready or not. */
+ ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG75,
+ val, val == PCIE_PHY_CMN_REG75_PLL_DONE,
+ 10, 20000);
+ return ret;
+}
+
+static int imx8_pcie_phy_power_on(struct phy *phy)
+{
+ struct imx8_pcie_phy *imx8_phy = phy_get_drvdata(phy);
+
+ return clk_prepare_enable(imx8_phy->clk);
+}
+
+static int imx8_pcie_phy_power_off(struct phy *phy)
+{
+ struct imx8_pcie_phy *imx8_phy = phy_get_drvdata(phy);
+
+ clk_disable_unprepare(imx8_phy->clk);
+
+ return 0;
+}
+
+static const struct phy_ops imx8_pcie_phy_ops = {
+ .init = imx8_pcie_phy_init,
+ .power_on = imx8_pcie_phy_power_on,
+ .power_off = imx8_pcie_phy_power_off,
+ .owner = THIS_MODULE,
+};
+
+static int imx8_pcie_phy_probe(struct platform_device *pdev)
+{
+ struct phy_provider *phy_provider;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct imx8_pcie_phy *imx8_phy;
+ struct resource *res;
+
+ imx8_phy = devm_kzalloc(dev, sizeof(*imx8_phy), GFP_KERNEL);
+ if (!imx8_phy)
+ return -ENOMEM;
+
+ /* get PHY refclk pad mode */
+ of_property_read_u32(np, "fsl,refclk-pad-mode",
+ &imx8_phy->refclk_pad_mode);
+
+ if (of_property_read_u32(np, "fsl,tx-deemph-gen1",
+ &imx8_phy->tx_deemph_gen1))
+ imx8_phy->tx_deemph_gen1 = 0;
+
+ if (of_property_read_u32(np, "fsl,tx-deemph-gen2",
+ &imx8_phy->tx_deemph_gen2))
+ imx8_phy->tx_deemph_gen2 = 0;
+
+ if (of_property_read_bool(np, "fsl,clkreq-unsupported"))
+ imx8_phy->clkreq_unused = true;
+ else
+ imx8_phy->clkreq_unused = false;
+
+ imx8_phy->clk = devm_clk_get(dev, "ref");
+ if (IS_ERR(imx8_phy->clk)) {
+ dev_err(dev, "failed to get imx pcie phy clock\n");
+ return PTR_ERR(imx8_phy->clk);
+ }
+
+ /* Grab GPR config register range */
+ imx8_phy->iomuxc_gpr =
+ syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
+ if (IS_ERR(imx8_phy->iomuxc_gpr)) {
+ dev_err(dev, "unable to find iomuxc registers\n");
+ return PTR_ERR(imx8_phy->iomuxc_gpr);
+ }
+
+ imx8_phy->reset = devm_reset_control_get_exclusive(dev, "pciephy");
+ if (IS_ERR(imx8_phy->reset)) {
+ dev_err(dev, "Failed to get PCIEPHY reset control\n");
+ return PTR_ERR(imx8_phy->reset);
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ imx8_phy->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(imx8_phy->base))
+ return PTR_ERR(imx8_phy->base);
+
+ imx8_phy->phy = devm_phy_create(dev, NULL, &imx8_pcie_phy_ops);
+ if (IS_ERR(imx8_phy->phy))
+ return PTR_ERR(imx8_phy->phy);
+
+ phy_set_drvdata(imx8_phy->phy, imx8_phy);
+
+ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+
+ return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static const struct of_device_id imx8_pcie_phy_of_match[] = {
+ {.compatible = "fsl,imx8mm-pcie-phy",},
+ { },
+};
+MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match);
+
+static struct platform_driver imx8_pcie_phy_driver = {
+ .probe = imx8_pcie_phy_probe,
+ .driver = {
+ .name = "imx8-pcie-phy",
+ .of_match_table = imx8_pcie_phy_of_match,
+ }
+};
+module_platform_driver(imx8_pcie_phy_driver);
+
+MODULE_DESCRIPTION("FSL IMX8 PCIE PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/intel/Kconfig b/drivers/phy/intel/Kconfig
index ac42bb2fb394..18a3cc5b98c0 100644
--- a/drivers/phy/intel/Kconfig
+++ b/drivers/phy/intel/Kconfig
@@ -46,3 +46,13 @@ config PHY_INTEL_LGM_EMMC
select GENERIC_PHY
help
Enable this to support the Intel EMMC PHY
+
+config PHY_INTEL_THUNDERBAY_EMMC
+ tristate "Intel Thunder Bay eMMC PHY driver"
+ depends on OF && (ARCH_THUNDERBAY || COMPILE_TEST)
+ select GENERIC_PHY
+ help
+ This option enables support for Intel Thunder Bay SoC eMMC PHY.
+
+ To compile this driver as a module, choose M here: the module
+ will be called phy-intel-thunderbay-emmc.ko.
diff --git a/drivers/phy/intel/Makefile b/drivers/phy/intel/Makefile
index 14550981a707..b7321d56b0bb 100644
--- a/drivers/phy/intel/Makefile
+++ b/drivers/phy/intel/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_PHY_INTEL_KEEMBAY_EMMC) += phy-intel-keembay-emmc.o
obj-$(CONFIG_PHY_INTEL_KEEMBAY_USB) += phy-intel-keembay-usb.o
obj-$(CONFIG_PHY_INTEL_LGM_COMBO) += phy-intel-lgm-combo.o
obj-$(CONFIG_PHY_INTEL_LGM_EMMC) += phy-intel-lgm-emmc.o
+obj-$(CONFIG_PHY_INTEL_THUNDERBAY_EMMC) += phy-intel-thunderbay-emmc.o
diff --git a/drivers/phy/intel/phy-intel-thunderbay-emmc.c b/drivers/phy/intel/phy-intel-thunderbay-emmc.c
new file mode 100644
index 000000000000..593f6970b81e
--- /dev/null
+++ b/drivers/phy/intel/phy-intel-thunderbay-emmc.c
@@ -0,0 +1,509 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel ThunderBay eMMC PHY driver
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+
+/* eMMC/SD/SDIO core/phy configuration registers */
+#define CTRL_CFG_0 0x00
+#define CTRL_CFG_1 0x04
+#define CTRL_PRESET_0 0x08
+#define CTRL_PRESET_1 0x0c
+#define CTRL_PRESET_2 0x10
+#define CTRL_PRESET_3 0x14
+#define CTRL_PRESET_4 0x18
+#define CTRL_CFG_2 0x1c
+#define CTRL_CFG_3 0x20
+#define PHY_CFG_0 0x24
+#define PHY_CFG_1 0x28
+#define PHY_CFG_2 0x2c
+#define PHYBIST_CTRL 0x30
+#define SDHC_STAT3 0x34
+#define PHY_STAT 0x38
+#define PHYBIST_STAT_0 0x3c
+#define PHYBIST_STAT_1 0x40
+#define EMMC_AXI 0x44
+
+/* CTRL_PRESET_3 */
+#define CTRL_PRESET3_MASK GENMASK(31, 0)
+#define CTRL_PRESET3_SHIFT 0
+
+/* CTRL_CFG_0 bit fields */
+#define SUPPORT_HS_MASK BIT(26)
+#define SUPPORT_HS_SHIFT 26
+
+#define SUPPORT_8B_MASK BIT(24)
+#define SUPPORT_8B_SHIFT 24
+
+/* CTRL_CFG_1 bit fields */
+#define SUPPORT_SDR50_MASK BIT(28)
+#define SUPPORT_SDR50_SHIFT 28
+#define SLOT_TYPE_MASK GENMASK(27, 26)
+#define SLOT_TYPE_OFFSET 26
+#define SUPPORT_64B_MASK BIT(24)
+#define SUPPORT_64B_SHIFT 24
+#define SUPPORT_HS400_MASK BIT(2)
+#define SUPPORT_HS400_SHIFT 2
+#define SUPPORT_DDR50_MASK BIT(1)
+#define SUPPORT_DDR50_SHIFT 1
+#define SUPPORT_SDR104_MASK BIT(0)
+#define SUPPORT_SDR104_SHIFT 0
+
+/* PHY_CFG_0 bit fields */
+#define SEL_DLY_TXCLK_MASK BIT(29)
+#define SEL_DLY_TXCLK_SHIFT 29
+#define SEL_DLY_RXCLK_MASK BIT(28)
+#define SEL_DLY_RXCLK_SHIFT 28
+
+#define OTAP_DLY_ENA_MASK BIT(27)
+#define OTAP_DLY_ENA_SHIFT 27
+#define OTAP_DLY_SEL_MASK GENMASK(26, 23)
+#define OTAP_DLY_SEL_SHIFT 23
+#define ITAP_CHG_WIN_MASK BIT(22)
+#define ITAP_CHG_WIN_SHIFT 22
+#define ITAP_DLY_ENA_MASK BIT(21)
+#define ITAP_DLY_ENA_SHIFT 21
+#define ITAP_DLY_SEL_MASK GENMASK(20, 16)
+#define ITAP_DLY_SEL_SHIFT 16
+#define RET_ENB_MASK BIT(15)
+#define RET_ENB_SHIFT 15
+#define RET_EN_MASK BIT(14)
+#define RET_EN_SHIFT 14
+#define DLL_IFF_MASK GENMASK(13, 11)
+#define DLL_IFF_SHIFT 11
+#define DLL_EN_MASK BIT(10)
+#define DLL_EN_SHIFT 10
+#define DLL_TRIM_ICP_MASK GENMASK(9, 6)
+#define DLL_TRIM_ICP_SHIFT 6
+#define RETRIM_EN_MASK BIT(5)
+#define RETRIM_EN_SHIFT 5
+#define RETRIM_MASK BIT(4)
+#define RETRIM_SHIFT 4
+#define DR_TY_MASK GENMASK(3, 1)
+#define DR_TY_SHIFT 1
+#define PWR_DOWN_MASK BIT(0)
+#define PWR_DOWN_SHIFT 0
+
+/* PHY_CFG_1 bit fields */
+#define REN_DAT_MASK GENMASK(19, 12)
+#define REN_DAT_SHIFT 12
+#define REN_CMD_MASK BIT(11)
+#define REN_CMD_SHIFT 11
+#define REN_STRB_MASK BIT(10)
+#define REN_STRB_SHIFT 10
+#define PU_STRB_MASK BIT(20)
+#define PU_STRB_SHIFT 20
+
+/* PHY_CFG_2 bit fields */
+#define CLKBUF_MASK GENMASK(24, 21)
+#define CLKBUF_SHIFT 21
+#define SEL_STRB_MASK GENMASK(20, 13)
+#define SEL_STRB_SHIFT 13
+#define SEL_FREQ_MASK GENMASK(12, 10)
+#define SEL_FREQ_SHIFT 10
+
+/* PHY_STAT bit fields */
+#define CAL_DONE BIT(6)
+#define DLL_RDY BIT(5)
+
+#define OTAP_DLY 0x0
+#define ITAP_DLY 0x0
+#define STRB 0x33
+
+/* From ACS_eMMC51_16nFFC_RO1100_Userguide_v1p0.pdf p17 */
+#define FREQSEL_200M_170M 0x0
+#define FREQSEL_170M_140M 0x1
+#define FREQSEL_140M_110M 0x2
+#define FREQSEL_110M_80M 0x3
+#define FREQSEL_80M_50M 0x4
+#define FREQSEL_275M_250M 0x5
+#define FREQSEL_250M_225M 0x6
+#define FREQSEL_225M_200M 0x7
+
+/* Phy power status */
+#define PHY_UNINITIALIZED 0
+#define PHY_INITIALIZED 1
+
+/*
+ * During init(400KHz) phy_settings will be called with 200MHZ clock
+ * To avoid incorrectly setting the phy for init(400KHZ) "phy_power_sts" is used.
+ * When actual clock is set always phy is powered off once and then powered on.
+ * (sdhci_arasan_set_clock). That feature will be used to identify whether the
+ * settings are for init phy_power_on or actual clock phy_power_on
+ * 0 --> init settings
+ * 1 --> actual settings
+ */
+
+struct thunderbay_emmc_phy {
+ void __iomem *reg_base;
+ struct clk *emmcclk;
+ int phy_power_sts;
+};
+
+static inline void update_reg(struct thunderbay_emmc_phy *tbh_phy, u32 offset,
+ u32 mask, u32 shift, u32 val)
+{
+ u32 tmp;
+
+ tmp = readl(tbh_phy->reg_base + offset);
+ tmp &= ~mask;
+ tmp |= val << shift;
+ writel(tmp, tbh_phy->reg_base + offset);
+}
+
+static int thunderbay_emmc_phy_power(struct phy *phy, bool power_on)
+{
+ struct thunderbay_emmc_phy *tbh_phy = phy_get_drvdata(phy);
+ unsigned int freqsel = FREQSEL_200M_170M;
+ unsigned long rate;
+ static int lock;
+ u32 val;
+ int ret;
+
+ /* Disable DLL */
+ rate = clk_get_rate(tbh_phy->emmcclk);
+ switch (rate) {
+ case 200000000:
+ /* lock dll only when it is used, i.e only if SEL_DLY_TXCLK/RXCLK are 0 */
+ update_reg(tbh_phy, PHY_CFG_0, DLL_EN_MASK, DLL_EN_SHIFT, 0x0);
+ break;
+
+ /* dll lock not required for other frequencies */
+ case 50000000 ... 52000000:
+ case 400000:
+ default:
+ break;
+ }
+
+ if (!power_on)
+ return 0;
+
+ rate = clk_get_rate(tbh_phy->emmcclk);
+ switch (rate) {
+ case 170000001 ... 200000000:
+ freqsel = FREQSEL_200M_170M;
+ break;
+
+ case 140000001 ... 170000000:
+ freqsel = FREQSEL_170M_140M;
+ break;
+
+ case 110000001 ... 140000000:
+ freqsel = FREQSEL_140M_110M;
+ break;
+
+ case 80000001 ... 110000000:
+ freqsel = FREQSEL_110M_80M;
+ break;
+
+ case 50000000 ... 80000000:
+ freqsel = FREQSEL_80M_50M;
+ break;
+
+ case 250000001 ... 275000000:
+ freqsel = FREQSEL_275M_250M;
+ break;
+
+ case 225000001 ... 250000000:
+ freqsel = FREQSEL_250M_225M;
+ break;
+
+ case 200000001 ... 225000000:
+ freqsel = FREQSEL_225M_200M;
+ break;
+ default:
+ break;
+ }
+ /* Clock rate is checked against upper limit. It may fall low during init */
+ if (rate > 200000000)
+ dev_warn(&phy->dev, "Unsupported rate: %lu\n", rate);
+
+ udelay(5);
+
+ if (lock == 0) {
+ /* PDB will be done only once per boot */
+ update_reg(tbh_phy, PHY_CFG_0, PWR_DOWN_MASK,
+ PWR_DOWN_SHIFT, 0x1);
+ lock = 1;
+ /*
+ * According to the user manual, it asks driver to wait 5us for
+ * calpad busy trimming. However it is documented that this value is
+ * PVT(A.K.A. process, voltage and temperature) relevant, so some
+ * failure cases are found which indicates we should be more tolerant
+ * to calpad busy trimming.
+ */
+ ret = readl_poll_timeout(tbh_phy->reg_base + PHY_STAT,
+ val, (val & CAL_DONE), 10, 50);
+ if (ret) {
+ dev_err(&phy->dev, "caldone failed, ret=%d\n", ret);
+ return ret;
+ }
+ }
+ rate = clk_get_rate(tbh_phy->emmcclk);
+ switch (rate) {
+ case 200000000:
+ /* Set frequency of the DLL operation */
+ update_reg(tbh_phy, PHY_CFG_2, SEL_FREQ_MASK, SEL_FREQ_SHIFT, freqsel);
+
+ /* Enable DLL */
+ update_reg(tbh_phy, PHY_CFG_0, DLL_EN_MASK, DLL_EN_SHIFT, 0x1);
+
+ /*
+ * After enabling analog DLL circuits docs say that we need 10.2 us if
+ * our source clock is at 50 MHz and that lock time scales linearly
+ * with clock speed. If we are powering on the PHY and the card clock
+ * is super slow (like 100kHz) this could take as long as 5.1 ms as
+ * per the math: 10.2 us * (50000000 Hz / 100000 Hz) => 5.1 ms
+ * hopefully we won't be running at 100 kHz, but we should still make
+ * sure we wait long enough.
+ *
+ * NOTE: There appear to be corner cases where the DLL seems to take
+ * extra long to lock for reasons that aren't understood. In some
+ * extreme cases we've seen it take up to over 10ms (!). We'll be
+ * generous and give it 50ms.
+ */
+ ret = readl_poll_timeout(tbh_phy->reg_base + PHY_STAT,
+ val, (val & DLL_RDY), 10, 50 * USEC_PER_MSEC);
+ if (ret) {
+ dev_err(&phy->dev, "dllrdy failed, ret=%d\n", ret);
+ return ret;
+ }
+ break;
+
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int thunderbay_emmc_phy_init(struct phy *phy)
+{
+ struct thunderbay_emmc_phy *tbh_phy = phy_get_drvdata(phy);
+
+ tbh_phy->emmcclk = clk_get(&phy->dev, "emmcclk");
+
+ return PTR_ERR_OR_ZERO(tbh_phy->emmcclk);
+}
+
+static int thunderbay_emmc_phy_exit(struct phy *phy)
+{
+ struct thunderbay_emmc_phy *tbh_phy = phy_get_drvdata(phy);
+
+ clk_put(tbh_phy->emmcclk);
+
+ return 0;
+}
+
+static int thunderbay_emmc_phy_power_on(struct phy *phy)
+{
+ struct thunderbay_emmc_phy *tbh_phy = phy_get_drvdata(phy);
+ unsigned long rate;
+
+ /* Overwrite capability bits configurable in bootloader */
+ update_reg(tbh_phy, CTRL_CFG_0,
+ SUPPORT_HS_MASK, SUPPORT_HS_SHIFT, 0x1);
+ update_reg(tbh_phy, CTRL_CFG_0,
+ SUPPORT_8B_MASK, SUPPORT_8B_SHIFT, 0x1);
+ update_reg(tbh_phy, CTRL_CFG_1,
+ SUPPORT_SDR50_MASK, SUPPORT_SDR50_SHIFT, 0x1);
+ update_reg(tbh_phy, CTRL_CFG_1,
+ SUPPORT_DDR50_MASK, SUPPORT_DDR50_SHIFT, 0x1);
+ update_reg(tbh_phy, CTRL_CFG_1,
+ SUPPORT_SDR104_MASK, SUPPORT_SDR104_SHIFT, 0x1);
+ update_reg(tbh_phy, CTRL_CFG_1,
+ SUPPORT_HS400_MASK, SUPPORT_HS400_SHIFT, 0x1);
+ update_reg(tbh_phy, CTRL_CFG_1,
+ SUPPORT_64B_MASK, SUPPORT_64B_SHIFT, 0x1);
+
+ if (tbh_phy->phy_power_sts == PHY_UNINITIALIZED) {
+ /* Indicates initialization, settings for init, same as 400KHZ setting */
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_TXCLK_MASK, SEL_DLY_TXCLK_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_RXCLK_MASK, SEL_DLY_RXCLK_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_ENA_MASK, ITAP_DLY_ENA_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_SEL_MASK, ITAP_DLY_SEL_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_ENA_MASK, OTAP_DLY_ENA_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_SEL_MASK, OTAP_DLY_SEL_SHIFT, 0);
+ update_reg(tbh_phy, PHY_CFG_0, DLL_TRIM_ICP_MASK, DLL_TRIM_ICP_SHIFT, 0);
+ update_reg(tbh_phy, PHY_CFG_0, DR_TY_MASK, DR_TY_SHIFT, 0x1);
+
+ } else if (tbh_phy->phy_power_sts == PHY_INITIALIZED) {
+ /* Indicates actual clock setting */
+ rate = clk_get_rate(tbh_phy->emmcclk);
+ switch (rate) {
+ case 200000000:
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_TXCLK_MASK,
+ SEL_DLY_TXCLK_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_RXCLK_MASK,
+ SEL_DLY_RXCLK_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_ENA_MASK,
+ ITAP_DLY_ENA_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_SEL_MASK,
+ ITAP_DLY_SEL_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_ENA_MASK,
+ OTAP_DLY_ENA_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_SEL_MASK,
+ OTAP_DLY_SEL_SHIFT, 2);
+ update_reg(tbh_phy, PHY_CFG_0, DLL_TRIM_ICP_MASK,
+ DLL_TRIM_ICP_SHIFT, 0x8);
+ update_reg(tbh_phy, PHY_CFG_0, DR_TY_MASK,
+ DR_TY_SHIFT, 0x1);
+ /* For HS400 only */
+ update_reg(tbh_phy, PHY_CFG_2, SEL_STRB_MASK,
+ SEL_STRB_SHIFT, STRB);
+ break;
+
+ case 50000000 ... 52000000:
+ /* For both HS and DDR52 this setting works */
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_TXCLK_MASK,
+ SEL_DLY_TXCLK_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_RXCLK_MASK,
+ SEL_DLY_RXCLK_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_ENA_MASK,
+ ITAP_DLY_ENA_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_SEL_MASK,
+ ITAP_DLY_SEL_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_ENA_MASK,
+ OTAP_DLY_ENA_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_SEL_MASK,
+ OTAP_DLY_SEL_SHIFT, 4);
+ update_reg(tbh_phy, PHY_CFG_0, DLL_TRIM_ICP_MASK,
+ DLL_TRIM_ICP_SHIFT, 0x8);
+ update_reg(tbh_phy, PHY_CFG_0,
+ DR_TY_MASK, DR_TY_SHIFT, 0x1);
+ break;
+
+ case 400000:
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_TXCLK_MASK,
+ SEL_DLY_TXCLK_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_RXCLK_MASK,
+ SEL_DLY_RXCLK_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_ENA_MASK,
+ ITAP_DLY_ENA_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_SEL_MASK,
+ ITAP_DLY_SEL_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_ENA_MASK,
+ OTAP_DLY_ENA_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_SEL_MASK,
+ OTAP_DLY_SEL_SHIFT, 0);
+ update_reg(tbh_phy, PHY_CFG_0, DLL_TRIM_ICP_MASK,
+ DLL_TRIM_ICP_SHIFT, 0);
+ update_reg(tbh_phy, PHY_CFG_0, DR_TY_MASK, DR_TY_SHIFT, 0x1);
+ break;
+
+ default:
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_TXCLK_MASK,
+ SEL_DLY_TXCLK_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, SEL_DLY_RXCLK_MASK,
+ SEL_DLY_RXCLK_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_ENA_MASK,
+ ITAP_DLY_ENA_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, ITAP_DLY_SEL_MASK,
+ ITAP_DLY_SEL_SHIFT, 0x0);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_ENA_MASK,
+ OTAP_DLY_ENA_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, OTAP_DLY_SEL_MASK,
+ OTAP_DLY_SEL_SHIFT, 2);
+ update_reg(tbh_phy, PHY_CFG_0, DLL_TRIM_ICP_MASK,
+ DLL_TRIM_ICP_SHIFT, 0x8);
+ update_reg(tbh_phy, PHY_CFG_0, DR_TY_MASK,
+ DR_TY_SHIFT, 0x1);
+ break;
+ }
+ /* Reset, init seq called without phy_power_off, this indicates init seq */
+ tbh_phy->phy_power_sts = PHY_UNINITIALIZED;
+ }
+
+ update_reg(tbh_phy, PHY_CFG_0, RETRIM_EN_MASK, RETRIM_EN_SHIFT, 0x1);
+ update_reg(tbh_phy, PHY_CFG_0, RETRIM_MASK, RETRIM_SHIFT, 0x0);
+
+ return thunderbay_emmc_phy_power(phy, 1);
+}
+
+static int thunderbay_emmc_phy_power_off(struct phy *phy)
+{
+ struct thunderbay_emmc_phy *tbh_phy = phy_get_drvdata(phy);
+
+ tbh_phy->phy_power_sts = PHY_INITIALIZED;
+
+ return thunderbay_emmc_phy_power(phy, 0);
+}
+
+static const struct phy_ops thunderbay_emmc_phy_ops = {
+ .init = thunderbay_emmc_phy_init,
+ .exit = thunderbay_emmc_phy_exit,
+ .power_on = thunderbay_emmc_phy_power_on,
+ .power_off = thunderbay_emmc_phy_power_off,
+ .owner = THIS_MODULE,
+};
+
+static const struct of_device_id thunderbay_emmc_phy_of_match[] = {
+ { .compatible = "intel,thunderbay-emmc-phy",
+ (void *)&thunderbay_emmc_phy_ops },
+ {}
+};
+MODULE_DEVICE_TABLE(of, thunderbay_emmc_phy_of_match);
+
+static int thunderbay_emmc_phy_probe(struct platform_device *pdev)
+{
+ struct thunderbay_emmc_phy *tbh_phy;
+ struct phy_provider *phy_provider;
+ struct device *dev = &pdev->dev;
+ const struct of_device_id *id;
+ struct phy *generic_phy;
+ struct resource *res;
+
+ if (!dev->of_node)
+ return -ENODEV;
+
+ tbh_phy = devm_kzalloc(dev, sizeof(*tbh_phy), GFP_KERNEL);
+ if (!tbh_phy)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ tbh_phy->reg_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(tbh_phy->reg_base))
+ return PTR_ERR(tbh_phy->reg_base);
+
+ tbh_phy->phy_power_sts = PHY_UNINITIALIZED;
+ id = of_match_node(thunderbay_emmc_phy_of_match, pdev->dev.of_node);
+ if (!id) {
+ dev_err(dev, "failed to get match_node\n");
+ return -EINVAL;
+ }
+
+ generic_phy = devm_phy_create(dev, dev->of_node, id->data);
+ if (IS_ERR(generic_phy)) {
+ dev_err(dev, "failed to create PHY\n");
+ return PTR_ERR(generic_phy);
+ }
+
+ phy_set_drvdata(generic_phy, tbh_phy);
+ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+
+ return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static struct platform_driver thunderbay_emmc_phy_driver = {
+ .probe = thunderbay_emmc_phy_probe,
+ .driver = {
+ .name = "thunderbay-emmc-phy",
+ .of_match_table = thunderbay_emmc_phy_of_match,
+ },
+};
+module_platform_driver(thunderbay_emmc_phy_driver);
+
+MODULE_AUTHOR("Nandhini S <nandhini.srikandan@intel.com>");
+MODULE_AUTHOR("Rashmi A <rashmi.a@intel.com>");
+MODULE_DESCRIPTION("Intel Thunder Bay eMMC PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/mediatek/phy-mtk-io.h b/drivers/phy/mediatek/phy-mtk-io.h
new file mode 100644
index 000000000000..500fcdab165d
--- /dev/null
+++ b/drivers/phy/mediatek/phy-mtk-io.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 MediaTek Inc.
+ *
+ * Author: Chunfeng Yun <chunfeng.yun@mediatek.com>
+ */
+
+#ifndef __PHY_MTK_H__
+#define __PHY_MTK_H__
+
+#include <linux/io.h>
+
+static inline void mtk_phy_clear_bits(void __iomem *reg, u32 bits)
+{
+ u32 tmp = readl(reg);
+
+ tmp &= ~bits;
+ writel(tmp, reg);
+}
+
+static inline void mtk_phy_set_bits(void __iomem *reg, u32 bits)
+{
+ u32 tmp = readl(reg);
+
+ tmp |= bits;
+ writel(tmp, reg);
+}
+
+static inline void mtk_phy_update_bits(void __iomem *reg, u32 mask, u32 val)
+{
+ u32 tmp = readl(reg);
+
+ tmp &= ~mask;
+ tmp |= val & mask;
+ writel(tmp, reg);
+}
+
+#endif
diff --git a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
index 28ad9403c441..67b005d5b9e3 100644
--- a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
+++ b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
@@ -146,6 +146,8 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
return -ENOMEM;
mipi_tx->driver_data = of_device_get_match_data(dev);
+ if (!mipi_tx->driver_data)
+ return -ENODEV;
mipi_tx->regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(mipi_tx->regs))
diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c
index cdcef865fe9e..6d307102f4f6 100644
--- a/drivers/phy/mediatek/phy-mtk-tphy.c
+++ b/drivers/phy/mediatek/phy-mtk-tphy.c
@@ -8,16 +8,18 @@
#include <dt-bindings/phy/phy.h>
#include <linux/clk.h>
#include <linux/delay.h>
-#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
+#include <linux/nvmem-consumer.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/phy/phy.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
+#include "phy-mtk-io.h"
+
/* version V1 sub-banks offset base address */
/* banks shared by multiple phys */
#define SSUSB_SIFSLV_V1_SPLLC 0x000 /* shared by u3 phys */
@@ -41,6 +43,9 @@
#define SSUSB_SIFSLV_V2_U3PHYD 0x200
#define SSUSB_SIFSLV_V2_U3PHYA 0x400
+#define U3P_MISC_REG1 0x04
+#define MR1_EFUSE_AUTO_LOAD_DIS BIT(6)
+
#define U3P_USBPHYACR0 0x000
#define PA0_RG_U2PLL_FORCE_ON BIT(15)
#define PA0_USB20_PLL_PREDIV GENMASK(7, 6)
@@ -133,6 +138,8 @@
#define P3C_RG_SWRST_U3_PHYD_FORCE_EN BIT(24)
#define U3P_U3_PHYA_REG0 0x000
+#define P3A_RG_IEXT_INTR GENMASK(15, 10)
+#define P3A_RG_IEXT_INTR_VAL(x) ((0x3f & (x)) << 10)
#define P3A_RG_CLKDRV_OFF GENMASK(3, 2)
#define P3A_RG_CLKDRV_OFF_VAL(x) ((0x3 & (x)) << 2)
@@ -187,6 +194,19 @@
#define P3D_RG_FWAKE_TH GENMASK(21, 16)
#define P3D_RG_FWAKE_TH_VAL(x) ((0x3f & (x)) << 16)
+#define U3P_U3_PHYD_IMPCAL0 0x010
+#define P3D_RG_FORCE_TX_IMPEL BIT(31)
+#define P3D_RG_TX_IMPEL GENMASK(28, 24)
+#define P3D_RG_TX_IMPEL_VAL(x) ((0x1f & (x)) << 24)
+
+#define U3P_U3_PHYD_IMPCAL1 0x014
+#define P3D_RG_FORCE_RX_IMPEL BIT(31)
+#define P3D_RG_RX_IMPEL GENMASK(28, 24)
+#define P3D_RG_RX_IMPEL_VAL(x) ((0x1f & (x)) << 24)
+
+#define U3P_U3_PHYD_RSV 0x054
+#define P3D_RG_EFUSE_AUTO_LOAD_DIS BIT(12)
+
#define U3P_U3_PHYD_CDR1 0x05c
#define P3D_RG_CDR_BIR_LTD1 GENMASK(28, 24)
#define P3D_RG_CDR_BIR_LTD1_VAL(x) ((0x1f & (x)) << 24)
@@ -307,6 +327,11 @@ struct mtk_phy_pdata {
* 48M PLL, fix it by switching PLL to 26M from default 48M
*/
bool sw_pll_48m_to_26m;
+ /*
+ * Some SoCs (e.g. mt8195) drop a bit when use auto load efuse,
+ * support sw way, also support it for v2/v3 optionally.
+ */
+ bool sw_efuse_supported;
enum mtk_phy_version version;
};
@@ -336,6 +361,10 @@ struct mtk_phy_instance {
struct regmap *type_sw;
u32 type_sw_reg;
u32 type_sw_index;
+ u32 efuse_sw_en;
+ u32 efuse_intr;
+ u32 efuse_tx_imp;
+ u32 efuse_rx_imp;
int eye_src;
int eye_vrt;
int eye_term;
@@ -373,15 +402,11 @@ static void hs_slew_rate_calibrate(struct mtk_tphy *tphy,
return;
/* enable USB ring oscillator */
- tmp = readl(com + U3P_USBPHYACR5);
- tmp |= PA5_RG_U2_HSTX_SRCAL_EN;
- writel(tmp, com + U3P_USBPHYACR5);
+ mtk_phy_set_bits(com + U3P_USBPHYACR5, PA5_RG_U2_HSTX_SRCAL_EN);
udelay(1);
/*enable free run clock */
- tmp = readl(fmreg + U3P_U2FREQ_FMMONR1);
- tmp |= P2F_RG_FRCK_EN;
- writel(tmp, fmreg + U3P_U2FREQ_FMMONR1);
+ mtk_phy_set_bits(fmreg + U3P_U2FREQ_FMMONR1, P2F_RG_FRCK_EN);
/* set cycle count as 1024, and select u2 channel */
tmp = readl(fmreg + U3P_U2FREQ_FMCR0);
@@ -393,9 +418,7 @@ static void hs_slew_rate_calibrate(struct mtk_tphy *tphy,
writel(tmp, fmreg + U3P_U2FREQ_FMCR0);
/* enable frequency meter */
- tmp = readl(fmreg + U3P_U2FREQ_FMCR0);
- tmp |= P2F_RG_FREQDET_EN;
- writel(tmp, fmreg + U3P_U2FREQ_FMCR0);
+ mtk_phy_set_bits(fmreg + U3P_U2FREQ_FMCR0, P2F_RG_FREQDET_EN);
/* ignore return value */
readl_poll_timeout(fmreg + U3P_U2FREQ_FMMONR1, tmp,
@@ -404,14 +427,10 @@ static void hs_slew_rate_calibrate(struct mtk_tphy *tphy,
fm_out = readl(fmreg + U3P_U2FREQ_VALUE);
/* disable frequency meter */
- tmp = readl(fmreg + U3P_U2FREQ_FMCR0);
- tmp &= ~P2F_RG_FREQDET_EN;
- writel(tmp, fmreg + U3P_U2FREQ_FMCR0);
+ mtk_phy_clear_bits(fmreg + U3P_U2FREQ_FMCR0, P2F_RG_FREQDET_EN);
/*disable free run clock */
- tmp = readl(fmreg + U3P_U2FREQ_FMMONR1);
- tmp &= ~P2F_RG_FRCK_EN;
- writel(tmp, fmreg + U3P_U2FREQ_FMMONR1);
+ mtk_phy_clear_bits(fmreg + U3P_U2FREQ_FMMONR1, P2F_RG_FRCK_EN);
if (fm_out) {
/* ( 1024 / FM_OUT ) x reference clock frequency x coef */
@@ -427,63 +446,44 @@ static void hs_slew_rate_calibrate(struct mtk_tphy *tphy,
tphy->src_ref_clk, tphy->src_coef);
/* set HS slew rate */
- tmp = readl(com + U3P_USBPHYACR5);
- tmp &= ~PA5_RG_U2_HSTX_SRCTRL;
- tmp |= PA5_RG_U2_HSTX_SRCTRL_VAL(calibration_val);
- writel(tmp, com + U3P_USBPHYACR5);
+ mtk_phy_update_bits(com + U3P_USBPHYACR5, PA5_RG_U2_HSTX_SRCTRL,
+ PA5_RG_U2_HSTX_SRCTRL_VAL(calibration_val));
/* disable USB ring oscillator */
- tmp = readl(com + U3P_USBPHYACR5);
- tmp &= ~PA5_RG_U2_HSTX_SRCAL_EN;
- writel(tmp, com + U3P_USBPHYACR5);
+ mtk_phy_clear_bits(com + U3P_USBPHYACR5, PA5_RG_U2_HSTX_SRCAL_EN);
}
static void u3_phy_instance_init(struct mtk_tphy *tphy,
struct mtk_phy_instance *instance)
{
struct u3phy_banks *u3_banks = &instance->u3_banks;
- u32 tmp;
/* gating PCIe Analog XTAL clock */
- tmp = readl(u3_banks->spllc + U3P_SPLLC_XTALCTL3);
- tmp |= XC3_RG_U3_XTAL_RX_PWD | XC3_RG_U3_FRC_XTAL_RX_PWD;
- writel(tmp, u3_banks->spllc + U3P_SPLLC_XTALCTL3);
+ mtk_phy_set_bits(u3_banks->spllc + U3P_SPLLC_XTALCTL3,
+ XC3_RG_U3_XTAL_RX_PWD | XC3_RG_U3_FRC_XTAL_RX_PWD);
/* gating XSQ */
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_DA_REG0);
- tmp &= ~P3A_RG_XTAL_EXT_EN_U3;
- tmp |= P3A_RG_XTAL_EXT_EN_U3_VAL(2);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_DA_REG0);
-
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_REG9);
- tmp &= ~P3A_RG_RX_DAC_MUX;
- tmp |= P3A_RG_RX_DAC_MUX_VAL(4);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_REG9);
-
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_REG6);
- tmp &= ~P3A_RG_TX_EIDLE_CM;
- tmp |= P3A_RG_TX_EIDLE_CM_VAL(0xe);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_REG6);
-
- tmp = readl(u3_banks->phyd + U3P_U3_PHYD_CDR1);
- tmp &= ~(P3D_RG_CDR_BIR_LTD0 | P3D_RG_CDR_BIR_LTD1);
- tmp |= P3D_RG_CDR_BIR_LTD0_VAL(0xc) | P3D_RG_CDR_BIR_LTD1_VAL(0x3);
- writel(tmp, u3_banks->phyd + U3P_U3_PHYD_CDR1);
-
- tmp = readl(u3_banks->phyd + U3P_U3_PHYD_LFPS1);
- tmp &= ~P3D_RG_FWAKE_TH;
- tmp |= P3D_RG_FWAKE_TH_VAL(0x34);
- writel(tmp, u3_banks->phyd + U3P_U3_PHYD_LFPS1);
-
- tmp = readl(u3_banks->phyd + U3P_U3_PHYD_RXDET1);
- tmp &= ~P3D_RG_RXDET_STB2_SET;
- tmp |= P3D_RG_RXDET_STB2_SET_VAL(0x10);
- writel(tmp, u3_banks->phyd + U3P_U3_PHYD_RXDET1);
-
- tmp = readl(u3_banks->phyd + U3P_U3_PHYD_RXDET2);
- tmp &= ~P3D_RG_RXDET_STB2_SET_P3;
- tmp |= P3D_RG_RXDET_STB2_SET_P3_VAL(0x10);
- writel(tmp, u3_banks->phyd + U3P_U3_PHYD_RXDET2);
+ mtk_phy_update_bits(u3_banks->phya + U3P_U3_PHYA_DA_REG0,
+ P3A_RG_XTAL_EXT_EN_U3, P3A_RG_XTAL_EXT_EN_U3_VAL(2));
+
+ mtk_phy_update_bits(u3_banks->phya + U3P_U3_PHYA_REG9,
+ P3A_RG_RX_DAC_MUX, P3A_RG_RX_DAC_MUX_VAL(4));
+
+ mtk_phy_update_bits(u3_banks->phya + U3P_U3_PHYA_REG6,
+ P3A_RG_TX_EIDLE_CM, P3A_RG_TX_EIDLE_CM_VAL(0xe));
+
+ mtk_phy_update_bits(u3_banks->phyd + U3P_U3_PHYD_CDR1,
+ P3D_RG_CDR_BIR_LTD0 | P3D_RG_CDR_BIR_LTD1,
+ P3D_RG_CDR_BIR_LTD0_VAL(0xc) | P3D_RG_CDR_BIR_LTD1_VAL(0x3));
+
+ mtk_phy_update_bits(u3_banks->phyd + U3P_U3_PHYD_LFPS1,
+ P3D_RG_FWAKE_TH, P3D_RG_FWAKE_TH_VAL(0x34));
+
+ mtk_phy_update_bits(u3_banks->phyd + U3P_U3_PHYD_RXDET1,
+ P3D_RG_RXDET_STB2_SET, P3D_RG_RXDET_STB2_SET_VAL(0x10));
+
+ mtk_phy_update_bits(u3_banks->phyd + U3P_U3_PHYD_RXDET2,
+ P3D_RG_RXDET_STB2_SET_P3, P3D_RG_RXDET_STB2_SET_P3_VAL(0x10));
dev_dbg(tphy->dev, "%s(%d)\n", __func__, instance->index);
}
@@ -493,26 +493,20 @@ static void u2_phy_pll_26m_set(struct mtk_tphy *tphy,
{
struct u2phy_banks *u2_banks = &instance->u2_banks;
void __iomem *com = u2_banks->com;
- u32 tmp;
if (!tphy->pdata->sw_pll_48m_to_26m)
return;
- tmp = readl(com + U3P_USBPHYACR0);
- tmp &= ~PA0_USB20_PLL_PREDIV;
- tmp |= PA0_USB20_PLL_PREDIV_VAL(0);
- writel(tmp, com + U3P_USBPHYACR0);
+ mtk_phy_update_bits(com + U3P_USBPHYACR0, PA0_USB20_PLL_PREDIV,
+ PA0_USB20_PLL_PREDIV_VAL(0));
- tmp = readl(com + U3P_USBPHYACR2);
- tmp &= ~PA2_RG_U2PLL_BW;
- tmp |= PA2_RG_U2PLL_BW_VAL(3);
- writel(tmp, com + U3P_USBPHYACR2);
+ mtk_phy_update_bits(com + U3P_USBPHYACR2, PA2_RG_U2PLL_BW,
+ PA2_RG_U2PLL_BW_VAL(3));
writel(P2R_RG_U2PLL_FBDIV_26M, com + U3P_U2PHYA_RESV);
- tmp = readl(com + U3P_U2PHYA_RESV1);
- tmp |= P2R_RG_U2PLL_FRA_EN | P2R_RG_U2PLL_REFCLK_SEL;
- writel(tmp, com + U3P_U2PHYA_RESV1);
+ mtk_phy_set_bits(com + U3P_U2PHYA_RESV1,
+ P2R_RG_U2PLL_FRA_EN | P2R_RG_U2PLL_REFCLK_SEL);
}
static void u2_phy_instance_init(struct mtk_tphy *tphy,
@@ -521,58 +515,40 @@ static void u2_phy_instance_init(struct mtk_tphy *tphy,
struct u2phy_banks *u2_banks = &instance->u2_banks;
void __iomem *com = u2_banks->com;
u32 index = instance->index;
- u32 tmp;
/* switch to USB function, and enable usb pll */
- tmp = readl(com + U3P_U2PHYDTM0);
- tmp &= ~(P2C_FORCE_UART_EN | P2C_FORCE_SUSPENDM);
- tmp |= P2C_RG_XCVRSEL_VAL(1) | P2C_RG_DATAIN_VAL(0);
- writel(tmp, com + U3P_U2PHYDTM0);
+ mtk_phy_clear_bits(com + U3P_U2PHYDTM0, P2C_FORCE_UART_EN | P2C_FORCE_SUSPENDM);
+
+ mtk_phy_update_bits(com + U3P_U2PHYDTM0, P2C_RG_XCVRSEL | P2C_RG_DATAIN,
+ P2C_RG_XCVRSEL_VAL(1) | P2C_RG_DATAIN_VAL(0));
- tmp = readl(com + U3P_U2PHYDTM1);
- tmp &= ~P2C_RG_UART_EN;
- writel(tmp, com + U3P_U2PHYDTM1);
+ mtk_phy_clear_bits(com + U3P_U2PHYDTM1, P2C_RG_UART_EN);
- tmp = readl(com + U3P_USBPHYACR0);
- tmp |= PA0_RG_USB20_INTR_EN;
- writel(tmp, com + U3P_USBPHYACR0);
+ mtk_phy_set_bits(com + U3P_USBPHYACR0, PA0_RG_USB20_INTR_EN);
/* disable switch 100uA current to SSUSB */
- tmp = readl(com + U3P_USBPHYACR5);
- tmp &= ~PA5_RG_U2_HS_100U_U3_EN;
- writel(tmp, com + U3P_USBPHYACR5);
-
- if (!index) {
- tmp = readl(com + U3P_U2PHYACR4);
- tmp &= ~P2C_U2_GPIO_CTR_MSK;
- writel(tmp, com + U3P_U2PHYACR4);
- }
+ mtk_phy_clear_bits(com + U3P_USBPHYACR5, PA5_RG_U2_HS_100U_U3_EN);
+
+ if (!index)
+ mtk_phy_clear_bits(com + U3P_U2PHYACR4, P2C_U2_GPIO_CTR_MSK);
if (tphy->pdata->avoid_rx_sen_degradation) {
if (!index) {
- tmp = readl(com + U3P_USBPHYACR2);
- tmp |= PA2_RG_SIF_U2PLL_FORCE_EN;
- writel(tmp, com + U3P_USBPHYACR2);
+ mtk_phy_set_bits(com + U3P_USBPHYACR2, PA2_RG_SIF_U2PLL_FORCE_EN);
- tmp = readl(com + U3D_U2PHYDCR0);
- tmp &= ~P2C_RG_SIF_U2PLL_FORCE_ON;
- writel(tmp, com + U3D_U2PHYDCR0);
+ mtk_phy_clear_bits(com + U3D_U2PHYDCR0, P2C_RG_SIF_U2PLL_FORCE_ON);
} else {
- tmp = readl(com + U3D_U2PHYDCR0);
- tmp |= P2C_RG_SIF_U2PLL_FORCE_ON;
- writel(tmp, com + U3D_U2PHYDCR0);
+ mtk_phy_set_bits(com + U3D_U2PHYDCR0, P2C_RG_SIF_U2PLL_FORCE_ON);
- tmp = readl(com + U3P_U2PHYDTM0);
- tmp |= P2C_RG_SUSPENDM | P2C_FORCE_SUSPENDM;
- writel(tmp, com + U3P_U2PHYDTM0);
+ mtk_phy_set_bits(com + U3P_U2PHYDTM0,
+ P2C_RG_SUSPENDM | P2C_FORCE_SUSPENDM);
}
}
- tmp = readl(com + U3P_USBPHYACR6);
- tmp &= ~PA6_RG_U2_BC11_SW_EN; /* DP/DM BC1.1 path Disable */
- tmp &= ~PA6_RG_U2_SQTH;
- tmp |= PA6_RG_U2_SQTH_VAL(2);
- writel(tmp, com + U3P_USBPHYACR6);
+ /* DP/DM BC1.1 path Disable */
+ mtk_phy_clear_bits(com + U3P_USBPHYACR6, PA6_RG_U2_BC11_SW_EN);
+
+ mtk_phy_update_bits(com + U3P_USBPHYACR6, PA6_RG_U2_SQTH, PA6_RG_U2_SQTH_VAL(2));
/* Workaround only for mt8195, HW fix it for others (V3) */
u2_phy_pll_26m_set(tphy, instance);
@@ -586,30 +562,21 @@ static void u2_phy_instance_power_on(struct mtk_tphy *tphy,
struct u2phy_banks *u2_banks = &instance->u2_banks;
void __iomem *com = u2_banks->com;
u32 index = instance->index;
- u32 tmp;
- tmp = readl(com + U3P_U2PHYDTM0);
- tmp &= ~(P2C_RG_XCVRSEL | P2C_RG_DATAIN | P2C_DTM0_PART_MASK);
- writel(tmp, com + U3P_U2PHYDTM0);
+ mtk_phy_clear_bits(com + U3P_U2PHYDTM0,
+ P2C_RG_XCVRSEL | P2C_RG_DATAIN | P2C_DTM0_PART_MASK);
/* OTG Enable */
- tmp = readl(com + U3P_USBPHYACR6);
- tmp |= PA6_RG_U2_OTG_VBUSCMP_EN;
- writel(tmp, com + U3P_USBPHYACR6);
+ mtk_phy_set_bits(com + U3P_USBPHYACR6, PA6_RG_U2_OTG_VBUSCMP_EN);
+
+ mtk_phy_set_bits(com + U3P_U2PHYDTM1, P2C_RG_VBUSVALID | P2C_RG_AVALID);
- tmp = readl(com + U3P_U2PHYDTM1);
- tmp |= P2C_RG_VBUSVALID | P2C_RG_AVALID;
- tmp &= ~P2C_RG_SESSEND;
- writel(tmp, com + U3P_U2PHYDTM1);
+ mtk_phy_clear_bits(com + U3P_U2PHYDTM1, P2C_RG_SESSEND);
if (tphy->pdata->avoid_rx_sen_degradation && index) {
- tmp = readl(com + U3D_U2PHYDCR0);
- tmp |= P2C_RG_SIF_U2PLL_FORCE_ON;
- writel(tmp, com + U3D_U2PHYDCR0);
+ mtk_phy_set_bits(com + U3D_U2PHYDCR0, P2C_RG_SIF_U2PLL_FORCE_ON);
- tmp = readl(com + U3P_U2PHYDTM0);
- tmp |= P2C_RG_SUSPENDM | P2C_FORCE_SUSPENDM;
- writel(tmp, com + U3P_U2PHYDTM0);
+ mtk_phy_set_bits(com + U3P_U2PHYDTM0, P2C_RG_SUSPENDM | P2C_FORCE_SUSPENDM);
}
dev_dbg(tphy->dev, "%s(%d)\n", __func__, index);
}
@@ -620,30 +587,20 @@ static void u2_phy_instance_power_off(struct mtk_tphy *tphy,
struct u2phy_banks *u2_banks = &instance->u2_banks;
void __iomem *com = u2_banks->com;
u32 index = instance->index;
- u32 tmp;
- tmp = readl(com + U3P_U2PHYDTM0);
- tmp &= ~(P2C_RG_XCVRSEL | P2C_RG_DATAIN);
- writel(tmp, com + U3P_U2PHYDTM0);
+ mtk_phy_clear_bits(com + U3P_U2PHYDTM0, P2C_RG_XCVRSEL | P2C_RG_DATAIN);
/* OTG Disable */
- tmp = readl(com + U3P_USBPHYACR6);
- tmp &= ~PA6_RG_U2_OTG_VBUSCMP_EN;
- writel(tmp, com + U3P_USBPHYACR6);
+ mtk_phy_clear_bits(com + U3P_USBPHYACR6, PA6_RG_U2_OTG_VBUSCMP_EN);
+
+ mtk_phy_clear_bits(com + U3P_U2PHYDTM1, P2C_RG_VBUSVALID | P2C_RG_AVALID);
- tmp = readl(com + U3P_U2PHYDTM1);
- tmp &= ~(P2C_RG_VBUSVALID | P2C_RG_AVALID);
- tmp |= P2C_RG_SESSEND;
- writel(tmp, com + U3P_U2PHYDTM1);
+ mtk_phy_set_bits(com + U3P_U2PHYDTM1, P2C_RG_SESSEND);
if (tphy->pdata->avoid_rx_sen_degradation && index) {
- tmp = readl(com + U3P_U2PHYDTM0);
- tmp &= ~(P2C_RG_SUSPENDM | P2C_FORCE_SUSPENDM);
- writel(tmp, com + U3P_U2PHYDTM0);
+ mtk_phy_clear_bits(com + U3P_U2PHYDTM0, P2C_RG_SUSPENDM | P2C_FORCE_SUSPENDM);
- tmp = readl(com + U3D_U2PHYDCR0);
- tmp &= ~P2C_RG_SIF_U2PLL_FORCE_ON;
- writel(tmp, com + U3D_U2PHYDCR0);
+ mtk_phy_clear_bits(com + U3D_U2PHYDCR0, P2C_RG_SIF_U2PLL_FORCE_ON);
}
dev_dbg(tphy->dev, "%s(%d)\n", __func__, index);
@@ -655,16 +612,11 @@ static void u2_phy_instance_exit(struct mtk_tphy *tphy,
struct u2phy_banks *u2_banks = &instance->u2_banks;
void __iomem *com = u2_banks->com;
u32 index = instance->index;
- u32 tmp;
if (tphy->pdata->avoid_rx_sen_degradation && index) {
- tmp = readl(com + U3D_U2PHYDCR0);
- tmp &= ~P2C_RG_SIF_U2PLL_FORCE_ON;
- writel(tmp, com + U3D_U2PHYDCR0);
+ mtk_phy_clear_bits(com + U3D_U2PHYDCR0, P2C_RG_SIF_U2PLL_FORCE_ON);
- tmp = readl(com + U3P_U2PHYDTM0);
- tmp &= ~P2C_FORCE_SUSPENDM;
- writel(tmp, com + U3P_U2PHYDTM0);
+ mtk_phy_clear_bits(com + U3P_U2PHYDTM0, P2C_FORCE_SUSPENDM);
}
}
@@ -697,69 +649,50 @@ static void pcie_phy_instance_init(struct mtk_tphy *tphy,
struct mtk_phy_instance *instance)
{
struct u3phy_banks *u3_banks = &instance->u3_banks;
- u32 tmp;
+ void __iomem *phya = u3_banks->phya;
if (tphy->pdata->version != MTK_PHY_V1)
return;
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_DA_REG0);
- tmp &= ~(P3A_RG_XTAL_EXT_PE1H | P3A_RG_XTAL_EXT_PE2H);
- tmp |= P3A_RG_XTAL_EXT_PE1H_VAL(0x2) | P3A_RG_XTAL_EXT_PE2H_VAL(0x2);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_DA_REG0);
+ mtk_phy_update_bits(phya + U3P_U3_PHYA_DA_REG0,
+ P3A_RG_XTAL_EXT_PE1H | P3A_RG_XTAL_EXT_PE2H,
+ P3A_RG_XTAL_EXT_PE1H_VAL(0x2) | P3A_RG_XTAL_EXT_PE2H_VAL(0x2));
/* ref clk drive */
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_REG1);
- tmp &= ~P3A_RG_CLKDRV_AMP;
- tmp |= P3A_RG_CLKDRV_AMP_VAL(0x4);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_REG1);
+ mtk_phy_update_bits(phya + U3P_U3_PHYA_REG1, P3A_RG_CLKDRV_AMP,
+ P3A_RG_CLKDRV_AMP_VAL(0x4));
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_REG0);
- tmp &= ~P3A_RG_CLKDRV_OFF;
- tmp |= P3A_RG_CLKDRV_OFF_VAL(0x1);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_REG0);
+ mtk_phy_update_bits(phya + U3P_U3_PHYA_REG0, P3A_RG_CLKDRV_OFF,
+ P3A_RG_CLKDRV_OFF_VAL(0x1));
/* SSC delta -5000ppm */
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_DA_REG20);
- tmp &= ~P3A_RG_PLL_DELTA1_PE2H;
- tmp |= P3A_RG_PLL_DELTA1_PE2H_VAL(0x3c);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_DA_REG20);
+ mtk_phy_update_bits(phya + U3P_U3_PHYA_DA_REG20, P3A_RG_PLL_DELTA1_PE2H,
+ P3A_RG_PLL_DELTA1_PE2H_VAL(0x3c));
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_DA_REG25);
- tmp &= ~P3A_RG_PLL_DELTA_PE2H;
- tmp |= P3A_RG_PLL_DELTA_PE2H_VAL(0x36);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_DA_REG25);
+ mtk_phy_update_bits(phya + U3P_U3_PHYA_DA_REG25, P3A_RG_PLL_DELTA_PE2H,
+ P3A_RG_PLL_DELTA_PE2H_VAL(0x36));
/* change pll BW 0.6M */
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_DA_REG5);
- tmp &= ~(P3A_RG_PLL_BR_PE2H | P3A_RG_PLL_IC_PE2H);
- tmp |= P3A_RG_PLL_BR_PE2H_VAL(0x1) | P3A_RG_PLL_IC_PE2H_VAL(0x1);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_DA_REG5);
-
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_DA_REG4);
- tmp &= ~(P3A_RG_PLL_DIVEN_PE2H | P3A_RG_PLL_BC_PE2H);
- tmp |= P3A_RG_PLL_BC_PE2H_VAL(0x3);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_DA_REG4);
-
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_DA_REG6);
- tmp &= ~P3A_RG_PLL_IR_PE2H;
- tmp |= P3A_RG_PLL_IR_PE2H_VAL(0x2);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_DA_REG6);
-
- tmp = readl(u3_banks->phya + U3P_U3_PHYA_DA_REG7);
- tmp &= ~P3A_RG_PLL_BP_PE2H;
- tmp |= P3A_RG_PLL_BP_PE2H_VAL(0xa);
- writel(tmp, u3_banks->phya + U3P_U3_PHYA_DA_REG7);
+ mtk_phy_update_bits(phya + U3P_U3_PHYA_DA_REG5,
+ P3A_RG_PLL_BR_PE2H | P3A_RG_PLL_IC_PE2H,
+ P3A_RG_PLL_BR_PE2H_VAL(0x1) | P3A_RG_PLL_IC_PE2H_VAL(0x1));
+
+ mtk_phy_update_bits(phya + U3P_U3_PHYA_DA_REG4,
+ P3A_RG_PLL_DIVEN_PE2H | P3A_RG_PLL_BC_PE2H,
+ P3A_RG_PLL_BC_PE2H_VAL(0x3));
+
+ mtk_phy_update_bits(phya + U3P_U3_PHYA_DA_REG6, P3A_RG_PLL_IR_PE2H,
+ P3A_RG_PLL_IR_PE2H_VAL(0x2));
+
+ mtk_phy_update_bits(phya + U3P_U3_PHYA_DA_REG7, P3A_RG_PLL_BP_PE2H,
+ P3A_RG_PLL_BP_PE2H_VAL(0xa));
/* Tx Detect Rx Timing: 10us -> 5us */
- tmp = readl(u3_banks->phyd + U3P_U3_PHYD_RXDET1);
- tmp &= ~P3D_RG_RXDET_STB2_SET;
- tmp |= P3D_RG_RXDET_STB2_SET_VAL(0x10);
- writel(tmp, u3_banks->phyd + U3P_U3_PHYD_RXDET1);
+ mtk_phy_update_bits(u3_banks->phyd + U3P_U3_PHYD_RXDET1,
+ P3D_RG_RXDET_STB2_SET, P3D_RG_RXDET_STB2_SET_VAL(0x10));
- tmp = readl(u3_banks->phyd + U3P_U3_PHYD_RXDET2);
- tmp &= ~P3D_RG_RXDET_STB2_SET_P3;
- tmp |= P3D_RG_RXDET_STB2_SET_P3_VAL(0x10);
- writel(tmp, u3_banks->phyd + U3P_U3_PHYD_RXDET2);
+ mtk_phy_update_bits(u3_banks->phyd + U3P_U3_PHYD_RXDET2,
+ P3D_RG_RXDET_STB2_SET_P3, P3D_RG_RXDET_STB2_SET_P3_VAL(0x10));
/* wait for PCIe subsys register to active */
usleep_range(2500, 3000);
@@ -770,15 +703,12 @@ static void pcie_phy_instance_power_on(struct mtk_tphy *tphy,
struct mtk_phy_instance *instance)
{
struct u3phy_banks *bank = &instance->u3_banks;
- u32 tmp;
- tmp = readl(bank->chip + U3P_U3_CHIP_GPIO_CTLD);
- tmp &= ~(P3C_FORCE_IP_SW_RST | P3C_REG_IP_SW_RST);
- writel(tmp, bank->chip + U3P_U3_CHIP_GPIO_CTLD);
+ mtk_phy_clear_bits(bank->chip + U3P_U3_CHIP_GPIO_CTLD,
+ P3C_FORCE_IP_SW_RST | P3C_REG_IP_SW_RST);
- tmp = readl(bank->chip + U3P_U3_CHIP_GPIO_CTLE);
- tmp &= ~(P3C_RG_SWRST_U3_PHYD_FORCE_EN | P3C_RG_SWRST_U3_PHYD);
- writel(tmp, bank->chip + U3P_U3_CHIP_GPIO_CTLE);
+ mtk_phy_clear_bits(bank->chip + U3P_U3_CHIP_GPIO_CTLE,
+ P3C_RG_SWRST_U3_PHYD_FORCE_EN | P3C_RG_SWRST_U3_PHYD);
}
static void pcie_phy_instance_power_off(struct mtk_tphy *tphy,
@@ -786,15 +716,12 @@ static void pcie_phy_instance_power_off(struct mtk_tphy *tphy,
{
struct u3phy_banks *bank = &instance->u3_banks;
- u32 tmp;
- tmp = readl(bank->chip + U3P_U3_CHIP_GPIO_CTLD);
- tmp |= P3C_FORCE_IP_SW_RST | P3C_REG_IP_SW_RST;
- writel(tmp, bank->chip + U3P_U3_CHIP_GPIO_CTLD);
+ mtk_phy_set_bits(bank->chip + U3P_U3_CHIP_GPIO_CTLD,
+ P3C_FORCE_IP_SW_RST | P3C_REG_IP_SW_RST);
- tmp = readl(bank->chip + U3P_U3_CHIP_GPIO_CTLE);
- tmp |= P3C_RG_SWRST_U3_PHYD_FORCE_EN | P3C_RG_SWRST_U3_PHYD;
- writel(tmp, bank->chip + U3P_U3_CHIP_GPIO_CTLE);
+ mtk_phy_set_bits(bank->chip + U3P_U3_CHIP_GPIO_CTLE,
+ P3C_RG_SWRST_U3_PHYD_FORCE_EN | P3C_RG_SWRST_U3_PHYD);
}
static void sata_phy_instance_init(struct mtk_tphy *tphy,
@@ -802,55 +729,42 @@ static void sata_phy_instance_init(struct mtk_tphy *tphy,
{
struct u3phy_banks *u3_banks = &instance->u3_banks;
void __iomem *phyd = u3_banks->phyd;
- u32 tmp;
/* charge current adjustment */
- tmp = readl(phyd + ANA_RG_CTRL_SIGNAL6);
- tmp &= ~(RG_CDR_BIRLTR_GEN1_MSK | RG_CDR_BC_GEN1_MSK);
- tmp |= RG_CDR_BIRLTR_GEN1_VAL(0x6) | RG_CDR_BC_GEN1_VAL(0x1a);
- writel(tmp, phyd + ANA_RG_CTRL_SIGNAL6);
-
- tmp = readl(phyd + ANA_EQ_EYE_CTRL_SIGNAL4);
- tmp &= ~RG_CDR_BIRLTD0_GEN1_MSK;
- tmp |= RG_CDR_BIRLTD0_GEN1_VAL(0x18);
- writel(tmp, phyd + ANA_EQ_EYE_CTRL_SIGNAL4);
-
- tmp = readl(phyd + ANA_EQ_EYE_CTRL_SIGNAL5);
- tmp &= ~RG_CDR_BIRLTD0_GEN3_MSK;
- tmp |= RG_CDR_BIRLTD0_GEN3_VAL(0x06);
- writel(tmp, phyd + ANA_EQ_EYE_CTRL_SIGNAL5);
-
- tmp = readl(phyd + ANA_RG_CTRL_SIGNAL4);
- tmp &= ~(RG_CDR_BICLTR_GEN1_MSK | RG_CDR_BR_GEN2_MSK);
- tmp |= RG_CDR_BICLTR_GEN1_VAL(0x0c) | RG_CDR_BR_GEN2_VAL(0x07);
- writel(tmp, phyd + ANA_RG_CTRL_SIGNAL4);
-
- tmp = readl(phyd + PHYD_CTRL_SIGNAL_MODE4);
- tmp &= ~(RG_CDR_BICLTD0_GEN1_MSK | RG_CDR_BICLTD1_GEN1_MSK);
- tmp |= RG_CDR_BICLTD0_GEN1_VAL(0x08) | RG_CDR_BICLTD1_GEN1_VAL(0x02);
- writel(tmp, phyd + PHYD_CTRL_SIGNAL_MODE4);
-
- tmp = readl(phyd + PHYD_DESIGN_OPTION2);
- tmp &= ~RG_LOCK_CNT_SEL_MSK;
- tmp |= RG_LOCK_CNT_SEL_VAL(0x02);
- writel(tmp, phyd + PHYD_DESIGN_OPTION2);
-
- tmp = readl(phyd + PHYD_DESIGN_OPTION9);
- tmp &= ~(RG_T2_MIN_MSK | RG_TG_MIN_MSK |
- RG_T2_MAX_MSK | RG_TG_MAX_MSK);
- tmp |= RG_T2_MIN_VAL(0x12) | RG_TG_MIN_VAL(0x04) |
- RG_T2_MAX_VAL(0x31) | RG_TG_MAX_VAL(0x0e);
- writel(tmp, phyd + PHYD_DESIGN_OPTION9);
-
- tmp = readl(phyd + ANA_RG_CTRL_SIGNAL1);
- tmp &= ~RG_IDRV_0DB_GEN1_MSK;
- tmp |= RG_IDRV_0DB_GEN1_VAL(0x20);
- writel(tmp, phyd + ANA_RG_CTRL_SIGNAL1);
-
- tmp = readl(phyd + ANA_EQ_EYE_CTRL_SIGNAL1);
- tmp &= ~RG_EQ_DLEQ_LFI_GEN1_MSK;
- tmp |= RG_EQ_DLEQ_LFI_GEN1_VAL(0x03);
- writel(tmp, phyd + ANA_EQ_EYE_CTRL_SIGNAL1);
+ mtk_phy_update_bits(phyd + ANA_RG_CTRL_SIGNAL6,
+ RG_CDR_BIRLTR_GEN1_MSK | RG_CDR_BC_GEN1_MSK,
+ RG_CDR_BIRLTR_GEN1_VAL(0x6) | RG_CDR_BC_GEN1_VAL(0x1a));
+
+ mtk_phy_update_bits(phyd + ANA_EQ_EYE_CTRL_SIGNAL4, RG_CDR_BIRLTD0_GEN1_MSK,
+ RG_CDR_BIRLTD0_GEN1_VAL(0x18));
+
+ mtk_phy_update_bits(phyd + ANA_EQ_EYE_CTRL_SIGNAL5, RG_CDR_BIRLTD0_GEN3_MSK,
+ RG_CDR_BIRLTD0_GEN3_VAL(0x06));
+
+ mtk_phy_update_bits(phyd + ANA_RG_CTRL_SIGNAL4,
+ RG_CDR_BICLTR_GEN1_MSK | RG_CDR_BR_GEN2_MSK,
+ RG_CDR_BICLTR_GEN1_VAL(0x0c) | RG_CDR_BR_GEN2_VAL(0x07));
+
+ mtk_phy_update_bits(phyd + PHYD_CTRL_SIGNAL_MODE4,
+ RG_CDR_BICLTD0_GEN1_MSK | RG_CDR_BICLTD1_GEN1_MSK,
+ RG_CDR_BICLTD0_GEN1_VAL(0x08) | RG_CDR_BICLTD1_GEN1_VAL(0x02));
+
+ mtk_phy_update_bits(phyd + PHYD_DESIGN_OPTION2, RG_LOCK_CNT_SEL_MSK,
+ RG_LOCK_CNT_SEL_VAL(0x02));
+
+ mtk_phy_update_bits(phyd + PHYD_DESIGN_OPTION9,
+ RG_T2_MIN_MSK | RG_TG_MIN_MSK,
+ RG_T2_MIN_VAL(0x12) | RG_TG_MIN_VAL(0x04));
+
+ mtk_phy_update_bits(phyd + PHYD_DESIGN_OPTION9,
+ RG_T2_MAX_MSK | RG_TG_MAX_MSK,
+ RG_T2_MAX_VAL(0x31) | RG_TG_MAX_VAL(0x0e));
+
+ mtk_phy_update_bits(phyd + ANA_RG_CTRL_SIGNAL1, RG_IDRV_0DB_GEN1_MSK,
+ RG_IDRV_0DB_GEN1_VAL(0x20));
+
+ mtk_phy_update_bits(phyd + ANA_EQ_EYE_CTRL_SIGNAL1, RG_EQ_DLEQ_LFI_GEN1_MSK,
+ RG_EQ_DLEQ_LFI_GEN1_VAL(0x03));
dev_dbg(tphy->dev, "%s(%d)\n", __func__, instance->index);
}
@@ -938,48 +852,29 @@ static void u2_phy_props_set(struct mtk_tphy *tphy,
{
struct u2phy_banks *u2_banks = &instance->u2_banks;
void __iomem *com = u2_banks->com;
- u32 tmp;
- if (instance->bc12_en) {
- tmp = readl(com + U3P_U2PHYBC12C);
- tmp |= P2C_RG_CHGDT_EN; /* BC1.2 path Enable */
- writel(tmp, com + U3P_U2PHYBC12C);
- }
+ if (instance->bc12_en) /* BC1.2 path Enable */
+ mtk_phy_set_bits(com + U3P_U2PHYBC12C, P2C_RG_CHGDT_EN);
- if (tphy->pdata->version < MTK_PHY_V3 && instance->eye_src) {
- tmp = readl(com + U3P_USBPHYACR5);
- tmp &= ~PA5_RG_U2_HSTX_SRCTRL;
- tmp |= PA5_RG_U2_HSTX_SRCTRL_VAL(instance->eye_src);
- writel(tmp, com + U3P_USBPHYACR5);
- }
+ if (tphy->pdata->version < MTK_PHY_V3 && instance->eye_src)
+ mtk_phy_update_bits(com + U3P_USBPHYACR5, PA5_RG_U2_HSTX_SRCTRL,
+ PA5_RG_U2_HSTX_SRCTRL_VAL(instance->eye_src));
- if (instance->eye_vrt) {
- tmp = readl(com + U3P_USBPHYACR1);
- tmp &= ~PA1_RG_VRT_SEL;
- tmp |= PA1_RG_VRT_SEL_VAL(instance->eye_vrt);
- writel(tmp, com + U3P_USBPHYACR1);
- }
+ if (instance->eye_vrt)
+ mtk_phy_update_bits(com + U3P_USBPHYACR1, PA1_RG_VRT_SEL,
+ PA1_RG_VRT_SEL_VAL(instance->eye_vrt));
- if (instance->eye_term) {
- tmp = readl(com + U3P_USBPHYACR1);
- tmp &= ~PA1_RG_TERM_SEL;
- tmp |= PA1_RG_TERM_SEL_VAL(instance->eye_term);
- writel(tmp, com + U3P_USBPHYACR1);
- }
+ if (instance->eye_term)
+ mtk_phy_update_bits(com + U3P_USBPHYACR1, PA1_RG_TERM_SEL,
+ PA1_RG_TERM_SEL_VAL(instance->eye_term));
- if (instance->intr) {
- tmp = readl(com + U3P_USBPHYACR1);
- tmp &= ~PA1_RG_INTR_CAL;
- tmp |= PA1_RG_INTR_CAL_VAL(instance->intr);
- writel(tmp, com + U3P_USBPHYACR1);
- }
+ if (instance->intr)
+ mtk_phy_update_bits(com + U3P_USBPHYACR1, PA1_RG_INTR_CAL,
+ PA1_RG_INTR_CAL_VAL(instance->intr));
- if (instance->discth) {
- tmp = readl(com + U3P_USBPHYACR6);
- tmp &= ~PA6_RG_U2_DISCTH;
- tmp |= PA6_RG_U2_DISCTH_VAL(instance->discth);
- writel(tmp, com + U3P_USBPHYACR6);
- }
+ if (instance->discth)
+ mtk_phy_update_bits(com + U3P_USBPHYACR6, PA6_RG_U2_DISCTH,
+ PA6_RG_U2_DISCTH_VAL(instance->discth));
}
/* type switch for usb3/pcie/sgmii/sata */
@@ -1040,6 +935,117 @@ static int phy_type_set(struct mtk_phy_instance *instance)
return 0;
}
+static int phy_efuse_get(struct mtk_tphy *tphy, struct mtk_phy_instance *instance)
+{
+ struct device *dev = &instance->phy->dev;
+ int ret = 0;
+
+ /* tphy v1 doesn't support sw efuse, skip it */
+ if (!tphy->pdata->sw_efuse_supported) {
+ instance->efuse_sw_en = 0;
+ return 0;
+ }
+
+ /* software efuse is optional */
+ instance->efuse_sw_en = device_property_read_bool(dev, "nvmem-cells");
+ if (!instance->efuse_sw_en)
+ return 0;
+
+ switch (instance->type) {
+ case PHY_TYPE_USB2:
+ ret = nvmem_cell_read_variable_le_u32(dev, "intr", &instance->efuse_intr);
+ if (ret) {
+ dev_err(dev, "fail to get u2 intr efuse, %d\n", ret);
+ break;
+ }
+
+ /* no efuse, ignore it */
+ if (!instance->efuse_intr) {
+ dev_warn(dev, "no u2 intr efuse, but dts enable it\n");
+ instance->efuse_sw_en = 0;
+ break;
+ }
+
+ dev_dbg(dev, "u2 efuse - intr %x\n", instance->efuse_intr);
+ break;
+
+ case PHY_TYPE_USB3:
+ case PHY_TYPE_PCIE:
+ ret = nvmem_cell_read_variable_le_u32(dev, "intr", &instance->efuse_intr);
+ if (ret) {
+ dev_err(dev, "fail to get u3 intr efuse, %d\n", ret);
+ break;
+ }
+
+ ret = nvmem_cell_read_variable_le_u32(dev, "rx_imp", &instance->efuse_rx_imp);
+ if (ret) {
+ dev_err(dev, "fail to get u3 rx_imp efuse, %d\n", ret);
+ break;
+ }
+
+ ret = nvmem_cell_read_variable_le_u32(dev, "tx_imp", &instance->efuse_tx_imp);
+ if (ret) {
+ dev_err(dev, "fail to get u3 tx_imp efuse, %d\n", ret);
+ break;
+ }
+
+ /* no efuse, ignore it */
+ if (!instance->efuse_intr &&
+ !instance->efuse_rx_imp &&
+ !instance->efuse_rx_imp) {
+ dev_warn(dev, "no u3 intr efuse, but dts enable it\n");
+ instance->efuse_sw_en = 0;
+ break;
+ }
+
+ dev_dbg(dev, "u3 efuse - intr %x, rx_imp %x, tx_imp %x\n",
+ instance->efuse_intr, instance->efuse_rx_imp,instance->efuse_tx_imp);
+ break;
+ default:
+ dev_err(dev, "no sw efuse for type %d\n", instance->type);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static void phy_efuse_set(struct mtk_phy_instance *instance)
+{
+ struct device *dev = &instance->phy->dev;
+ struct u2phy_banks *u2_banks = &instance->u2_banks;
+ struct u3phy_banks *u3_banks = &instance->u3_banks;
+
+ if (!instance->efuse_sw_en)
+ return;
+
+ switch (instance->type) {
+ case PHY_TYPE_USB2:
+ mtk_phy_set_bits(u2_banks->misc + U3P_MISC_REG1, MR1_EFUSE_AUTO_LOAD_DIS);
+
+ mtk_phy_update_bits(u2_banks->com + U3P_USBPHYACR1, PA1_RG_INTR_CAL,
+ PA1_RG_INTR_CAL_VAL(instance->efuse_intr));
+ break;
+ case PHY_TYPE_USB3:
+ case PHY_TYPE_PCIE:
+ mtk_phy_set_bits(u3_banks->phyd + U3P_U3_PHYD_RSV, P3D_RG_EFUSE_AUTO_LOAD_DIS);
+
+ mtk_phy_update_bits(u3_banks->phyd + U3P_U3_PHYD_IMPCAL0, P3D_RG_TX_IMPEL,
+ P3D_RG_TX_IMPEL_VAL(instance->efuse_tx_imp));
+ mtk_phy_set_bits(u3_banks->phyd + U3P_U3_PHYD_IMPCAL0, P3D_RG_FORCE_TX_IMPEL);
+
+ mtk_phy_update_bits(u3_banks->phyd + U3P_U3_PHYD_IMPCAL1, P3D_RG_RX_IMPEL,
+ P3D_RG_RX_IMPEL_VAL(instance->efuse_rx_imp));
+ mtk_phy_set_bits(u3_banks->phyd + U3P_U3_PHYD_IMPCAL1, P3D_RG_FORCE_RX_IMPEL);
+
+ mtk_phy_update_bits(u3_banks->phya + U3P_U3_PHYA_REG0, P3A_RG_IEXT_INTR,
+ P3A_RG_IEXT_INTR_VAL(instance->efuse_intr));
+ break;
+ default:
+ dev_warn(dev, "no sw efuse for type %d\n", instance->type);
+ break;
+ }
+}
+
static int mtk_phy_init(struct phy *phy)
{
struct mtk_phy_instance *instance = phy_get_drvdata(phy);
@@ -1050,6 +1056,8 @@ static int mtk_phy_init(struct phy *phy)
if (ret)
return ret;
+ phy_efuse_set(instance);
+
switch (instance->type) {
case PHY_TYPE_USB2:
u2_phy_instance_init(tphy, instance);
@@ -1134,6 +1142,7 @@ static struct phy *mtk_phy_xlate(struct device *dev,
struct mtk_phy_instance *instance = NULL;
struct device_node *phy_np = args->np;
int index;
+ int ret;
if (args->args_count != 1) {
dev_err(dev, "invalid number of cells in 'phy' property\n");
@@ -1174,6 +1183,10 @@ static struct phy *mtk_phy_xlate(struct device *dev,
return ERR_PTR(-EINVAL);
}
+ ret = phy_efuse_get(tphy, instance);
+ if (ret)
+ return ERR_PTR(ret);
+
phy_parse_property(tphy, instance);
phy_type_set(instance);
@@ -1196,10 +1209,12 @@ static const struct mtk_phy_pdata tphy_v1_pdata = {
static const struct mtk_phy_pdata tphy_v2_pdata = {
.avoid_rx_sen_degradation = false,
+ .sw_efuse_supported = true,
.version = MTK_PHY_V2,
};
static const struct mtk_phy_pdata tphy_v3_pdata = {
+ .sw_efuse_supported = true,
.version = MTK_PHY_V3,
};
@@ -1210,6 +1225,7 @@ static const struct mtk_phy_pdata mt8173_pdata = {
static const struct mtk_phy_pdata mt8195_pdata = {
.sw_pll_48m_to_26m = true,
+ .sw_efuse_supported = true,
.version = MTK_PHY_V3,
};
diff --git a/drivers/phy/mediatek/phy-mtk-xsphy.c b/drivers/phy/mediatek/phy-mtk-xsphy.c
index 8c51131945c0..c0cdb78f77fa 100644
--- a/drivers/phy/mediatek/phy-mtk-xsphy.c
+++ b/drivers/phy/mediatek/phy-mtk-xsphy.c
@@ -10,13 +10,14 @@
#include <dt-bindings/phy/phy.h>
#include <linux/clk.h>
#include <linux/delay.h>
-#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/phy/phy.h>
#include <linux/platform_device.h>
+#include "phy-mtk-io.h"
+
/* u2 phy banks */
#define SSUSB_SIFSLV_MISC 0x000
#define SSUSB_SIFSLV_U2FREQ 0x100
@@ -126,26 +127,18 @@ static void u2_phy_slew_rate_calibrate(struct mtk_xsphy *xsphy,
return;
/* enable USB ring oscillator */
- tmp = readl(pbase + XSP_USBPHYACR5);
- tmp |= P2A5_RG_HSTX_SRCAL_EN;
- writel(tmp, pbase + XSP_USBPHYACR5);
+ mtk_phy_set_bits(pbase + XSP_USBPHYACR5, P2A5_RG_HSTX_SRCAL_EN);
udelay(1); /* wait clock stable */
/* enable free run clock */
- tmp = readl(pbase + XSP_U2FREQ_FMMONR1);
- tmp |= P2F_RG_FRCK_EN;
- writel(tmp, pbase + XSP_U2FREQ_FMMONR1);
+ mtk_phy_set_bits(pbase + XSP_U2FREQ_FMMONR1, P2F_RG_FRCK_EN);
/* set cycle count as 1024 */
- tmp = readl(pbase + XSP_U2FREQ_FMCR0);
- tmp &= ~(P2F_RG_CYCLECNT);
- tmp |= P2F_RG_CYCLECNT_VAL(XSP_FM_DET_CYCLE_CNT);
- writel(tmp, pbase + XSP_U2FREQ_FMCR0);
+ mtk_phy_update_bits(pbase + XSP_U2FREQ_FMCR0, P2F_RG_CYCLECNT,
+ P2F_RG_CYCLECNT_VAL(XSP_FM_DET_CYCLE_CNT));
/* enable frequency meter */
- tmp = readl(pbase + XSP_U2FREQ_FMCR0);
- tmp |= P2F_RG_FREQDET_EN;
- writel(tmp, pbase + XSP_U2FREQ_FMCR0);
+ mtk_phy_set_bits(pbase + XSP_U2FREQ_FMCR0, P2F_RG_FREQDET_EN);
/* ignore return value */
readl_poll_timeout(pbase + XSP_U2FREQ_FMMONR1, tmp,
@@ -154,14 +147,10 @@ static void u2_phy_slew_rate_calibrate(struct mtk_xsphy *xsphy,
fm_out = readl(pbase + XSP_U2FREQ_MMONR0);
/* disable frequency meter */
- tmp = readl(pbase + XSP_U2FREQ_FMCR0);
- tmp &= ~P2F_RG_FREQDET_EN;
- writel(tmp, pbase + XSP_U2FREQ_FMCR0);
+ mtk_phy_clear_bits(pbase + XSP_U2FREQ_FMCR0, P2F_RG_FREQDET_EN);
/* disable free run clock */
- tmp = readl(pbase + XSP_U2FREQ_FMMONR1);
- tmp &= ~P2F_RG_FRCK_EN;
- writel(tmp, pbase + XSP_U2FREQ_FMMONR1);
+ mtk_phy_clear_bits(pbase + XSP_U2FREQ_FMMONR1, P2F_RG_FRCK_EN);
if (fm_out) {
/* (1024 / FM_OUT) x reference clock frequency x coefficient */
@@ -177,31 +166,22 @@ static void u2_phy_slew_rate_calibrate(struct mtk_xsphy *xsphy,
xsphy->src_ref_clk, xsphy->src_coef);
/* set HS slew rate */
- tmp = readl(pbase + XSP_USBPHYACR5);
- tmp &= ~P2A5_RG_HSTX_SRCTRL;
- tmp |= P2A5_RG_HSTX_SRCTRL_VAL(calib_val);
- writel(tmp, pbase + XSP_USBPHYACR5);
+ mtk_phy_update_bits(pbase + XSP_USBPHYACR5, P2A5_RG_HSTX_SRCTRL,
+ P2A5_RG_HSTX_SRCTRL_VAL(calib_val));
/* disable USB ring oscillator */
- tmp = readl(pbase + XSP_USBPHYACR5);
- tmp &= ~P2A5_RG_HSTX_SRCAL_EN;
- writel(tmp, pbase + XSP_USBPHYACR5);
+ mtk_phy_clear_bits(pbase + XSP_USBPHYACR5, P2A5_RG_HSTX_SRCAL_EN);
}
static void u2_phy_instance_init(struct mtk_xsphy *xsphy,
struct xsphy_instance *inst)
{
void __iomem *pbase = inst->port_base;
- u32 tmp;
/* DP/DM BC1.1 path Disable */
- tmp = readl(pbase + XSP_USBPHYACR6);
- tmp &= ~P2A6_RG_BC11_SW_EN;
- writel(tmp, pbase + XSP_USBPHYACR6);
+ mtk_phy_clear_bits(pbase + XSP_USBPHYACR6, P2A6_RG_BC11_SW_EN);
- tmp = readl(pbase + XSP_USBPHYACR0);
- tmp |= P2A0_RG_INTR_EN;
- writel(tmp, pbase + XSP_USBPHYACR0);
+ mtk_phy_set_bits(pbase + XSP_USBPHYACR0, P2A0_RG_INTR_EN);
}
static void u2_phy_instance_power_on(struct mtk_xsphy *xsphy,
@@ -209,16 +189,12 @@ static void u2_phy_instance_power_on(struct mtk_xsphy *xsphy,
{
void __iomem *pbase = inst->port_base;
u32 index = inst->index;
- u32 tmp;
- tmp = readl(pbase + XSP_USBPHYACR6);
- tmp |= P2A6_RG_OTG_VBUSCMP_EN;
- writel(tmp, pbase + XSP_USBPHYACR6);
+ mtk_phy_set_bits(pbase + XSP_USBPHYACR6, P2A6_RG_OTG_VBUSCMP_EN);
- tmp = readl(pbase + XSP_U2PHYDTM1);
- tmp |= P2D_RG_VBUSVALID | P2D_RG_AVALID;
- tmp &= ~P2D_RG_SESSEND;
- writel(tmp, pbase + XSP_U2PHYDTM1);
+ mtk_phy_update_bits(pbase + XSP_U2PHYDTM1,
+ P2D_RG_VBUSVALID | P2D_RG_AVALID | P2D_RG_SESSEND,
+ P2D_RG_VBUSVALID | P2D_RG_AVALID);
dev_dbg(xsphy->dev, "%s(%d)\n", __func__, index);
}
@@ -228,16 +204,12 @@ static void u2_phy_instance_power_off(struct mtk_xsphy *xsphy,
{
void __iomem *pbase = inst->port_base;
u32 index = inst->index;
- u32 tmp;
- tmp = readl(pbase + XSP_USBPHYACR6);
- tmp &= ~P2A6_RG_OTG_VBUSCMP_EN;
- writel(tmp, pbase + XSP_USBPHYACR6);
+ mtk_phy_clear_bits(pbase + XSP_USBPHYACR6, P2A6_RG_OTG_VBUSCMP_EN);
- tmp = readl(pbase + XSP_U2PHYDTM1);
- tmp &= ~(P2D_RG_VBUSVALID | P2D_RG_AVALID);
- tmp |= P2D_RG_SESSEND;
- writel(tmp, pbase + XSP_U2PHYDTM1);
+ mtk_phy_update_bits(pbase + XSP_U2PHYDTM1,
+ P2D_RG_VBUSVALID | P2D_RG_AVALID | P2D_RG_SESSEND,
+ P2D_RG_SESSEND);
dev_dbg(xsphy->dev, "%s(%d)\n", __func__, index);
}
@@ -306,63 +278,43 @@ static void u2_phy_props_set(struct mtk_xsphy *xsphy,
struct xsphy_instance *inst)
{
void __iomem *pbase = inst->port_base;
- u32 tmp;
- if (inst->efuse_intr) {
- tmp = readl(pbase + XSP_USBPHYACR1);
- tmp &= ~P2A1_RG_INTR_CAL;
- tmp |= P2A1_RG_INTR_CAL_VAL(inst->efuse_intr);
- writel(tmp, pbase + XSP_USBPHYACR1);
- }
+ if (inst->efuse_intr)
+ mtk_phy_update_bits(pbase + XSP_USBPHYACR1, P2A1_RG_INTR_CAL,
+ P2A1_RG_INTR_CAL_VAL(inst->efuse_intr));
- if (inst->eye_src) {
- tmp = readl(pbase + XSP_USBPHYACR5);
- tmp &= ~P2A5_RG_HSTX_SRCTRL;
- tmp |= P2A5_RG_HSTX_SRCTRL_VAL(inst->eye_src);
- writel(tmp, pbase + XSP_USBPHYACR5);
- }
+ if (inst->eye_src)
+ mtk_phy_update_bits(pbase + XSP_USBPHYACR5, P2A5_RG_HSTX_SRCTRL,
+ P2A5_RG_HSTX_SRCTRL_VAL(inst->eye_src));
- if (inst->eye_vrt) {
- tmp = readl(pbase + XSP_USBPHYACR1);
- tmp &= ~P2A1_RG_VRT_SEL;
- tmp |= P2A1_RG_VRT_SEL_VAL(inst->eye_vrt);
- writel(tmp, pbase + XSP_USBPHYACR1);
- }
+ if (inst->eye_vrt)
+ mtk_phy_update_bits(pbase + XSP_USBPHYACR1, P2A1_RG_VRT_SEL,
+ P2A1_RG_VRT_SEL_VAL(inst->eye_vrt));
- if (inst->eye_term) {
- tmp = readl(pbase + XSP_USBPHYACR1);
- tmp &= ~P2A1_RG_TERM_SEL;
- tmp |= P2A1_RG_TERM_SEL_VAL(inst->eye_term);
- writel(tmp, pbase + XSP_USBPHYACR1);
- }
+ if (inst->eye_term)
+ mtk_phy_update_bits(pbase + XSP_USBPHYACR1, P2A1_RG_TERM_SEL,
+ P2A1_RG_TERM_SEL_VAL(inst->eye_term));
}
static void u3_phy_props_set(struct mtk_xsphy *xsphy,
struct xsphy_instance *inst)
{
void __iomem *pbase = inst->port_base;
- u32 tmp;
- if (inst->efuse_intr) {
- tmp = readl(xsphy->glb_base + SSPXTP_PHYA_GLB_00);
- tmp &= ~RG_XTP_GLB_BIAS_INTR_CTRL;
- tmp |= RG_XTP_GLB_BIAS_INTR_CTRL_VAL(inst->efuse_intr);
- writel(tmp, xsphy->glb_base + SSPXTP_PHYA_GLB_00);
- }
+ if (inst->efuse_intr)
+ mtk_phy_update_bits(xsphy->glb_base + SSPXTP_PHYA_GLB_00,
+ RG_XTP_GLB_BIAS_INTR_CTRL,
+ RG_XTP_GLB_BIAS_INTR_CTRL_VAL(inst->efuse_intr));
- if (inst->efuse_tx_imp) {
- tmp = readl(pbase + SSPXTP_PHYA_LN_04);
- tmp &= ~RG_XTP_LN0_TX_IMPSEL;
- tmp |= RG_XTP_LN0_TX_IMPSEL_VAL(inst->efuse_tx_imp);
- writel(tmp, pbase + SSPXTP_PHYA_LN_04);
- }
+ if (inst->efuse_tx_imp)
+ mtk_phy_update_bits(pbase + SSPXTP_PHYA_LN_04,
+ RG_XTP_LN0_TX_IMPSEL,
+ RG_XTP_LN0_TX_IMPSEL_VAL(inst->efuse_tx_imp));
- if (inst->efuse_rx_imp) {
- tmp = readl(pbase + SSPXTP_PHYA_LN_14);
- tmp &= ~RG_XTP_LN0_RX_IMPSEL;
- tmp |= RG_XTP_LN0_RX_IMPSEL_VAL(inst->efuse_rx_imp);
- writel(tmp, pbase + SSPXTP_PHYA_LN_14);
- }
+ if (inst->efuse_rx_imp)
+ mtk_phy_update_bits(pbase + SSPXTP_PHYA_LN_14,
+ RG_XTP_LN0_RX_IMPSEL,
+ RG_XTP_LN0_RX_IMPSEL_VAL(inst->efuse_rx_imp));
}
static int mtk_phy_init(struct phy *phy)
diff --git a/drivers/phy/microchip/Kconfig b/drivers/phy/microchip/Kconfig
index 3728a284bf64..38039ed0754c 100644
--- a/drivers/phy/microchip/Kconfig
+++ b/drivers/phy/microchip/Kconfig
@@ -11,3 +11,11 @@ config PHY_SPARX5_SERDES
depends on HAS_IOMEM
help
Enable this for support of the 10G/25G SerDes on Microchip Sparx5.
+
+config PHY_LAN966X_SERDES
+ tristate "SerDes PHY driver for Microchip LAN966X"
+ select GENERIC_PHY
+ depends on OF
+ depends on MFD_SYSCON
+ help
+ Enable this for supporting SerDes muxing with Microchip LAN966X
diff --git a/drivers/phy/microchip/Makefile b/drivers/phy/microchip/Makefile
index 7b98345712aa..fd73b87960a5 100644
--- a/drivers/phy/microchip/Makefile
+++ b/drivers/phy/microchip/Makefile
@@ -4,3 +4,4 @@
#
obj-$(CONFIG_PHY_SPARX5_SERDES) := sparx5_serdes.o
+obj-$(CONFIG_PHY_LAN966X_SERDES) := lan966x_serdes.o
diff --git a/drivers/phy/microchip/lan966x_serdes.c b/drivers/phy/microchip/lan966x_serdes.c
new file mode 100644
index 000000000000..e86a879b92b5
--- /dev/null
+++ b/drivers/phy/microchip/lan966x_serdes.c
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/phy.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+
+#include <dt-bindings/phy/phy-lan966x-serdes.h>
+#include "lan966x_serdes_regs.h"
+
+#define PLL_CONF_MASK GENMASK(4, 3)
+#define PLL_CONF_25MHZ 0
+#define PLL_CONF_125MHZ 1
+#define PLL_CONF_SERDES_125MHZ 2
+#define PLL_CONF_BYPASS 3
+
+#define lan_offset_(id, tinst, tcnt, \
+ gbase, ginst, gcnt, gwidth, \
+ raddr, rinst, rcnt, rwidth) \
+ (gbase + ((ginst) * gwidth) + raddr + ((rinst) * rwidth))
+#define lan_offset(...) lan_offset_(__VA_ARGS__)
+
+#define lan_rmw(val, mask, reg, off) \
+ lan_rmw_(val, mask, reg, lan_offset(off))
+
+#define SERDES_MUX(_idx, _port, _mode, _submode, _mask, _mux) { \
+ .idx = _idx, \
+ .port = _port, \
+ .mode = _mode, \
+ .submode = _submode, \
+ .mask = _mask, \
+ .mux = _mux, \
+}
+
+#define SERDES_MUX_GMII(i, p, m, c) \
+ SERDES_MUX(i, p, PHY_MODE_ETHERNET, PHY_INTERFACE_MODE_GMII, m, c)
+#define SERDES_MUX_SGMII(i, p, m, c) \
+ SERDES_MUX(i, p, PHY_MODE_ETHERNET, PHY_INTERFACE_MODE_SGMII, m, c)
+#define SERDES_MUX_QSGMII(i, p, m, c) \
+ SERDES_MUX(i, p, PHY_MODE_ETHERNET, PHY_INTERFACE_MODE_QSGMII, m, c)
+#define SERDES_MUX_RGMII(i, p, m, c) \
+ SERDES_MUX(i, p, PHY_MODE_ETHERNET, PHY_INTERFACE_MODE_RGMII, m, c)
+
+static void lan_rmw_(u32 val, u32 mask, void __iomem *mem, u32 offset)
+{
+ u32 v;
+
+ v = readl(mem + offset);
+ v = (v & ~mask) | (val & mask);
+ writel(v, mem + offset);
+}
+
+struct serdes_mux {
+ u8 idx;
+ u8 port;
+ enum phy_mode mode;
+ int submode;
+ u32 mask;
+ u32 mux;
+};
+
+static const struct serdes_mux lan966x_serdes_muxes[] = {
+ SERDES_MUX_QSGMII(SERDES6G(1), 0, HSIO_HW_CFG_QSGMII_ENA,
+ HSIO_HW_CFG_QSGMII_ENA_SET(BIT(0))),
+ SERDES_MUX_QSGMII(SERDES6G(1), 1, HSIO_HW_CFG_QSGMII_ENA,
+ HSIO_HW_CFG_QSGMII_ENA_SET(BIT(0))),
+ SERDES_MUX_QSGMII(SERDES6G(1), 2, HSIO_HW_CFG_QSGMII_ENA,
+ HSIO_HW_CFG_QSGMII_ENA_SET(BIT(0))),
+ SERDES_MUX_QSGMII(SERDES6G(1), 3, HSIO_HW_CFG_QSGMII_ENA,
+ HSIO_HW_CFG_QSGMII_ENA_SET(BIT(0))),
+
+ SERDES_MUX_QSGMII(SERDES6G(2), 4, HSIO_HW_CFG_QSGMII_ENA,
+ HSIO_HW_CFG_QSGMII_ENA_SET(BIT(1))),
+ SERDES_MUX_QSGMII(SERDES6G(2), 5, HSIO_HW_CFG_QSGMII_ENA,
+ HSIO_HW_CFG_QSGMII_ENA_SET(BIT(1))),
+ SERDES_MUX_QSGMII(SERDES6G(2), 6, HSIO_HW_CFG_QSGMII_ENA,
+ HSIO_HW_CFG_QSGMII_ENA_SET(BIT(1))),
+ SERDES_MUX_QSGMII(SERDES6G(2), 7, HSIO_HW_CFG_QSGMII_ENA,
+ HSIO_HW_CFG_QSGMII_ENA_SET(BIT(1))),
+
+ SERDES_MUX_GMII(CU(0), 0, HSIO_HW_CFG_GMII_ENA,
+ HSIO_HW_CFG_GMII_ENA_SET(BIT(0))),
+ SERDES_MUX_GMII(CU(1), 1, HSIO_HW_CFG_GMII_ENA,
+ HSIO_HW_CFG_GMII_ENA_SET(BIT(1))),
+
+ SERDES_MUX_SGMII(SERDES6G(0), 0, HSIO_HW_CFG_SD6G_0_CFG, 0),
+ SERDES_MUX_SGMII(SERDES6G(1), 1, HSIO_HW_CFG_SD6G_1_CFG, 0),
+ SERDES_MUX_SGMII(SERDES6G(0), 2, HSIO_HW_CFG_SD6G_0_CFG,
+ HSIO_HW_CFG_SD6G_0_CFG_SET(1)),
+ SERDES_MUX_SGMII(SERDES6G(1), 3, HSIO_HW_CFG_SD6G_1_CFG,
+ HSIO_HW_CFG_SD6G_1_CFG_SET(1)),
+
+ SERDES_MUX_RGMII(RGMII(0), 2, HSIO_HW_CFG_RGMII_0_CFG |
+ HSIO_HW_CFG_RGMII_ENA,
+ HSIO_HW_CFG_RGMII_0_CFG_SET(BIT(0)) |
+ HSIO_HW_CFG_RGMII_ENA_SET(BIT(0))),
+ SERDES_MUX_RGMII(RGMII(1), 3, HSIO_HW_CFG_RGMII_1_CFG |
+ HSIO_HW_CFG_RGMII_ENA,
+ HSIO_HW_CFG_RGMII_1_CFG_SET(BIT(0)) |
+ HSIO_HW_CFG_RGMII_ENA_SET(BIT(1))),
+ SERDES_MUX_RGMII(RGMII(0), 5, HSIO_HW_CFG_RGMII_0_CFG |
+ HSIO_HW_CFG_RGMII_ENA,
+ HSIO_HW_CFG_RGMII_0_CFG_SET(BIT(0)) |
+ HSIO_HW_CFG_RGMII_ENA_SET(BIT(0))),
+ SERDES_MUX_RGMII(RGMII(1), 6, HSIO_HW_CFG_RGMII_1_CFG |
+ HSIO_HW_CFG_RGMII_ENA,
+ HSIO_HW_CFG_RGMII_1_CFG_SET(BIT(0)) |
+ HSIO_HW_CFG_RGMII_ENA_SET(BIT(1))),
+};
+
+struct serdes_ctrl {
+ void __iomem *regs;
+ struct device *dev;
+ struct phy *phys[SERDES_MAX];
+ int ref125;
+};
+
+struct serdes_macro {
+ u8 idx;
+ int port;
+ struct serdes_ctrl *ctrl;
+ int speed;
+ phy_interface_t mode;
+};
+
+enum lan966x_sd6g40_mode {
+ LAN966X_SD6G40_MODE_QSGMII,
+ LAN966X_SD6G40_MODE_SGMII,
+};
+
+enum lan966x_sd6g40_ltx2rx {
+ LAN966X_SD6G40_TX2RX_LOOP_NONE,
+ LAN966X_SD6G40_LTX2RX
+};
+
+struct lan966x_sd6g40_setup_args {
+ enum lan966x_sd6g40_mode mode;
+ enum lan966x_sd6g40_ltx2rx tx2rx_loop;
+ bool txinvert;
+ bool rxinvert;
+ bool refclk125M;
+ bool mute;
+};
+
+struct lan966x_sd6g40_mode_args {
+ enum lan966x_sd6g40_mode mode;
+ u8 lane_10bit_sel;
+ u8 mpll_multiplier;
+ u8 ref_clkdiv2;
+ u8 tx_rate;
+ u8 rx_rate;
+};
+
+struct lan966x_sd6g40_setup {
+ u8 rx_term_en;
+ u8 lane_10bit_sel;
+ u8 tx_invert;
+ u8 rx_invert;
+ u8 mpll_multiplier;
+ u8 lane_loopbk_en;
+ u8 ref_clkdiv2;
+ u8 tx_rate;
+ u8 rx_rate;
+};
+
+static int lan966x_sd6g40_reg_cfg(struct serdes_macro *macro,
+ struct lan966x_sd6g40_setup *res_struct,
+ u32 idx)
+{
+ u32 value;
+
+ /* Note: SerDes HSIO is configured in 1G_LAN mode */
+ lan_rmw(HSIO_SD_CFG_LANE_10BIT_SEL_SET(res_struct->lane_10bit_sel) |
+ HSIO_SD_CFG_RX_RATE_SET(res_struct->rx_rate) |
+ HSIO_SD_CFG_TX_RATE_SET(res_struct->tx_rate) |
+ HSIO_SD_CFG_TX_INVERT_SET(res_struct->tx_invert) |
+ HSIO_SD_CFG_RX_INVERT_SET(res_struct->rx_invert) |
+ HSIO_SD_CFG_LANE_LOOPBK_EN_SET(res_struct->lane_loopbk_en) |
+ HSIO_SD_CFG_RX_RESET_SET(0) |
+ HSIO_SD_CFG_TX_RESET_SET(0),
+ HSIO_SD_CFG_LANE_10BIT_SEL |
+ HSIO_SD_CFG_RX_RATE |
+ HSIO_SD_CFG_TX_RATE |
+ HSIO_SD_CFG_TX_INVERT |
+ HSIO_SD_CFG_RX_INVERT |
+ HSIO_SD_CFG_LANE_LOOPBK_EN |
+ HSIO_SD_CFG_RX_RESET |
+ HSIO_SD_CFG_TX_RESET,
+ macro->ctrl->regs, HSIO_SD_CFG(idx));
+
+ lan_rmw(HSIO_MPLL_CFG_MPLL_MULTIPLIER_SET(res_struct->mpll_multiplier) |
+ HSIO_MPLL_CFG_REF_CLKDIV2_SET(res_struct->ref_clkdiv2),
+ HSIO_MPLL_CFG_MPLL_MULTIPLIER |
+ HSIO_MPLL_CFG_REF_CLKDIV2,
+ macro->ctrl->regs, HSIO_MPLL_CFG(idx));
+
+ lan_rmw(HSIO_SD_CFG_RX_TERM_EN_SET(res_struct->rx_term_en),
+ HSIO_SD_CFG_RX_TERM_EN,
+ macro->ctrl->regs, HSIO_SD_CFG(idx));
+
+ lan_rmw(HSIO_MPLL_CFG_REF_SSP_EN_SET(1),
+ HSIO_MPLL_CFG_REF_SSP_EN,
+ macro->ctrl->regs, HSIO_MPLL_CFG(idx));
+
+ usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
+
+ lan_rmw(HSIO_SD_CFG_PHY_RESET_SET(0),
+ HSIO_SD_CFG_PHY_RESET,
+ macro->ctrl->regs, HSIO_SD_CFG(idx));
+
+ usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
+
+ lan_rmw(HSIO_MPLL_CFG_MPLL_EN_SET(1),
+ HSIO_MPLL_CFG_MPLL_EN,
+ macro->ctrl->regs, HSIO_MPLL_CFG(idx));
+
+ usleep_range(7 * USEC_PER_MSEC, 8 * USEC_PER_MSEC);
+
+ value = readl(macro->ctrl->regs + lan_offset(HSIO_SD_STAT(idx)));
+ value = HSIO_SD_STAT_MPLL_STATE_GET(value);
+ if (value != 0x1) {
+ dev_err(macro->ctrl->dev,
+ "Unexpected sd_sd_stat[%u] mpll_state was 0x1 but is 0x%x\n",
+ idx, value);
+ return -EIO;
+ }
+
+ lan_rmw(HSIO_SD_CFG_TX_CM_EN_SET(1),
+ HSIO_SD_CFG_TX_CM_EN,
+ macro->ctrl->regs, HSIO_SD_CFG(idx));
+
+ usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
+
+ value = readl(macro->ctrl->regs + lan_offset(HSIO_SD_STAT(idx)));
+ value = HSIO_SD_STAT_TX_CM_STATE_GET(value);
+ if (value != 0x1) {
+ dev_err(macro->ctrl->dev,
+ "Unexpected sd_sd_stat[%u] tx_cm_state was 0x1 but is 0x%x\n",
+ idx, value);
+ return -EIO;
+ }
+
+ lan_rmw(HSIO_SD_CFG_RX_PLL_EN_SET(1) |
+ HSIO_SD_CFG_TX_EN_SET(1),
+ HSIO_SD_CFG_RX_PLL_EN |
+ HSIO_SD_CFG_TX_EN,
+ macro->ctrl->regs, HSIO_SD_CFG(idx));
+
+ usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
+
+ /* Waiting for serdes 0 rx DPLL to lock... */
+ value = readl(macro->ctrl->regs + lan_offset(HSIO_SD_STAT(idx)));
+ value = HSIO_SD_STAT_RX_PLL_STATE_GET(value);
+ if (value != 0x1) {
+ dev_err(macro->ctrl->dev,
+ "Unexpected sd_sd_stat[%u] rx_pll_state was 0x1 but is 0x%x\n",
+ idx, value);
+ return -EIO;
+ }
+
+ /* Waiting for serdes 0 tx operational... */
+ value = readl(macro->ctrl->regs + lan_offset(HSIO_SD_STAT(idx)));
+ value = HSIO_SD_STAT_TX_STATE_GET(value);
+ if (value != 0x1) {
+ dev_err(macro->ctrl->dev,
+ "Unexpected sd_sd_stat[%u] tx_state was 0x1 but is 0x%x\n",
+ idx, value);
+ return -EIO;
+ }
+
+ lan_rmw(HSIO_SD_CFG_TX_DATA_EN_SET(1) |
+ HSIO_SD_CFG_RX_DATA_EN_SET(1),
+ HSIO_SD_CFG_TX_DATA_EN |
+ HSIO_SD_CFG_RX_DATA_EN,
+ macro->ctrl->regs, HSIO_SD_CFG(idx));
+
+ return 0;
+}
+
+static int lan966x_sd6g40_get_conf_from_mode(struct serdes_macro *macro,
+ enum lan966x_sd6g40_mode f_mode,
+ bool ref125M,
+ struct lan966x_sd6g40_mode_args *ret_val)
+{
+ switch (f_mode) {
+ case LAN966X_SD6G40_MODE_QSGMII:
+ ret_val->lane_10bit_sel = 0;
+ if (ref125M) {
+ ret_val->mpll_multiplier = 40;
+ ret_val->ref_clkdiv2 = 0x1;
+ ret_val->tx_rate = 0x0;
+ ret_val->rx_rate = 0x0;
+ } else {
+ ret_val->mpll_multiplier = 100;
+ ret_val->ref_clkdiv2 = 0x0;
+ ret_val->tx_rate = 0x0;
+ ret_val->rx_rate = 0x0;
+ }
+ break;
+
+ case LAN966X_SD6G40_MODE_SGMII:
+ ret_val->lane_10bit_sel = 1;
+ if (ref125M) {
+ ret_val->mpll_multiplier = macro->speed == SPEED_2500 ? 50 : 40;
+ ret_val->ref_clkdiv2 = 0x1;
+ ret_val->tx_rate = macro->speed == SPEED_2500 ? 0x1 : 0x2;
+ ret_val->rx_rate = macro->speed == SPEED_2500 ? 0x1 : 0x2;
+ } else {
+ ret_val->mpll_multiplier = macro->speed == SPEED_2500 ? 125 : 100;
+ ret_val->ref_clkdiv2 = 0x0;
+ ret_val->tx_rate = macro->speed == SPEED_2500 ? 0x1 : 0x2;
+ ret_val->rx_rate = macro->speed == SPEED_2500 ? 0x1 : 0x2;
+ }
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int lan966x_calc_sd6g40_setup_lane(struct serdes_macro *macro,
+ struct lan966x_sd6g40_setup_args config,
+ struct lan966x_sd6g40_setup *ret_val)
+{
+ struct lan966x_sd6g40_mode_args sd6g40_mode;
+ struct lan966x_sd6g40_mode_args *mode_args = &sd6g40_mode;
+ int ret;
+
+ ret = lan966x_sd6g40_get_conf_from_mode(macro, config.mode,
+ config.refclk125M, mode_args);
+ if (ret)
+ return ret;
+
+ ret_val->lane_10bit_sel = mode_args->lane_10bit_sel;
+ ret_val->rx_rate = mode_args->rx_rate;
+ ret_val->tx_rate = mode_args->tx_rate;
+ ret_val->mpll_multiplier = mode_args->mpll_multiplier;
+ ret_val->ref_clkdiv2 = mode_args->ref_clkdiv2;
+ ret_val->rx_term_en = 0;
+
+ if (config.tx2rx_loop == LAN966X_SD6G40_LTX2RX)
+ ret_val->lane_loopbk_en = 1;
+ else
+ ret_val->lane_loopbk_en = 0;
+
+ ret_val->tx_invert = !!config.txinvert;
+ ret_val->rx_invert = !!config.rxinvert;
+
+ return 0;
+}
+
+static int lan966x_sd6g40_setup_lane(struct serdes_macro *macro,
+ struct lan966x_sd6g40_setup_args config,
+ u32 idx)
+{
+ struct lan966x_sd6g40_setup calc_results = {};
+ int ret;
+
+ ret = lan966x_calc_sd6g40_setup_lane(macro, config, &calc_results);
+ if (ret)
+ return ret;
+
+ return lan966x_sd6g40_reg_cfg(macro, &calc_results, idx);
+}
+
+static int lan966x_sd6g40_setup(struct serdes_macro *macro, u32 idx, int mode)
+{
+ struct lan966x_sd6g40_setup_args conf = {};
+
+ conf.refclk125M = macro->ctrl->ref125;
+
+ if (mode == PHY_INTERFACE_MODE_QSGMII)
+ conf.mode = LAN966X_SD6G40_MODE_QSGMII;
+ else
+ conf.mode = LAN966X_SD6G40_MODE_SGMII;
+
+ return lan966x_sd6g40_setup_lane(macro, conf, idx);
+}
+
+static int serdes_set_mode(struct phy *phy, enum phy_mode mode, int submode)
+{
+ struct serdes_macro *macro = phy_get_drvdata(phy);
+ unsigned int i;
+ int val;
+
+ /* As of now only PHY_MODE_ETHERNET is supported */
+ if (mode != PHY_MODE_ETHERNET)
+ return -EOPNOTSUPP;
+
+ if (submode == PHY_INTERFACE_MODE_2500BASEX)
+ macro->speed = SPEED_2500;
+ else
+ macro->speed = SPEED_1000;
+
+ if (submode == PHY_INTERFACE_MODE_1000BASEX ||
+ submode == PHY_INTERFACE_MODE_2500BASEX)
+ submode = PHY_INTERFACE_MODE_SGMII;
+
+ for (i = 0; i < ARRAY_SIZE(lan966x_serdes_muxes); i++) {
+ if (macro->idx != lan966x_serdes_muxes[i].idx ||
+ mode != lan966x_serdes_muxes[i].mode ||
+ submode != lan966x_serdes_muxes[i].submode ||
+ macro->port != lan966x_serdes_muxes[i].port)
+ continue;
+
+ val = readl(macro->ctrl->regs + lan_offset(HSIO_HW_CFG));
+ val |= lan966x_serdes_muxes[i].mux;
+ lan_rmw(val, lan966x_serdes_muxes[i].mask,
+ macro->ctrl->regs, HSIO_HW_CFG);
+
+ macro->mode = lan966x_serdes_muxes[i].submode;
+
+ if (macro->idx < CU_MAX)
+ return 0;
+
+ if (macro->idx < SERDES6G_MAX)
+ return lan966x_sd6g40_setup(macro,
+ macro->idx - (CU_MAX + 1),
+ macro->mode);
+
+ if (macro->idx < RGMII_MAX)
+ return 0;
+
+ return -EOPNOTSUPP;
+ }
+
+ return -EINVAL;
+}
+
+static const struct phy_ops serdes_ops = {
+ .set_mode = serdes_set_mode,
+ .owner = THIS_MODULE,
+};
+
+static struct phy *serdes_simple_xlate(struct device *dev,
+ struct of_phandle_args *args)
+{
+ struct serdes_ctrl *ctrl = dev_get_drvdata(dev);
+ unsigned int port, idx, i;
+
+ if (args->args_count != 2)
+ return ERR_PTR(-EINVAL);
+
+ port = args->args[0];
+ idx = args->args[1];
+
+ for (i = 0; i < SERDES_MAX; i++) {
+ struct serdes_macro *macro = phy_get_drvdata(ctrl->phys[i]);
+
+ if (idx != macro->idx)
+ continue;
+
+ macro->port = port;
+ return ctrl->phys[i];
+ }
+
+ return ERR_PTR(-ENODEV);
+}
+
+static int serdes_phy_create(struct serdes_ctrl *ctrl, u8 idx, struct phy **phy)
+{
+ struct serdes_macro *macro;
+
+ *phy = devm_phy_create(ctrl->dev, NULL, &serdes_ops);
+ if (IS_ERR(*phy))
+ return PTR_ERR(*phy);
+
+ macro = devm_kzalloc(ctrl->dev, sizeof(*macro), GFP_KERNEL);
+ if (!macro)
+ return -ENOMEM;
+
+ macro->idx = idx;
+ macro->ctrl = ctrl;
+ macro->port = -1;
+
+ phy_set_drvdata(*phy, macro);
+
+ return 0;
+}
+
+static int serdes_probe(struct platform_device *pdev)
+{
+ struct phy_provider *provider;
+ struct serdes_ctrl *ctrl;
+ void __iomem *hw_stat;
+ unsigned int i;
+ u32 val;
+ int ret;
+
+ ctrl = devm_kzalloc(&pdev->dev, sizeof(*ctrl), GFP_KERNEL);
+ if (!ctrl)
+ return -ENOMEM;
+
+ ctrl->dev = &pdev->dev;
+ ctrl->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+ if (IS_ERR(ctrl->regs))
+ return PTR_ERR(ctrl->regs);
+
+ hw_stat = devm_platform_get_and_ioremap_resource(pdev, 1, NULL);
+ if (IS_ERR(hw_stat))
+ return PTR_ERR(hw_stat);
+
+ for (i = 0; i < SERDES_MAX; i++) {
+ ret = serdes_phy_create(ctrl, i, &ctrl->phys[i]);
+ if (ret)
+ return ret;
+ }
+
+ val = readl(hw_stat);
+ val = FIELD_GET(PLL_CONF_MASK, val);
+ ctrl->ref125 = (val == PLL_CONF_125MHZ ||
+ val == PLL_CONF_SERDES_125MHZ);
+
+ dev_set_drvdata(&pdev->dev, ctrl);
+
+ provider = devm_of_phy_provider_register(ctrl->dev,
+ serdes_simple_xlate);
+
+ return PTR_ERR_OR_ZERO(provider);
+}
+
+static const struct of_device_id serdes_ids[] = {
+ { .compatible = "microchip,lan966x-serdes", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, serdes_ids);
+
+static struct platform_driver mscc_lan966x_serdes = {
+ .probe = serdes_probe,
+ .driver = {
+ .name = "microchip,lan966x-serdes",
+ .of_match_table = of_match_ptr(serdes_ids),
+ },
+};
+
+module_platform_driver(mscc_lan966x_serdes);
+
+MODULE_DESCRIPTION("Microchip lan966x switch serdes driver");
+MODULE_AUTHOR("Horatiu Vultur <horatiu.vultur@microchip.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/microchip/lan966x_serdes_regs.h b/drivers/phy/microchip/lan966x_serdes_regs.h
new file mode 100644
index 000000000000..ea30f64ffd5c
--- /dev/null
+++ b/drivers/phy/microchip/lan966x_serdes_regs.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _LAN966X_SERDES_REGS_H_
+#define _LAN966X_SERDES_REGS_H_
+
+#include <linux/bitfield.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+
+enum lan966x_target {
+ TARGET_HSIO = 32,
+ NUM_TARGETS = 66
+};
+
+#define __REG(...) __VA_ARGS__
+
+/* HSIO:SD:SD_CFG */
+#define HSIO_SD_CFG(g) __REG(TARGET_HSIO, 0, 1, 8, g, 3, 32, 0, 0, 1, 4)
+
+#define HSIO_SD_CFG_PHY_RESET BIT(27)
+#define HSIO_SD_CFG_PHY_RESET_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_PHY_RESET, x)
+#define HSIO_SD_CFG_PHY_RESET_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_PHY_RESET, x)
+
+#define HSIO_SD_CFG_TX_RESET BIT(18)
+#define HSIO_SD_CFG_TX_RESET_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_TX_RESET, x)
+#define HSIO_SD_CFG_TX_RESET_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_TX_RESET, x)
+
+#define HSIO_SD_CFG_TX_RATE GENMASK(17, 16)
+#define HSIO_SD_CFG_TX_RATE_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_TX_RATE, x)
+#define HSIO_SD_CFG_TX_RATE_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_TX_RATE, x)
+
+#define HSIO_SD_CFG_TX_INVERT BIT(15)
+#define HSIO_SD_CFG_TX_INVERT_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_TX_INVERT, x)
+#define HSIO_SD_CFG_TX_INVERT_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_TX_INVERT, x)
+
+#define HSIO_SD_CFG_TX_EN BIT(14)
+#define HSIO_SD_CFG_TX_EN_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_TX_EN, x)
+#define HSIO_SD_CFG_TX_EN_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_TX_EN, x)
+
+#define HSIO_SD_CFG_TX_DATA_EN BIT(12)
+#define HSIO_SD_CFG_TX_DATA_EN_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_TX_DATA_EN, x)
+#define HSIO_SD_CFG_TX_DATA_EN_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_TX_DATA_EN, x)
+
+#define HSIO_SD_CFG_TX_CM_EN BIT(11)
+#define HSIO_SD_CFG_TX_CM_EN_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_TX_CM_EN, x)
+#define HSIO_SD_CFG_TX_CM_EN_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_TX_CM_EN, x)
+
+#define HSIO_SD_CFG_LANE_10BIT_SEL BIT(10)
+#define HSIO_SD_CFG_LANE_10BIT_SEL_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_LANE_10BIT_SEL, x)
+#define HSIO_SD_CFG_LANE_10BIT_SEL_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_LANE_10BIT_SEL, x)
+
+#define HSIO_SD_CFG_RX_TERM_EN BIT(9)
+#define HSIO_SD_CFG_RX_TERM_EN_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_RX_TERM_EN, x)
+#define HSIO_SD_CFG_RX_TERM_EN_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_RX_TERM_EN, x)
+
+#define HSIO_SD_CFG_RX_RESET BIT(8)
+#define HSIO_SD_CFG_RX_RESET_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_RX_RESET, x)
+#define HSIO_SD_CFG_RX_RESET_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_RX_RESET, x)
+
+#define HSIO_SD_CFG_RX_RATE GENMASK(7, 6)
+#define HSIO_SD_CFG_RX_RATE_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_RX_RATE, x)
+#define HSIO_SD_CFG_RX_RATE_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_RX_RATE, x)
+
+#define HSIO_SD_CFG_RX_PLL_EN BIT(5)
+#define HSIO_SD_CFG_RX_PLL_EN_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_RX_PLL_EN, x)
+#define HSIO_SD_CFG_RX_PLL_EN_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_RX_PLL_EN, x)
+
+#define HSIO_SD_CFG_RX_INVERT BIT(3)
+#define HSIO_SD_CFG_RX_INVERT_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_RX_INVERT, x)
+#define HSIO_SD_CFG_RX_INVERT_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_RX_INVERT, x)
+
+#define HSIO_SD_CFG_RX_DATA_EN BIT(2)
+#define HSIO_SD_CFG_RX_DATA_EN_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_RX_DATA_EN, x)
+#define HSIO_SD_CFG_RX_DATA_EN_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_RX_DATA_EN, x)
+
+#define HSIO_SD_CFG_LANE_LOOPBK_EN BIT(0)
+#define HSIO_SD_CFG_LANE_LOOPBK_EN_SET(x)\
+ FIELD_PREP(HSIO_SD_CFG_LANE_LOOPBK_EN, x)
+#define HSIO_SD_CFG_LANE_LOOPBK_EN_GET(x)\
+ FIELD_GET(HSIO_SD_CFG_LANE_LOOPBK_EN, x)
+
+/* HSIO:SD:MPLL_CFG */
+#define HSIO_MPLL_CFG(g) __REG(TARGET_HSIO, 0, 1, 8, g, 3, 32, 8, 0, 1, 4)
+
+#define HSIO_MPLL_CFG_REF_SSP_EN BIT(18)
+#define HSIO_MPLL_CFG_REF_SSP_EN_SET(x)\
+ FIELD_PREP(HSIO_MPLL_CFG_REF_SSP_EN, x)
+#define HSIO_MPLL_CFG_REF_SSP_EN_GET(x)\
+ FIELD_GET(HSIO_MPLL_CFG_REF_SSP_EN, x)
+
+#define HSIO_MPLL_CFG_REF_CLKDIV2 BIT(17)
+#define HSIO_MPLL_CFG_REF_CLKDIV2_SET(x)\
+ FIELD_PREP(HSIO_MPLL_CFG_REF_CLKDIV2, x)
+#define HSIO_MPLL_CFG_REF_CLKDIV2_GET(x)\
+ FIELD_GET(HSIO_MPLL_CFG_REF_CLKDIV2, x)
+
+#define HSIO_MPLL_CFG_MPLL_EN BIT(16)
+#define HSIO_MPLL_CFG_MPLL_EN_SET(x)\
+ FIELD_PREP(HSIO_MPLL_CFG_MPLL_EN, x)
+#define HSIO_MPLL_CFG_MPLL_EN_GET(x)\
+ FIELD_GET(HSIO_MPLL_CFG_MPLL_EN, x)
+
+#define HSIO_MPLL_CFG_MPLL_MULTIPLIER GENMASK(6, 0)
+#define HSIO_MPLL_CFG_MPLL_MULTIPLIER_SET(x)\
+ FIELD_PREP(HSIO_MPLL_CFG_MPLL_MULTIPLIER, x)
+#define HSIO_MPLL_CFG_MPLL_MULTIPLIER_GET(x)\
+ FIELD_GET(HSIO_MPLL_CFG_MPLL_MULTIPLIER, x)
+
+/* HSIO:SD:SD_STAT */
+#define HSIO_SD_STAT(g) __REG(TARGET_HSIO, 0, 1, 8, g, 3, 32, 12, 0, 1, 4)
+
+#define HSIO_SD_STAT_MPLL_STATE BIT(6)
+#define HSIO_SD_STAT_MPLL_STATE_SET(x)\
+ FIELD_PREP(HSIO_SD_STAT_MPLL_STATE, x)
+#define HSIO_SD_STAT_MPLL_STATE_GET(x)\
+ FIELD_GET(HSIO_SD_STAT_MPLL_STATE, x)
+
+#define HSIO_SD_STAT_TX_STATE BIT(5)
+#define HSIO_SD_STAT_TX_STATE_SET(x)\
+ FIELD_PREP(HSIO_SD_STAT_TX_STATE, x)
+#define HSIO_SD_STAT_TX_STATE_GET(x)\
+ FIELD_GET(HSIO_SD_STAT_TX_STATE, x)
+
+#define HSIO_SD_STAT_TX_CM_STATE BIT(2)
+#define HSIO_SD_STAT_TX_CM_STATE_SET(x)\
+ FIELD_PREP(HSIO_SD_STAT_TX_CM_STATE, x)
+#define HSIO_SD_STAT_TX_CM_STATE_GET(x)\
+ FIELD_GET(HSIO_SD_STAT_TX_CM_STATE, x)
+
+#define HSIO_SD_STAT_RX_PLL_STATE BIT(0)
+#define HSIO_SD_STAT_RX_PLL_STATE_SET(x)\
+ FIELD_PREP(HSIO_SD_STAT_RX_PLL_STATE, x)
+#define HSIO_SD_STAT_RX_PLL_STATE_GET(x)\
+ FIELD_GET(HSIO_SD_STAT_RX_PLL_STATE, x)
+
+/* HSIO:HW_CFGSTAT:HW_CFG */
+#define HSIO_HW_CFG __REG(TARGET_HSIO, 0, 1, 104, 0, 1, 52, 0, 0, 1, 4)
+
+#define HSIO_HW_CFG_RGMII_1_CFG BIT(15)
+#define HSIO_HW_CFG_RGMII_1_CFG_SET(x)\
+ (((x) << 15) & GENMASK(15, 15))
+#define HSIO_HW_CFG_RGMII_1_CFG_GET(x)\
+ FIELD_GET(HSIO_HW_CFG_RGMII_1_CFG, x)
+
+#define HSIO_HW_CFG_RGMII_0_CFG BIT(14)
+#define HSIO_HW_CFG_RGMII_0_CFG_SET(x)\
+ (((x) << 14) & GENMASK(14, 14))
+#define HSIO_HW_CFG_RGMII_0_CFG_GET(x)\
+ FIELD_GET(HSIO_HW_CFG_RGMII_0_CFG, x)
+
+#define HSIO_HW_CFG_RGMII_ENA GENMASK(13, 12)
+#define HSIO_HW_CFG_RGMII_ENA_SET(x)\
+ (((x) << 12) & GENMASK(13, 12))
+#define HSIO_HW_CFG_RGMII_ENA_GET(x)\
+ FIELD_GET(HSIO_HW_CFG_RGMII_ENA, x)
+
+#define HSIO_HW_CFG_SD6G_0_CFG BIT(11)
+#define HSIO_HW_CFG_SD6G_0_CFG_SET(x)\
+ (((x) << 11) & GENMASK(11, 11))
+#define HSIO_HW_CFG_SD6G_0_CFG_GET(x)\
+ FIELD_GET(HSIO_HW_CFG_SD6G_0_CFG, x)
+
+#define HSIO_HW_CFG_SD6G_1_CFG BIT(10)
+#define HSIO_HW_CFG_SD6G_1_CFG_SET(x)\
+ (((x) << 10) & GENMASK(10, 10))
+#define HSIO_HW_CFG_SD6G_1_CFG_GET(x)\
+ FIELD_GET(HSIO_HW_CFG_SD6G_1_CFG, x)
+
+#define HSIO_HW_CFG_GMII_ENA GENMASK(9, 2)
+#define HSIO_HW_CFG_GMII_ENA_SET(x)\
+ (((x) << 2) & GENMASK(9, 2))
+#define HSIO_HW_CFG_GMII_ENA_GET(x)\
+ FIELD_GET(HSIO_HW_CFG_GMII_ENA, x)
+
+#define HSIO_HW_CFG_QSGMII_ENA GENMASK(1, 0)
+#define HSIO_HW_CFG_QSGMII_ENA_SET(x)\
+ ((x) & GENMASK(1, 0))
+#define HSIO_HW_CFG_QSGMII_ENA_GET(x)\
+ FIELD_GET(HSIO_HW_CFG_QSGMII_ENA, x)
+
+#endif /* _LAN966X_HSIO_REGS_H_ */
diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c
index c2cb93b4df71..6f3fe37dee0e 100644
--- a/drivers/phy/phy-can-transceiver.c
+++ b/drivers/phy/phy-can-transceiver.c
@@ -110,14 +110,14 @@ static int can_transceiver_phy_probe(struct platform_device *pdev)
can_transceiver_phy->generic_phy = phy;
if (drvdata->flags & CAN_TRANSCEIVER_STB_PRESENT) {
- standby_gpio = devm_gpiod_get(dev, "standby", GPIOD_OUT_HIGH);
+ standby_gpio = devm_gpiod_get_optional(dev, "standby", GPIOD_OUT_HIGH);
if (IS_ERR(standby_gpio))
return PTR_ERR(standby_gpio);
can_transceiver_phy->standby_gpio = standby_gpio;
}
if (drvdata->flags & CAN_TRANSCEIVER_EN_PRESENT) {
- enable_gpio = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
+ enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW);
if (IS_ERR(enable_gpio))
return PTR_ERR(enable_gpio);
can_transceiver_phy->enable_gpio = enable_gpio;
diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig
index 7f6fcb8ec5ba..5c98850f5a36 100644
--- a/drivers/phy/qualcomm/Kconfig
+++ b/drivers/phy/qualcomm/Kconfig
@@ -18,6 +18,16 @@ config PHY_QCOM_APQ8064_SATA
depends on OF
select GENERIC_PHY
+config PHY_QCOM_EDP
+ tristate "Qualcomm eDP PHY driver"
+ depends on ARCH_QCOM || COMPILE_TEST
+ depends on OF
+ depends on COMMON_CLK
+ select GENERIC_PHY
+ help
+ Enable this driver to support the Qualcomm eDP PHY found in various
+ Qualcomm chipsets.
+
config PHY_QCOM_IPQ4019_USB
tristate "Qualcomm IPQ4019 USB PHY driver"
depends on OF && (ARCH_QCOM || COMPILE_TEST)
diff --git a/drivers/phy/qualcomm/Makefile b/drivers/phy/qualcomm/Makefile
index 47acbd7daa3a..e9e3b1a4dbb0 100644
--- a/drivers/phy/qualcomm/Makefile
+++ b/drivers/phy/qualcomm/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_PHY_ATH79_USB) += phy-ath79-usb.o
obj-$(CONFIG_PHY_QCOM_APQ8064_SATA) += phy-qcom-apq8064-sata.o
+obj-$(CONFIG_PHY_QCOM_EDP) += phy-qcom-edp.o
obj-$(CONFIG_PHY_QCOM_IPQ4019_USB) += phy-qcom-ipq4019-usb.o
obj-$(CONFIG_PHY_QCOM_IPQ806X_SATA) += phy-qcom-ipq806x-sata.o
obj-$(CONFIG_PHY_QCOM_PCIE2) += phy-qcom-pcie2.o
diff --git a/drivers/phy/qualcomm/phy-qcom-edp.c b/drivers/phy/qualcomm/phy-qcom-edp.c
new file mode 100644
index 000000000000..a8ecd2e8442d
--- /dev/null
+++ b/drivers/phy/qualcomm/phy-qcom-edp.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017, 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Linaro Ltd.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/phy/phy.h>
+
+#include "phy-qcom-qmp.h"
+
+/* EDP_PHY registers */
+#define DP_PHY_CFG 0x0010
+#define DP_PHY_CFG_1 0x0014
+#define DP_PHY_PD_CTL 0x001c
+#define DP_PHY_MODE 0x0020
+
+#define DP_PHY_AUX_CFG0 0x0024
+#define DP_PHY_AUX_CFG1 0x0028
+#define DP_PHY_AUX_CFG2 0x002C
+#define DP_PHY_AUX_CFG3 0x0030
+#define DP_PHY_AUX_CFG4 0x0034
+#define DP_PHY_AUX_CFG5 0x0038
+#define DP_PHY_AUX_CFG6 0x003C
+#define DP_PHY_AUX_CFG7 0x0040
+#define DP_PHY_AUX_CFG8 0x0044
+#define DP_PHY_AUX_CFG9 0x0048
+
+#define DP_PHY_AUX_INTERRUPT_MASK 0x0058
+
+#define DP_PHY_VCO_DIV 0x0074
+#define DP_PHY_TX0_TX1_LANE_CTL 0x007c
+#define DP_PHY_TX2_TX3_LANE_CTL 0x00a0
+
+#define DP_PHY_STATUS 0x00e0
+
+/* LANE_TXn registers */
+#define TXn_CLKBUF_ENABLE 0x0000
+#define TXn_TX_EMP_POST1_LVL 0x0004
+
+#define TXn_TX_DRV_LVL 0x0014
+#define TXn_TX_DRV_LVL_OFFSET 0x0018
+#define TXn_RESET_TSYNC_EN 0x001c
+#define TXn_LDO_CONFIG 0x0084
+#define TXn_TX_BAND 0x0028
+
+#define TXn_RES_CODE_LANE_OFFSET_TX0 0x0044
+#define TXn_RES_CODE_LANE_OFFSET_TX1 0x0048
+
+#define TXn_TRANSCEIVER_BIAS_EN 0x0054
+#define TXn_HIGHZ_DRVR_EN 0x0058
+#define TXn_TX_POL_INV 0x005c
+#define TXn_LANE_MODE_1 0x0064
+
+#define TXn_TRAN_DRVR_EMP_EN 0x0078
+
+struct qcom_edp {
+ struct device *dev;
+
+ struct phy *phy;
+
+ void __iomem *edp;
+ void __iomem *tx0;
+ void __iomem *tx1;
+ void __iomem *pll;
+
+ struct clk_hw dp_link_hw;
+ struct clk_hw dp_pixel_hw;
+
+ struct phy_configure_opts_dp dp_opts;
+
+ struct clk_bulk_data clks[2];
+ struct regulator_bulk_data supplies[2];
+};
+
+static int qcom_edp_phy_init(struct phy *phy)
+{
+ struct qcom_edp *edp = phy_get_drvdata(phy);
+ int ret;
+
+ ret = regulator_bulk_enable(ARRAY_SIZE(edp->supplies), edp->supplies);
+ if (ret)
+ return ret;
+
+ ret = clk_bulk_prepare_enable(ARRAY_SIZE(edp->clks), edp->clks);
+ if (ret)
+ goto out_disable_supplies;
+
+ writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
+ DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
+ edp->edp + DP_PHY_PD_CTL);
+
+ /* Turn on BIAS current for PHY/PLL */
+ writel(0x17, edp->pll + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN);
+
+ writel(DP_PHY_PD_CTL_PSR_PWRDN, edp->edp + DP_PHY_PD_CTL);
+ msleep(20);
+
+ writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
+ DP_PHY_PD_CTL_LANE_0_1_PWRDN | DP_PHY_PD_CTL_LANE_2_3_PWRDN |
+ DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
+ edp->edp + DP_PHY_PD_CTL);
+
+ writel(0x00, edp->edp + DP_PHY_AUX_CFG0);
+ writel(0x13, edp->edp + DP_PHY_AUX_CFG1);
+ writel(0x24, edp->edp + DP_PHY_AUX_CFG2);
+ writel(0x00, edp->edp + DP_PHY_AUX_CFG3);
+ writel(0x0a, edp->edp + DP_PHY_AUX_CFG4);
+ writel(0x26, edp->edp + DP_PHY_AUX_CFG5);
+ writel(0x0a, edp->edp + DP_PHY_AUX_CFG6);
+ writel(0x03, edp->edp + DP_PHY_AUX_CFG7);
+ writel(0x37, edp->edp + DP_PHY_AUX_CFG8);
+ writel(0x03, edp->edp + DP_PHY_AUX_CFG9);
+
+ writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK |
+ PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK |
+ PHY_AUX_REQ_ERR_MASK, edp->edp + DP_PHY_AUX_INTERRUPT_MASK);
+
+ msleep(20);
+
+ return 0;
+
+out_disable_supplies:
+ regulator_bulk_disable(ARRAY_SIZE(edp->supplies), edp->supplies);
+
+ return ret;
+}
+
+static int qcom_edp_phy_configure(struct phy *phy, union phy_configure_opts *opts)
+{
+ const struct phy_configure_opts_dp *dp_opts = &opts->dp;
+ struct qcom_edp *edp = phy_get_drvdata(phy);
+
+ memcpy(&edp->dp_opts, dp_opts, sizeof(*dp_opts));
+
+ return 0;
+}
+
+static int qcom_edp_configure_ssc(const struct qcom_edp *edp)
+{
+ const struct phy_configure_opts_dp *dp_opts = &edp->dp_opts;
+ u32 step1;
+ u32 step2;
+
+ switch (dp_opts->link_rate) {
+ case 1620:
+ case 2700:
+ case 8100:
+ step1 = 0x45;
+ step2 = 0x06;
+ break;
+
+ case 5400:
+ step1 = 0x5c;
+ step2 = 0x08;
+ break;
+
+ default:
+ /* Other link rates aren't supported */
+ return -EINVAL;
+ }
+
+ writel(0x01, edp->pll + QSERDES_V4_COM_SSC_EN_CENTER);
+ writel(0x00, edp->pll + QSERDES_V4_COM_SSC_ADJ_PER1);
+ writel(0x36, edp->pll + QSERDES_V4_COM_SSC_PER1);
+ writel(0x01, edp->pll + QSERDES_V4_COM_SSC_PER2);
+ writel(step1, edp->pll + QSERDES_V4_COM_SSC_STEP_SIZE1_MODE0);
+ writel(step2, edp->pll + QSERDES_V4_COM_SSC_STEP_SIZE2_MODE0);
+
+ return 0;
+}
+
+static int qcom_edp_configure_pll(const struct qcom_edp *edp)
+{
+ const struct phy_configure_opts_dp *dp_opts = &edp->dp_opts;
+ u32 div_frac_start2_mode0;
+ u32 div_frac_start3_mode0;
+ u32 dec_start_mode0;
+ u32 lock_cmp1_mode0;
+ u32 lock_cmp2_mode0;
+ u32 hsclk_sel;
+
+ switch (dp_opts->link_rate) {
+ case 1620:
+ hsclk_sel = 0x5;
+ dec_start_mode0 = 0x69;
+ div_frac_start2_mode0 = 0x80;
+ div_frac_start3_mode0 = 0x07;
+ lock_cmp1_mode0 = 0x6f;
+ lock_cmp2_mode0 = 0x08;
+ break;
+
+ case 2700:
+ hsclk_sel = 0x3;
+ dec_start_mode0 = 0x69;
+ div_frac_start2_mode0 = 0x80;
+ div_frac_start3_mode0 = 0x07;
+ lock_cmp1_mode0 = 0x0f;
+ lock_cmp2_mode0 = 0x0e;
+ break;
+
+ case 5400:
+ hsclk_sel = 0x1;
+ dec_start_mode0 = 0x8c;
+ div_frac_start2_mode0 = 0x00;
+ div_frac_start3_mode0 = 0x0a;
+ lock_cmp1_mode0 = 0x1f;
+ lock_cmp2_mode0 = 0x1c;
+ break;
+
+ case 8100:
+ hsclk_sel = 0x0;
+ dec_start_mode0 = 0x69;
+ div_frac_start2_mode0 = 0x80;
+ div_frac_start3_mode0 = 0x07;
+ lock_cmp1_mode0 = 0x2f;
+ lock_cmp2_mode0 = 0x2a;
+ break;
+
+ default:
+ /* Other link rates aren't supported */
+ return -EINVAL;
+ }
+
+ writel(0x01, edp->pll + QSERDES_V4_COM_SVS_MODE_CLK_SEL);
+ writel(0x0b, edp->pll + QSERDES_V4_COM_SYSCLK_EN_SEL);
+ writel(0x02, edp->pll + QSERDES_V4_COM_SYS_CLK_CTRL);
+ writel(0x0c, edp->pll + QSERDES_V4_COM_CLK_ENABLE1);
+ writel(0x06, edp->pll + QSERDES_V4_COM_SYSCLK_BUF_ENABLE);
+ writel(0x30, edp->pll + QSERDES_V4_COM_CLK_SELECT);
+ writel(hsclk_sel, edp->pll + QSERDES_V4_COM_HSCLK_SEL);
+ writel(0x0f, edp->pll + QSERDES_V4_COM_PLL_IVCO);
+ writel(0x08, edp->pll + QSERDES_V4_COM_LOCK_CMP_EN);
+ writel(0x36, edp->pll + QSERDES_V4_COM_PLL_CCTRL_MODE0);
+ writel(0x16, edp->pll + QSERDES_V4_COM_PLL_RCTRL_MODE0);
+ writel(0x06, edp->pll + QSERDES_V4_COM_CP_CTRL_MODE0);
+ writel(dec_start_mode0, edp->pll + QSERDES_V4_COM_DEC_START_MODE0);
+ writel(0x00, edp->pll + QSERDES_V4_COM_DIV_FRAC_START1_MODE0);
+ writel(div_frac_start2_mode0, edp->pll + QSERDES_V4_COM_DIV_FRAC_START2_MODE0);
+ writel(div_frac_start3_mode0, edp->pll + QSERDES_V4_COM_DIV_FRAC_START3_MODE0);
+ writel(0x02, edp->pll + QSERDES_V4_COM_CMN_CONFIG);
+ writel(0x3f, edp->pll + QSERDES_V4_COM_INTEGLOOP_GAIN0_MODE0);
+ writel(0x00, edp->pll + QSERDES_V4_COM_INTEGLOOP_GAIN1_MODE0);
+ writel(0x00, edp->pll + QSERDES_V4_COM_VCO_TUNE_MAP);
+ writel(lock_cmp1_mode0, edp->pll + QSERDES_V4_COM_LOCK_CMP1_MODE0);
+ writel(lock_cmp2_mode0, edp->pll + QSERDES_V4_COM_LOCK_CMP2_MODE0);
+
+ writel(0x0a, edp->pll + QSERDES_V4_COM_BG_TIMER);
+ writel(0x14, edp->pll + QSERDES_V4_COM_CORECLK_DIV_MODE0);
+ writel(0x00, edp->pll + QSERDES_V4_COM_VCO_TUNE_CTRL);
+ writel(0x17, edp->pll + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN);
+ writel(0x0f, edp->pll + QSERDES_V4_COM_CORE_CLK_EN);
+ writel(0xa0, edp->pll + QSERDES_V4_COM_VCO_TUNE1_MODE0);
+ writel(0x03, edp->pll + QSERDES_V4_COM_VCO_TUNE2_MODE0);
+
+ return 0;
+}
+
+static int qcom_edp_set_vco_div(const struct qcom_edp *edp)
+{
+ const struct phy_configure_opts_dp *dp_opts = &edp->dp_opts;
+ unsigned long pixel_freq;
+ u32 vco_div;
+
+ switch (dp_opts->link_rate) {
+ case 1620:
+ vco_div = 0x1;
+ pixel_freq = 1620000000UL / 2;
+ break;
+
+ case 2700:
+ vco_div = 0x1;
+ pixel_freq = 2700000000UL / 2;
+ break;
+
+ case 5400:
+ vco_div = 0x2;
+ pixel_freq = 5400000000UL / 4;
+ break;
+
+ case 8100:
+ vco_div = 0x0;
+ pixel_freq = 8100000000UL / 6;
+ break;
+
+ default:
+ /* Other link rates aren't supported */
+ return -EINVAL;
+ }
+
+ writel(vco_div, edp->edp + DP_PHY_VCO_DIV);
+
+ clk_set_rate(edp->dp_link_hw.clk, dp_opts->link_rate * 100000);
+ clk_set_rate(edp->dp_pixel_hw.clk, pixel_freq);
+
+ return 0;
+}
+
+static int qcom_edp_phy_power_on(struct phy *phy)
+{
+ const struct qcom_edp *edp = phy_get_drvdata(phy);
+ int timeout;
+ int ret;
+ u32 val;
+
+ writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
+ DP_PHY_PD_CTL_LANE_0_1_PWRDN | DP_PHY_PD_CTL_LANE_2_3_PWRDN |
+ DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
+ edp->edp + DP_PHY_PD_CTL);
+ writel(0xfc, edp->edp + DP_PHY_MODE);
+
+ timeout = readl_poll_timeout(edp->pll + QSERDES_V4_COM_CMN_STATUS,
+ val, val & BIT(7), 5, 200);
+ if (timeout)
+ return timeout;
+
+ writel(0x01, edp->tx0 + TXn_LDO_CONFIG);
+ writel(0x01, edp->tx1 + TXn_LDO_CONFIG);
+ writel(0x00, edp->tx0 + TXn_LANE_MODE_1);
+ writel(0x00, edp->tx1 + TXn_LANE_MODE_1);
+
+ ret = qcom_edp_configure_ssc(edp);
+ if (ret)
+ return ret;
+
+ ret = qcom_edp_configure_pll(edp);
+ if (ret)
+ return ret;
+
+ /* TX Lane configuration */
+ writel(0x05, edp->edp + DP_PHY_TX0_TX1_LANE_CTL);
+ writel(0x05, edp->edp + DP_PHY_TX2_TX3_LANE_CTL);
+
+ /* TX-0 register configuration */
+ writel(0x03, edp->tx0 + TXn_TRANSCEIVER_BIAS_EN);
+ writel(0x0f, edp->tx0 + TXn_CLKBUF_ENABLE);
+ writel(0x03, edp->tx0 + TXn_RESET_TSYNC_EN);
+ writel(0x01, edp->tx0 + TXn_TRAN_DRVR_EMP_EN);
+ writel(0x04, edp->tx0 + TXn_TX_BAND);
+
+ /* TX-1 register configuration */
+ writel(0x03, edp->tx1 + TXn_TRANSCEIVER_BIAS_EN);
+ writel(0x0f, edp->tx1 + TXn_CLKBUF_ENABLE);
+ writel(0x03, edp->tx1 + TXn_RESET_TSYNC_EN);
+ writel(0x01, edp->tx1 + TXn_TRAN_DRVR_EMP_EN);
+ writel(0x04, edp->tx1 + TXn_TX_BAND);
+
+ ret = qcom_edp_set_vco_div(edp);
+ if (ret)
+ return ret;
+
+ writel(0x01, edp->edp + DP_PHY_CFG);
+ writel(0x05, edp->edp + DP_PHY_CFG);
+ writel(0x01, edp->edp + DP_PHY_CFG);
+ writel(0x09, edp->edp + DP_PHY_CFG);
+
+ writel(0x20, edp->pll + QSERDES_V4_COM_RESETSM_CNTRL);
+
+ timeout = readl_poll_timeout(edp->pll + QSERDES_V4_COM_C_READY_STATUS,
+ val, val & BIT(0), 500, 10000);
+ if (timeout)
+ return timeout;
+
+ writel(0x19, edp->edp + DP_PHY_CFG);
+ writel(0x1f, edp->tx0 + TXn_HIGHZ_DRVR_EN);
+ writel(0x04, edp->tx0 + TXn_HIGHZ_DRVR_EN);
+ writel(0x00, edp->tx0 + TXn_TX_POL_INV);
+ writel(0x1f, edp->tx1 + TXn_HIGHZ_DRVR_EN);
+ writel(0x04, edp->tx1 + TXn_HIGHZ_DRVR_EN);
+ writel(0x00, edp->tx1 + TXn_TX_POL_INV);
+ writel(0x10, edp->tx0 + TXn_TX_DRV_LVL_OFFSET);
+ writel(0x10, edp->tx1 + TXn_TX_DRV_LVL_OFFSET);
+ writel(0x11, edp->tx0 + TXn_RES_CODE_LANE_OFFSET_TX0);
+ writel(0x11, edp->tx0 + TXn_RES_CODE_LANE_OFFSET_TX1);
+ writel(0x11, edp->tx1 + TXn_RES_CODE_LANE_OFFSET_TX0);
+ writel(0x11, edp->tx1 + TXn_RES_CODE_LANE_OFFSET_TX1);
+
+ writel(0x10, edp->tx0 + TXn_TX_EMP_POST1_LVL);
+ writel(0x10, edp->tx1 + TXn_TX_EMP_POST1_LVL);
+ writel(0x1f, edp->tx0 + TXn_TX_DRV_LVL);
+ writel(0x1f, edp->tx1 + TXn_TX_DRV_LVL);
+
+ writel(0x4, edp->tx0 + TXn_HIGHZ_DRVR_EN);
+ writel(0x3, edp->tx0 + TXn_TRANSCEIVER_BIAS_EN);
+ writel(0x4, edp->tx1 + TXn_HIGHZ_DRVR_EN);
+ writel(0x0, edp->tx1 + TXn_TRANSCEIVER_BIAS_EN);
+ writel(0x3, edp->edp + DP_PHY_CFG_1);
+
+ writel(0x18, edp->edp + DP_PHY_CFG);
+ usleep_range(100, 1000);
+
+ writel(0x19, edp->edp + DP_PHY_CFG);
+
+ return readl_poll_timeout(edp->edp + DP_PHY_STATUS,
+ val, val & BIT(1), 500, 10000);
+}
+
+static int qcom_edp_phy_power_off(struct phy *phy)
+{
+ const struct qcom_edp *edp = phy_get_drvdata(phy);
+
+ writel(DP_PHY_PD_CTL_PSR_PWRDN, edp->edp + DP_PHY_PD_CTL);
+
+ return 0;
+}
+
+static int qcom_edp_phy_exit(struct phy *phy)
+{
+ struct qcom_edp *edp = phy_get_drvdata(phy);
+
+ clk_bulk_disable_unprepare(ARRAY_SIZE(edp->clks), edp->clks);
+ regulator_bulk_disable(ARRAY_SIZE(edp->supplies), edp->supplies);
+
+ return 0;
+}
+
+static const struct phy_ops qcom_edp_ops = {
+ .init = qcom_edp_phy_init,
+ .configure = qcom_edp_phy_configure,
+ .power_on = qcom_edp_phy_power_on,
+ .power_off = qcom_edp_phy_power_off,
+ .exit = qcom_edp_phy_exit,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Embedded Display Port PLL driver block diagram for branch clocks
+ *
+ * +------------------------------+
+ * | EDP_VCO_CLK |
+ * | |
+ * | +-------------------+ |
+ * | | (EDP PLL/VCO) | |
+ * | +---------+---------+ |
+ * | v |
+ * | +----------+-----------+ |
+ * | | hsclk_divsel_clk_src | |
+ * | +----------+-----------+ |
+ * +------------------------------+
+ * |
+ * +---------<---------v------------>----------+
+ * | |
+ * +--------v----------------+ |
+ * | edp_phy_pll_link_clk | |
+ * | link_clk | |
+ * +--------+----------------+ |
+ * | |
+ * | |
+ * v v
+ * Input to DISPCC block |
+ * for link clk, crypto clk |
+ * and interface clock |
+ * |
+ * |
+ * +--------<------------+-----------------+---<---+
+ * | | |
+ * +----v---------+ +--------v-----+ +--------v------+
+ * | vco_divided | | vco_divided | | vco_divided |
+ * | _clk_src | | _clk_src | | _clk_src |
+ * | | | | | |
+ * |divsel_six | | divsel_two | | divsel_four |
+ * +-------+------+ +-----+--------+ +--------+------+
+ * | | |
+ * v---->----------v-------------<------v
+ * |
+ * +----------+-----------------+
+ * | edp_phy_pll_vco_div_clk |
+ * +---------+------------------+
+ * |
+ * v
+ * Input to DISPCC block
+ * for EDP pixel clock
+ *
+ */
+static int qcom_edp_dp_pixel_clk_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
+{
+ switch (req->rate) {
+ case 1620000000UL / 2:
+ case 2700000000UL / 2:
+ /* 5.4 and 8.1 GHz are same link rate as 2.7GHz, i.e. div 4 and div 6 */
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static unsigned long
+qcom_edp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
+{
+ const struct qcom_edp *edp = container_of(hw, struct qcom_edp, dp_pixel_hw);
+ const struct phy_configure_opts_dp *dp_opts = &edp->dp_opts;
+
+ switch (dp_opts->link_rate) {
+ case 1620:
+ return 1620000000UL / 2;
+ case 2700:
+ return 2700000000UL / 2;
+ case 5400:
+ return 5400000000UL / 4;
+ case 8100:
+ return 8100000000UL / 6;
+ default:
+ return 0;
+ }
+}
+
+static const struct clk_ops qcom_edp_dp_pixel_clk_ops = {
+ .determine_rate = qcom_edp_dp_pixel_clk_determine_rate,
+ .recalc_rate = qcom_edp_dp_pixel_clk_recalc_rate,
+};
+
+static int qcom_edp_dp_link_clk_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
+{
+ switch (req->rate) {
+ case 162000000:
+ case 270000000:
+ case 540000000:
+ case 810000000:
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static unsigned long
+qcom_edp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
+{
+ const struct qcom_edp *edp = container_of(hw, struct qcom_edp, dp_link_hw);
+ const struct phy_configure_opts_dp *dp_opts = &edp->dp_opts;
+
+ switch (dp_opts->link_rate) {
+ case 1620:
+ case 2700:
+ case 5400:
+ case 8100:
+ return dp_opts->link_rate * 100000;
+
+ default:
+ return 0;
+ }
+}
+
+static const struct clk_ops qcom_edp_dp_link_clk_ops = {
+ .determine_rate = qcom_edp_dp_link_clk_determine_rate,
+ .recalc_rate = qcom_edp_dp_link_clk_recalc_rate,
+};
+
+static int qcom_edp_clks_register(struct qcom_edp *edp, struct device_node *np)
+{
+ struct clk_hw_onecell_data *data;
+ struct clk_init_data init = { };
+ int ret;
+
+ data = devm_kzalloc(edp->dev, struct_size(data, hws, 2), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ init.ops = &qcom_edp_dp_link_clk_ops;
+ init.name = "edp_phy_pll_link_clk";
+ edp->dp_link_hw.init = &init;
+ ret = devm_clk_hw_register(edp->dev, &edp->dp_link_hw);
+ if (ret)
+ return ret;
+
+ init.ops = &qcom_edp_dp_pixel_clk_ops;
+ init.name = "edp_phy_pll_vco_div_clk";
+ edp->dp_pixel_hw.init = &init;
+ ret = devm_clk_hw_register(edp->dev, &edp->dp_pixel_hw);
+ if (ret)
+ return ret;
+
+ data->hws[0] = &edp->dp_link_hw;
+ data->hws[1] = &edp->dp_pixel_hw;
+ data->num = 2;
+
+ return devm_of_clk_add_hw_provider(edp->dev, of_clk_hw_onecell_get, data);
+}
+
+static int qcom_edp_phy_probe(struct platform_device *pdev)
+{
+ struct phy_provider *phy_provider;
+ struct device *dev = &pdev->dev;
+ struct qcom_edp *edp;
+ int ret;
+
+ edp = devm_kzalloc(dev, sizeof(*edp), GFP_KERNEL);
+ if (!edp)
+ return -ENOMEM;
+
+ edp->dev = dev;
+
+ edp->edp = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(edp->edp))
+ return PTR_ERR(edp->edp);
+
+ edp->tx0 = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(edp->tx0))
+ return PTR_ERR(edp->tx0);
+
+ edp->tx1 = devm_platform_ioremap_resource(pdev, 2);
+ if (IS_ERR(edp->tx1))
+ return PTR_ERR(edp->tx1);
+
+ edp->pll = devm_platform_ioremap_resource(pdev, 3);
+ if (IS_ERR(edp->pll))
+ return PTR_ERR(edp->pll);
+
+ edp->clks[0].id = "aux";
+ edp->clks[1].id = "cfg_ahb";
+ ret = devm_clk_bulk_get(dev, ARRAY_SIZE(edp->clks), edp->clks);
+ if (ret)
+ return ret;
+
+ edp->supplies[0].supply = "vdda-phy";
+ edp->supplies[1].supply = "vdda-pll";
+ ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(edp->supplies), edp->supplies);
+ if (ret)
+ return ret;
+
+ ret = qcom_edp_clks_register(edp, pdev->dev.of_node);
+ if (ret)
+ return ret;
+
+ edp->phy = devm_phy_create(dev, pdev->dev.of_node, &qcom_edp_ops);
+ if (IS_ERR(edp->phy)) {
+ dev_err(dev, "failed to register phy\n");
+ return PTR_ERR(edp->phy);
+ }
+
+ phy_set_drvdata(edp->phy, edp);
+
+ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+ return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static const struct of_device_id qcom_edp_phy_match_table[] = {
+ { .compatible = "qcom,sc8180x-edp-phy" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, qcom_edp_phy_match_table);
+
+static struct platform_driver qcom_edp_phy_driver = {
+ .probe = qcom_edp_phy_probe,
+ .driver = {
+ .name = "qcom-edp-phy",
+ .of_match_table = qcom_edp_phy_match_table,
+ },
+};
+
+module_platform_driver(qcom_edp_phy_driver);
+
+MODULE_AUTHOR("Bjorn Andersson <bjorn.andersson@linaro.org>");
+MODULE_DESCRIPTION("Qualcomm eDP QMP PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c
index c96639d5f581..8ea87c69f463 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.c
@@ -2866,6 +2866,215 @@ static const struct qmp_phy_init_tbl qcm2290_usb3_pcs_tbl[] = {
QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SIGDET_LVL, 0x88),
};
+static const struct qmp_phy_init_tbl sm8450_qmp_gen3x1_pcie_serdes_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_SELECT, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x42),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE1_MODE0, 0x24),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE2_MODE1, 0x03),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE1_MODE1, 0xb4),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL, 0x11),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x82),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE0, 0x03),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE0, 0x55),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE0, 0x55),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x1a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0x68),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE1, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE1, 0xaa),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE1, 0xab),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x14),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x36),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xca),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x18),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0xa2),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_BUF_ENABLE, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_EN_CENTER, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER2, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE0, 0xde),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE0, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE1, 0x4c),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE1, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_ENABLE1, 0x90),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen3x1_pcie_tx_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x20),
+ QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0x75),
+ QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x3f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x04),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen3x1_pcie_rx_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0x7f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0xff),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xbf),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xd8),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0xdc),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0xdc),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0x5c),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa6),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH4, 0x38),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0xf0),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_TX_ADAPT_POST_THRESH, 0xf0),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FO_GAIN, 0x09),
+ QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_GAIN, 0x05),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen3x1_pcie_pcs_tbl[] = {
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x77),
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_RATE_SLEW_CNTRL1, 0x0b),
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x05),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen3x1_pcie_pcs_misc_tbl[] = {
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00),
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_INT_AUX_CLK_CONFIG1, 0x00),
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_EQ_CONFIG2, 0x0f),
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER2, 0x01),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE0, 0xde),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE0, 0x07),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE1, 0x97),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE1, 0x0c),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_ENABLE1, 0x90),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x36),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x08),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x46),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_CFG, 0x04),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x1a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x14),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x82),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0xd0),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE0, 0x55),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE0, 0x55),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE0, 0x03),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE1, 0x55),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE1, 0x55),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE1, 0x05),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_SELECT, 0x34),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x12),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE0, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x04),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MISC1, 0x88),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORE_CLK_EN, 0x20),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_CONFIG, 0x06),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MODE, 0x14),
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_DC_LEVEL_CTRL, 0x0f),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_tx_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_TX_LANE_MODE_1, 0x05),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_TX_LANE_MODE_2, 0xf6),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_TX_RES_CODE_LANE_OFFSET_TX, 0x1a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_TX_RES_CODE_LANE_OFFSET_RX, 0x0c),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rx_tbl[] = {
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_PI_CONTROLS, 0x16),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B1, 0xcc),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B2, 0x12),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B3, 0xcc),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B5, 0x4a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B6, 0x29),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B0, 0xc5),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B1, 0xad),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B2, 0xb6),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B3, 0xc0),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B4, 0x1f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B5, 0xfb),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B6, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B0, 0xc7),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B1, 0xef),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B2, 0xbf),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B3, 0xa0),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B4, 0x81),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B5, 0xde),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B6, 0x7f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_PHPRE_CTRL, 0x20),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_AUX_DATA_THRESH_BIN_RATE_0_1, 0x3f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_AUX_DATA_THRESH_BIN_RATE_2_3, 0x37),
+
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_DFE_3, 0x05),
+
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH1_RATE3, 0x1f),
+
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH2_RATE3, 0x1f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH3_RATE3, 0x1f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH4_RATE3, 0x1f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH5_RATE3, 0x1f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH6_RATE3, 0x1f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH1_RATE210, 0x1f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH2_RATE210, 0x1f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH3_RATE210, 0x1f),
+
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_FO_GAIN_RATE2, 0x0c),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_FO_GAIN_RATE3, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_VGA_CAL_MAN_VAL, 0x0a),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0b),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_IDAC_SAOFFSET, 0x10),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_DFE_DAC_ENABLE1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_GM_CAL, 0x0f),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH1, 0x00),
+ QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH2, 0x1f),
+};
+
+/* Register names should be validated, they might be different for this PHY */
+static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_tbl[] = {
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG2, 0x16),
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG3, 0x22),
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_G3S2_PRE_GAIN, 0x2e),
+ QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x99),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = {
+ QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
+ QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00),
+ QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5, 0x02),
+ QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_EQ_CONFIG1, 0x16),
+ QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_RX_MARGINING_CONFIG3, 0x28),
+ QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN, 0x2e),
+};
+
struct qmp_phy;
/* struct qmp_phy_cfg - per-PHY initialization config */
@@ -3094,6 +3303,10 @@ static const char * const qmp_v4_sm8250_usbphy_clk_l[] = {
"aux", "ref_clk_src", "com_aux"
};
+static const char * const sm8450_ufs_phy_clk_l[] = {
+ "qref", "ref", "ref_aux",
+};
+
static const char * const sdm845_ufs_phy_clk_l[] = {
"ref", "ref_aux",
};
@@ -4090,6 +4303,94 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
.pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX,
};
+static const struct qmp_phy_cfg sm8450_ufsphy_cfg = {
+ .type = PHY_TYPE_UFS,
+ .nlanes = 2,
+
+ .serdes_tbl = sm8350_ufsphy_serdes_tbl,
+ .serdes_tbl_num = ARRAY_SIZE(sm8350_ufsphy_serdes_tbl),
+ .tx_tbl = sm8350_ufsphy_tx_tbl,
+ .tx_tbl_num = ARRAY_SIZE(sm8350_ufsphy_tx_tbl),
+ .rx_tbl = sm8350_ufsphy_rx_tbl,
+ .rx_tbl_num = ARRAY_SIZE(sm8350_ufsphy_rx_tbl),
+ .pcs_tbl = sm8350_ufsphy_pcs_tbl,
+ .pcs_tbl_num = ARRAY_SIZE(sm8350_ufsphy_pcs_tbl),
+ .clk_list = sm8450_ufs_phy_clk_l,
+ .num_clks = ARRAY_SIZE(sm8450_ufs_phy_clk_l),
+ .vreg_list = qmp_phy_vreg_l,
+ .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
+ .regs = sm8150_ufsphy_regs_layout,
+
+ .start_ctrl = SERDES_START,
+ .pwrdn_ctrl = SW_PWRDN,
+ .phy_status = PHYSTATUS,
+
+ .is_dual_lane_phy = true,
+};
+
+static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
+ .type = PHY_TYPE_PCIE,
+ .nlanes = 1,
+
+ .serdes_tbl = sm8450_qmp_gen3x1_pcie_serdes_tbl,
+ .serdes_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl),
+ .tx_tbl = sm8450_qmp_gen3x1_pcie_tx_tbl,
+ .tx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_tx_tbl),
+ .rx_tbl = sm8450_qmp_gen3x1_pcie_rx_tbl,
+ .rx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_rx_tbl),
+ .pcs_tbl = sm8450_qmp_gen3x1_pcie_pcs_tbl,
+ .pcs_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_tbl),
+ .pcs_misc_tbl = sm8450_qmp_gen3x1_pcie_pcs_misc_tbl,
+ .pcs_misc_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_misc_tbl),
+ .clk_list = sdm845_pciephy_clk_l,
+ .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l),
+ .reset_list = sdm845_pciephy_reset_l,
+ .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l),
+ .vreg_list = qmp_phy_vreg_l,
+ .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
+ .regs = sm8250_pcie_regs_layout,
+
+ .start_ctrl = SERDES_START | PCS_START,
+ .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL,
+ .phy_status = PHYSTATUS,
+
+ .has_pwrdn_delay = true,
+ .pwrdn_delay_min = 995, /* us */
+ .pwrdn_delay_max = 1005, /* us */
+};
+
+static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
+ .type = PHY_TYPE_PCIE,
+ .nlanes = 2,
+
+ .serdes_tbl = sm8450_qmp_gen4x2_pcie_serdes_tbl,
+ .serdes_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl),
+ .tx_tbl = sm8450_qmp_gen4x2_pcie_tx_tbl,
+ .tx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_tx_tbl),
+ .rx_tbl = sm8450_qmp_gen4x2_pcie_rx_tbl,
+ .rx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rx_tbl),
+ .pcs_tbl = sm8450_qmp_gen4x2_pcie_pcs_tbl,
+ .pcs_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_tbl),
+ .pcs_misc_tbl = sm8450_qmp_gen4x2_pcie_pcs_misc_tbl,
+ .pcs_misc_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl),
+ .clk_list = sdm845_pciephy_clk_l,
+ .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l),
+ .reset_list = sdm845_pciephy_reset_l,
+ .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l),
+ .vreg_list = qmp_phy_vreg_l,
+ .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
+ .regs = sm8250_pcie_regs_layout,
+
+ .start_ctrl = SERDES_START | PCS_START,
+ .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL,
+ .phy_status = PHYSTATUS_4_20,
+
+ .is_dual_lane_phy = true,
+ .has_pwrdn_delay = true,
+ .pwrdn_delay_min = 995, /* us */
+ .pwrdn_delay_max = 1005, /* us */
+};
+
static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = {
.type = PHY_TYPE_USB3,
.nlanes = 1,
@@ -5749,6 +6050,18 @@ static const struct of_device_id qcom_qmp_phy_of_match_table[] = {
.compatible = "qcom,sm8350-qmp-usb3-uni-phy",
.data = &sm8350_usb3_uniphy_cfg,
}, {
+ .compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy",
+ .data = &sm8450_qmp_gen3x1_pciephy_cfg,
+ }, {
+ .compatible = "qcom,sm8450-qmp-gen4x2-pcie-phy",
+ .data = &sm8450_qmp_gen4x2_pciephy_cfg,
+ }, {
+ .compatible = "qcom,sm8450-qmp-ufs-phy",
+ .data = &sm8450_ufsphy_cfg,
+ }, {
+ .compatible = "qcom,sm8450-qmp-usb3-phy",
+ .data = &sm8350_usb3phy_cfg,
+ }, {
.compatible = "qcom,qcm2290-qmp-usb3-phy",
.data = &qcm2290_usb3phy_cfg,
},
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h
index e15f461065bb..06b2556ed93a 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.h
@@ -551,6 +551,7 @@
/* Only for QMP V4 PHY - QSERDES COM registers */
#define QSERDES_V4_COM_BG_TIMER 0x00c
#define QSERDES_V4_COM_SSC_EN_CENTER 0x010
+#define QSERDES_V4_COM_SSC_ADJ_PER1 0x014
#define QSERDES_V4_COM_SSC_PER1 0x01c
#define QSERDES_V4_COM_SSC_PER2 0x020
#define QSERDES_V4_COM_SSC_STEP_SIZE1_MODE0 0x024
@@ -1069,6 +1070,16 @@
#define QPHY_V4_20_PCS_LANE1_INSIG_MX_CTRL2 0x828
/* Only for QMP V5 PHY - QSERDES COM registers */
+#define QSERDES_V5_COM_SSC_EN_CENTER 0x010
+#define QSERDES_V5_COM_SSC_PER1 0x01c
+#define QSERDES_V5_COM_SSC_PER2 0x020
+#define QSERDES_V5_COM_SSC_STEP_SIZE1_MODE0 0x024
+#define QSERDES_V5_COM_SSC_STEP_SIZE2_MODE0 0x028
+#define QSERDES_V5_COM_SSC_STEP_SIZE1_MODE1 0x030
+#define QSERDES_V5_COM_SSC_STEP_SIZE2_MODE1 0x034
+#define QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN 0x044
+#define QSERDES_V5_COM_CLK_ENABLE1 0x048
+#define QSERDES_V5_COM_SYSCLK_BUF_ENABLE 0x050
#define QSERDES_V5_COM_PLL_IVCO 0x058
#define QSERDES_V5_COM_CP_CTRL_MODE0 0x074
#define QSERDES_V5_COM_CP_CTRL_MODE1 0x078
@@ -1078,16 +1089,35 @@
#define QSERDES_V5_COM_PLL_CCTRL_MODE1 0x088
#define QSERDES_V5_COM_SYSCLK_EN_SEL 0x094
#define QSERDES_V5_COM_LOCK_CMP_EN 0x0a4
+#define QSERDES_V5_COM_LOCK_CMP_CFG 0x0a8
#define QSERDES_V5_COM_LOCK_CMP1_MODE0 0x0ac
#define QSERDES_V5_COM_LOCK_CMP2_MODE0 0x0b0
#define QSERDES_V5_COM_LOCK_CMP1_MODE1 0x0b4
#define QSERDES_V5_COM_DEC_START_MODE0 0x0bc
#define QSERDES_V5_COM_LOCK_CMP2_MODE1 0x0b8
#define QSERDES_V5_COM_DEC_START_MODE1 0x0c4
+#define QSERDES_V5_COM_DIV_FRAC_START1_MODE0 0x0cc
+#define QSERDES_V5_COM_DIV_FRAC_START2_MODE0 0x0d0
+#define QSERDES_V5_COM_DIV_FRAC_START3_MODE0 0x0d4
+#define QSERDES_V5_COM_DIV_FRAC_START1_MODE1 0x0d8
+#define QSERDES_V5_COM_DIV_FRAC_START2_MODE1 0x0dc
+#define QSERDES_V5_COM_DIV_FRAC_START3_MODE1 0x0e0
#define QSERDES_V5_COM_VCO_TUNE_MAP 0x10c
+#define QSERDES_V5_COM_VCO_TUNE1_MODE0 0x110
+#define QSERDES_V5_COM_VCO_TUNE2_MODE0 0x114
+#define QSERDES_V5_COM_VCO_TUNE1_MODE1 0x118
+#define QSERDES_V5_COM_VCO_TUNE2_MODE1 0x11c
#define QSERDES_V5_COM_VCO_TUNE_INITVAL2 0x124
+#define QSERDES_V5_COM_CLK_SELECT 0x154
#define QSERDES_V5_COM_HSCLK_SEL 0x158
#define QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL 0x15c
+#define QSERDES_V5_COM_CORECLK_DIV_MODE0 0x168
+#define QSERDES_V5_COM_CORECLK_DIV_MODE1 0x16c
+#define QSERDES_V5_COM_CORE_CLK_EN 0x174
+#define QSERDES_V5_COM_CMN_CONFIG 0x17c
+#define QSERDES_V5_COM_CMN_MISC1 0x19c
+#define QSERDES_V5_COM_CMN_MODE 0x1a4
+#define QSERDES_V5_COM_VCO_DC_LEVEL_CTRL 0x1a8
#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0 0x1ac
#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0 0x1b0
#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE1 0x1b4
@@ -1112,6 +1142,12 @@
#define QSERDES_V5_TX_PWM_GEAR_3_DIVIDER_BAND0_1 0x180
#define QSERDES_V5_TX_PWM_GEAR_4_DIVIDER_BAND0_1 0x184
+/* Only for QMP V5_20 PHY - TX registers */
+#define QSERDES_V5_20_TX_RES_CODE_LANE_OFFSET_TX 0x30
+#define QSERDES_V5_20_TX_RES_CODE_LANE_OFFSET_RX 0x34
+#define QSERDES_V5_20_TX_LANE_MODE_1 0x78
+#define QSERDES_V5_20_TX_LANE_MODE_2 0x7c
+
/* Only for QMP V5 PHY - RX registers */
#define QSERDES_V5_RX_UCDR_FO_GAIN 0x008
#define QSERDES_V5_RX_UCDR_SO_GAIN 0x014
@@ -1130,6 +1166,7 @@
#define QSERDES_V5_RX_AC_JTAG_ENABLE 0x068
#define QSERDES_V5_RX_AC_JTAG_MODE 0x078
#define QSERDES_V5_RX_RX_TERM_BW 0x080
+#define QSERDES_V5_RX_TX_ADAPT_POST_THRESH 0x0cc
#define QSERDES_V5_RX_VGA_CAL_CNTRL1 0x0d4
#define QSERDES_V5_RX_VGA_CAL_CNTRL2 0x0d8
#define QSERDES_V5_RX_GM_CAL 0x0dc
@@ -1167,6 +1204,73 @@
#define QSERDES_V5_RX_DCC_CTRL1 0x1a8
#define QSERDES_V5_RX_VTH_CODE 0x1b0
+/* Only for QMP V5_20 PHY - RX registers */
+#define QSERDES_V5_20_RX_UCDR_FO_GAIN_RATE2 0x008
+#define QSERDES_V5_20_RX_UCDR_FO_GAIN_RATE3 0x00c
+#define QSERDES_V5_20_RX_UCDR_PI_CONTROLS 0x020
+#define QSERDES_V5_20_RX_AUX_DATA_THRESH_BIN_RATE_0_1 0x02c
+#define QSERDES_V5_20_RX_AUX_DATA_THRESH_BIN_RATE_2_3 0x030
+#define QSERDES_V5_20_RX_RX_IDAC_SAOFFSET 0x07c
+#define QSERDES_V5_20_RX_DFE_3 0x090
+#define QSERDES_V5_20_RX_DFE_DAC_ENABLE1 0x0b4
+#define QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH1 0x0c4
+#define QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH2 0x0c8
+#define QSERDES_V5_20_RX_VGA_CAL_MAN_VAL 0x0dc
+#define QSERDES_V5_20_RX_GM_CAL 0x0ec
+#define QSERDES_V5_20_RX_RX_EQU_ADAPTOR_CNTRL4 0x108
+#define QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B1 0x164
+#define QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B2 0x168
+#define QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B3 0x16c
+#define QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B5 0x174
+#define QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B6 0x178
+#define QSERDES_V5_20_RX_RX_MODE_RATE2_B0 0x17c
+#define QSERDES_V5_20_RX_RX_MODE_RATE2_B1 0x180
+#define QSERDES_V5_20_RX_RX_MODE_RATE2_B2 0x184
+#define QSERDES_V5_20_RX_RX_MODE_RATE2_B3 0x188
+#define QSERDES_V5_20_RX_RX_MODE_RATE2_B4 0x18c
+#define QSERDES_V5_20_RX_RX_MODE_RATE2_B5 0x190
+#define QSERDES_V5_20_RX_RX_MODE_RATE2_B6 0x194
+#define QSERDES_V5_20_RX_RX_MODE_RATE3_B0 0x198
+#define QSERDES_V5_20_RX_RX_MODE_RATE3_B1 0x19c
+#define QSERDES_V5_20_RX_RX_MODE_RATE3_B2 0x1a0
+#define QSERDES_V5_20_RX_RX_MODE_RATE3_B3 0x1a4
+#define QSERDES_V5_20_RX_RX_MODE_RATE3_B4 0x1a8
+#define QSERDES_V5_20_RX_RX_MODE_RATE3_B5 0x1ac
+#define QSERDES_V5_20_RX_RX_MODE_RATE3_B6 0x1b0
+#define QSERDES_V5_20_RX_PHPRE_CTRL 0x1b4
+#define QSERDES_V5_20_RX_DFE_CTLE_POST_CAL_OFFSET 0x1c0
+#define QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH1_RATE210 0x1f4
+#define QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH1_RATE3 0x1f8
+#define QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH2_RATE210 0x1fc
+#define QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH2_RATE3 0x200
+#define QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH3_RATE210 0x204
+#define QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH3_RATE3 0x208
+#define QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH4_RATE3 0x210
+#define QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH5_RATE3 0x218
+#define QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH6_RATE3 0x220
+
+/* Only for QMP V5 PHY - USB/PCIe PCS registers */
+#define QPHY_V5_PCS_REFGEN_REQ_CONFIG1 0x0dc
+#define QPHY_V5_PCS_G3S2_PRE_GAIN 0x170
+#define QPHY_V5_PCS_RX_SIGDET_LVL 0x188
+#define QPHY_V5_PCS_RATE_SLEW_CNTRL1 0x198
+#define QPHY_V5_PCS_EQ_CONFIG2 0x1e0
+#define QPHY_V5_PCS_EQ_CONFIG3 0x1e4
+
+/* Only for QMP V5 PHY - PCS_PCIE registers */
+#define QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE 0x20
+#define QPHY_V5_PCS_PCIE_INT_AUX_CLK_CONFIG1 0x54
+#define QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS 0x94
+#define QPHY_V5_PCS_PCIE_EQ_CONFIG2 0xa8
+
+/* Only for QMP V5_20 PHY - PCIe PCS registers */
+#define QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE 0x01c
+#define QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS 0x090
+#define QPHY_V5_20_PCS_PCIE_EQ_CONFIG1 0x0a0
+#define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5 0x108
+#define QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN 0x15c
+#define QPHY_V5_20_PCS_PCIE_RX_MARGINING_CONFIG3 0x184
+
/* Only for QMP V5 PHY - UFS PCS registers */
#define QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_MSB 0x00c
#define QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_LSB 0x010
diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
index 1938365abbb3..eca77e44a4c1 100644
--- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
+++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
@@ -204,6 +204,7 @@ struct rockchip_usb2phy_port {
* @dcd_retries: The retry count used to track Data contact
* detection process.
* @edev: extcon device for notification registration
+ * @irq: muxed interrupt for single irq configuration
* @phy_cfg: phy register configuration, assigned by driver data.
* @ports: phy port instance.
*/
@@ -218,6 +219,7 @@ struct rockchip_usb2phy {
enum power_supply_type chg_type;
u8 dcd_retries;
struct extcon_dev *edev;
+ int irq;
const struct rockchip_usb2phy_cfg *phy_cfg;
struct rockchip_usb2phy_port ports[USB2PHY_NUM_PORTS];
};
@@ -750,7 +752,6 @@ static void rockchip_chg_detect_work(struct work_struct *work)
fallthrough;
case USB_CHG_STATE_SECONDARY_DONE:
rphy->chg_state = USB_CHG_STATE_DETECTED;
- delay = 0;
fallthrough;
case USB_CHG_STATE_DETECTED:
/* put the controller in normal mode */
@@ -927,6 +928,102 @@ static irqreturn_t rockchip_usb2phy_otg_mux_irq(int irq, void *data)
return IRQ_NONE;
}
+static irqreturn_t rockchip_usb2phy_irq(int irq, void *data)
+{
+ struct rockchip_usb2phy *rphy = data;
+ struct rockchip_usb2phy_port *rport;
+ irqreturn_t ret = IRQ_NONE;
+ unsigned int index;
+
+ for (index = 0; index < rphy->phy_cfg->num_ports; index++) {
+ rport = &rphy->ports[index];
+ if (!rport->phy)
+ continue;
+
+ /* Handle linestate irq for both otg port and host port */
+ ret = rockchip_usb2phy_linestate_irq(irq, rport);
+ }
+
+ return ret;
+}
+
+static int rockchip_usb2phy_port_irq_init(struct rockchip_usb2phy *rphy,
+ struct rockchip_usb2phy_port *rport,
+ struct device_node *child_np)
+{
+ int ret;
+
+ /*
+ * If the usb2 phy used combined irq for otg and host port,
+ * don't need to init otg and host port irq separately.
+ */
+ if (rphy->irq > 0)
+ return 0;
+
+ switch (rport->port_id) {
+ case USB2PHY_PORT_HOST:
+ rport->ls_irq = of_irq_get_byname(child_np, "linestate");
+ if (rport->ls_irq < 0) {
+ dev_err(rphy->dev, "no linestate irq provided\n");
+ return rport->ls_irq;
+ }
+
+ ret = devm_request_threaded_irq(rphy->dev, rport->ls_irq, NULL,
+ rockchip_usb2phy_linestate_irq,
+ IRQF_ONESHOT,
+ "rockchip_usb2phy", rport);
+ if (ret) {
+ dev_err(rphy->dev, "failed to request linestate irq handle\n");
+ return ret;
+ }
+ break;
+ case USB2PHY_PORT_OTG:
+ /*
+ * Some SoCs use one interrupt with otg-id/otg-bvalid/linestate
+ * interrupts muxed together, so probe the otg-mux interrupt first,
+ * if not found, then look for the regular interrupts one by one.
+ */
+ rport->otg_mux_irq = of_irq_get_byname(child_np, "otg-mux");
+ if (rport->otg_mux_irq > 0) {
+ ret = devm_request_threaded_irq(rphy->dev, rport->otg_mux_irq,
+ NULL,
+ rockchip_usb2phy_otg_mux_irq,
+ IRQF_ONESHOT,
+ "rockchip_usb2phy_otg",
+ rport);
+ if (ret) {
+ dev_err(rphy->dev,
+ "failed to request otg-mux irq handle\n");
+ return ret;
+ }
+ } else {
+ rport->bvalid_irq = of_irq_get_byname(child_np, "otg-bvalid");
+ if (rport->bvalid_irq < 0) {
+ dev_err(rphy->dev, "no vbus valid irq provided\n");
+ ret = rport->bvalid_irq;
+ return ret;
+ }
+
+ ret = devm_request_threaded_irq(rphy->dev, rport->bvalid_irq,
+ NULL,
+ rockchip_usb2phy_bvalid_irq,
+ IRQF_ONESHOT,
+ "rockchip_usb2phy_bvalid",
+ rport);
+ if (ret) {
+ dev_err(rphy->dev,
+ "failed to request otg-bvalid irq handle\n");
+ return ret;
+ }
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int rockchip_usb2phy_host_port_init(struct rockchip_usb2phy *rphy,
struct rockchip_usb2phy_port *rport,
struct device_node *child_np)
@@ -940,18 +1037,9 @@ static int rockchip_usb2phy_host_port_init(struct rockchip_usb2phy *rphy,
mutex_init(&rport->mutex);
INIT_DELAYED_WORK(&rport->sm_work, rockchip_usb2phy_sm_work);
- rport->ls_irq = of_irq_get_byname(child_np, "linestate");
- if (rport->ls_irq < 0) {
- dev_err(rphy->dev, "no linestate irq provided\n");
- return rport->ls_irq;
- }
-
- ret = devm_request_threaded_irq(rphy->dev, rport->ls_irq, NULL,
- rockchip_usb2phy_linestate_irq,
- IRQF_ONESHOT,
- "rockchip_usb2phy", rport);
+ ret = rockchip_usb2phy_port_irq_init(rphy, rport, child_np);
if (ret) {
- dev_err(rphy->dev, "failed to request linestate irq handle\n");
+ dev_err(rphy->dev, "failed to setup host irq\n");
return ret;
}
@@ -1000,43 +1088,10 @@ static int rockchip_usb2phy_otg_port_init(struct rockchip_usb2phy *rphy,
INIT_DELAYED_WORK(&rport->chg_work, rockchip_chg_detect_work);
INIT_DELAYED_WORK(&rport->otg_sm_work, rockchip_usb2phy_otg_sm_work);
- /*
- * Some SoCs use one interrupt with otg-id/otg-bvalid/linestate
- * interrupts muxed together, so probe the otg-mux interrupt first,
- * if not found, then look for the regular interrupts one by one.
- */
- rport->otg_mux_irq = of_irq_get_byname(child_np, "otg-mux");
- if (rport->otg_mux_irq > 0) {
- ret = devm_request_threaded_irq(rphy->dev, rport->otg_mux_irq,
- NULL,
- rockchip_usb2phy_otg_mux_irq,
- IRQF_ONESHOT,
- "rockchip_usb2phy_otg",
- rport);
- if (ret) {
- dev_err(rphy->dev,
- "failed to request otg-mux irq handle\n");
- goto out;
- }
- } else {
- rport->bvalid_irq = of_irq_get_byname(child_np, "otg-bvalid");
- if (rport->bvalid_irq < 0) {
- dev_err(rphy->dev, "no vbus valid irq provided\n");
- ret = rport->bvalid_irq;
- goto out;
- }
-
- ret = devm_request_threaded_irq(rphy->dev, rport->bvalid_irq,
- NULL,
- rockchip_usb2phy_bvalid_irq,
- IRQF_ONESHOT,
- "rockchip_usb2phy_bvalid",
- rport);
- if (ret) {
- dev_err(rphy->dev,
- "failed to request otg-bvalid irq handle\n");
- goto out;
- }
+ ret = rockchip_usb2phy_port_irq_init(rphy, rport, child_np);
+ if (ret) {
+ dev_err(rphy->dev, "failed to init irq for host port\n");
+ goto out;
}
if (!IS_ERR(rphy->edev)) {
@@ -1074,12 +1129,19 @@ static int rockchip_usb2phy_probe(struct platform_device *pdev)
return -EINVAL;
}
- if (!dev->parent || !dev->parent->of_node)
- return -EINVAL;
+ if (!dev->parent || !dev->parent->of_node) {
+ rphy->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,usbgrf");
+ if (IS_ERR(rphy->grf)) {
+ dev_err(dev, "failed to locate usbgrf\n");
+ return PTR_ERR(rphy->grf);
+ }
+ }
- rphy->grf = syscon_node_to_regmap(dev->parent->of_node);
- if (IS_ERR(rphy->grf))
- return PTR_ERR(rphy->grf);
+ else {
+ rphy->grf = syscon_node_to_regmap(dev->parent->of_node);
+ if (IS_ERR(rphy->grf))
+ return PTR_ERR(rphy->grf);
+ }
if (of_device_is_compatible(np, "rockchip,rv1108-usb2phy")) {
rphy->usbgrf =
@@ -1091,16 +1153,26 @@ static int rockchip_usb2phy_probe(struct platform_device *pdev)
rphy->usbgrf = NULL;
}
- if (of_property_read_u32(np, "reg", &reg)) {
+ if (of_property_read_u32_index(np, "reg", 0, &reg)) {
dev_err(dev, "the reg property is not assigned in %pOFn node\n",
np);
return -EINVAL;
}
+ /* support address_cells=2 */
+ if (reg == 0) {
+ if (of_property_read_u32_index(np, "reg", 1, &reg)) {
+ dev_err(dev, "the reg property is not assigned in %pOFn node\n",
+ np);
+ return -EINVAL;
+ }
+ }
+
rphy->dev = dev;
phy_cfgs = match->data;
rphy->chg_state = USB_CHG_STATE_UNDEFINED;
rphy->chg_type = POWER_SUPPLY_TYPE_UNKNOWN;
+ rphy->irq = platform_get_irq_optional(pdev, 0);
platform_set_drvdata(pdev, rphy);
ret = rockchip_usb2phy_extcon_register(rphy);
@@ -1180,6 +1252,20 @@ next_child:
}
provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+
+ if (rphy->irq > 0) {
+ ret = devm_request_threaded_irq(rphy->dev, rphy->irq, NULL,
+ rockchip_usb2phy_irq,
+ IRQF_ONESHOT,
+ "rockchip_usb2phy",
+ rphy);
+ if (ret) {
+ dev_err(rphy->dev,
+ "failed to request usb2phy irq handle\n");
+ goto put_child;
+ }
+ }
+
return PTR_ERR_OR_ZERO(provider);
put_child:
@@ -1418,6 +1504,69 @@ static const struct rockchip_usb2phy_cfg rk3399_phy_cfgs[] = {
{ /* sentinel */ }
};
+static const struct rockchip_usb2phy_cfg rk3568_phy_cfgs[] = {
+ {
+ .reg = 0xfe8a0000,
+ .num_ports = 2,
+ .clkout_ctl = { 0x0008, 4, 4, 1, 0 },
+ .port_cfgs = {
+ [USB2PHY_PORT_OTG] = {
+ .phy_sus = { 0x0000, 8, 0, 0, 0x1d1 },
+ .bvalid_det_en = { 0x0080, 2, 2, 0, 1 },
+ .bvalid_det_st = { 0x0084, 2, 2, 0, 1 },
+ .bvalid_det_clr = { 0x0088, 2, 2, 0, 1 },
+ .utmi_avalid = { 0x00c0, 10, 10, 0, 1 },
+ .utmi_bvalid = { 0x00c0, 9, 9, 0, 1 },
+ },
+ [USB2PHY_PORT_HOST] = {
+ /* Select suspend control from controller */
+ .phy_sus = { 0x0004, 8, 0, 0x1d2, 0x1d2 },
+ .ls_det_en = { 0x0080, 1, 1, 0, 1 },
+ .ls_det_st = { 0x0084, 1, 1, 0, 1 },
+ .ls_det_clr = { 0x0088, 1, 1, 0, 1 },
+ .utmi_ls = { 0x00c0, 17, 16, 0, 1 },
+ .utmi_hstdet = { 0x00c0, 19, 19, 0, 1 }
+ }
+ },
+ .chg_det = {
+ .opmode = { 0x0000, 3, 0, 5, 1 },
+ .cp_det = { 0x00c0, 24, 24, 0, 1 },
+ .dcp_det = { 0x00c0, 23, 23, 0, 1 },
+ .dp_det = { 0x00c0, 25, 25, 0, 1 },
+ .idm_sink_en = { 0x0008, 8, 8, 0, 1 },
+ .idp_sink_en = { 0x0008, 7, 7, 0, 1 },
+ .idp_src_en = { 0x0008, 9, 9, 0, 1 },
+ .rdm_pdwn_en = { 0x0008, 10, 10, 0, 1 },
+ .vdm_src_en = { 0x0008, 12, 12, 0, 1 },
+ .vdp_src_en = { 0x0008, 11, 11, 0, 1 },
+ },
+ },
+ {
+ .reg = 0xfe8b0000,
+ .num_ports = 2,
+ .clkout_ctl = { 0x0008, 4, 4, 1, 0 },
+ .port_cfgs = {
+ [USB2PHY_PORT_OTG] = {
+ .phy_sus = { 0x0000, 8, 0, 0x1d2, 0x1d1 },
+ .ls_det_en = { 0x0080, 0, 0, 0, 1 },
+ .ls_det_st = { 0x0084, 0, 0, 0, 1 },
+ .ls_det_clr = { 0x0088, 0, 0, 0, 1 },
+ .utmi_ls = { 0x00c0, 5, 4, 0, 1 },
+ .utmi_hstdet = { 0x00c0, 7, 7, 0, 1 }
+ },
+ [USB2PHY_PORT_HOST] = {
+ .phy_sus = { 0x0004, 8, 0, 0x1d2, 0x1d1 },
+ .ls_det_en = { 0x0080, 1, 1, 0, 1 },
+ .ls_det_st = { 0x0084, 1, 1, 0, 1 },
+ .ls_det_clr = { 0x0088, 1, 1, 0, 1 },
+ .utmi_ls = { 0x00c0, 17, 16, 0, 1 },
+ .utmi_hstdet = { 0x00c0, 19, 19, 0, 1 }
+ }
+ },
+ },
+ { /* sentinel */ }
+};
+
static const struct rockchip_usb2phy_cfg rv1108_phy_cfgs[] = {
{
.reg = 0x100,
@@ -1467,6 +1616,7 @@ static const struct of_device_id rockchip_usb2phy_dt_match[] = {
{ .compatible = "rockchip,rk3328-usb2phy", .data = &rk3328_phy_cfgs },
{ .compatible = "rockchip,rk3366-usb2phy", .data = &rk3366_phy_cfgs },
{ .compatible = "rockchip,rk3399-usb2phy", .data = &rk3399_phy_cfgs },
+ { .compatible = "rockchip,rk3568-usb2phy", .data = &rk3568_phy_cfgs },
{ .compatible = "rockchip,rv1108-usb2phy", .data = &rv1108_phy_cfgs },
{}
};
diff --git a/drivers/phy/socionext/Kconfig b/drivers/phy/socionext/Kconfig
index a3970e0f89da..8ae644756352 100644
--- a/drivers/phy/socionext/Kconfig
+++ b/drivers/phy/socionext/Kconfig
@@ -43,4 +43,4 @@ config PHY_UNIPHIER_AHCI
select GENERIC_PHY
help
Enable this to support PHY implemented in AHCI controller
- on UniPhier SoCs. This driver supports PXs2 and PXs3 SoCs.
+ on UniPhier SoCs. This driver supports Pro4, PXs2 and PXs3 SoCs.
diff --git a/drivers/phy/socionext/phy-uniphier-ahci.c b/drivers/phy/socionext/phy-uniphier-ahci.c
index 7427c40bf4ae..28cf3efe0695 100644
--- a/drivers/phy/socionext/phy-uniphier-ahci.c
+++ b/drivers/phy/socionext/phy-uniphier-ahci.c
@@ -19,8 +19,9 @@
struct uniphier_ahciphy_priv {
struct device *dev;
void __iomem *base;
- struct clk *clk, *clk_parent;
- struct reset_control *rst, *rst_parent;
+ struct clk *clk, *clk_parent, *clk_parent_gio;
+ struct reset_control *rst, *rst_parent, *rst_parent_gio;
+ struct reset_control *rst_pm, *rst_tx, *rst_rx;
const struct uniphier_ahciphy_soc_data *data;
};
@@ -28,10 +29,30 @@ struct uniphier_ahciphy_soc_data {
int (*init)(struct uniphier_ahciphy_priv *priv);
int (*power_on)(struct uniphier_ahciphy_priv *priv);
int (*power_off)(struct uniphier_ahciphy_priv *priv);
+ bool is_legacy;
bool is_ready_high;
bool is_phy_clk;
};
+/* for Pro4 */
+#define CKCTRL0 0x0
+#define CKCTRL0_CK_OFF BIT(9)
+#define CKCTRL0_NCY_MASK GENMASK(8, 4)
+#define CKCTRL0_NCY5_MASK GENMASK(3, 2)
+#define CKCTRL0_PRESCALE_MASK GENMASK(1, 0)
+#define CKCTRL1 0x4
+#define CKCTRL1_LOS_LVL_MASK GENMASK(20, 16)
+#define CKCTRL1_TX_LVL_MASK GENMASK(12, 8)
+#define RXTXCTRL 0x8
+#define RXTXCTRL_RX_EQ_VALL_MASK GENMASK(31, 29)
+#define RXTXCTRL_RX_DPLL_MODE_MASK GENMASK(28, 26)
+#define RXTXCTRL_TX_ATTEN_MASK GENMASK(14, 12)
+#define RXTXCTRL_TX_BOOST_MASK GENMASK(11, 8)
+#define RXTXCTRL_TX_EDGERATE_MASK GENMASK(3, 2)
+#define RXTXCTRL_TX_CKO_EN BIT(0)
+#define RSTPWR 0x30
+#define RSTPWR_RX_EN_VAL BIT(18)
+
/* for PXs2/PXs3 */
#define CKCTRL 0x0
#define CKCTRL_P0_READY BIT(15)
@@ -50,6 +71,128 @@ struct uniphier_ahciphy_soc_data {
#define RXCTRL_LOS_BIAS_MASK GENMASK(10, 8)
#define RXCTRL_RX_EQ_MASK GENMASK(2, 0)
+static int uniphier_ahciphy_pro4_init(struct uniphier_ahciphy_priv *priv)
+{
+ u32 val;
+
+ /* set phy MPLL parameters */
+ val = readl(priv->base + CKCTRL0);
+ val &= ~CKCTRL0_NCY_MASK;
+ val |= FIELD_PREP(CKCTRL0_NCY_MASK, 0x6);
+ val &= ~CKCTRL0_NCY5_MASK;
+ val |= FIELD_PREP(CKCTRL0_NCY5_MASK, 0x2);
+ val &= ~CKCTRL0_PRESCALE_MASK;
+ val |= FIELD_PREP(CKCTRL0_PRESCALE_MASK, 0x1);
+ writel(val, priv->base + CKCTRL0);
+
+ /* setup phy control parameters */
+ val = readl(priv->base + CKCTRL1);
+ val &= ~CKCTRL1_LOS_LVL_MASK;
+ val |= FIELD_PREP(CKCTRL1_LOS_LVL_MASK, 0x10);
+ val &= ~CKCTRL1_TX_LVL_MASK;
+ val |= FIELD_PREP(CKCTRL1_TX_LVL_MASK, 0x06);
+ writel(val, priv->base + CKCTRL1);
+
+ val = readl(priv->base + RXTXCTRL);
+ val &= ~RXTXCTRL_RX_EQ_VALL_MASK;
+ val |= FIELD_PREP(RXTXCTRL_RX_EQ_VALL_MASK, 0x6);
+ val &= ~RXTXCTRL_RX_DPLL_MODE_MASK;
+ val |= FIELD_PREP(RXTXCTRL_RX_DPLL_MODE_MASK, 0x3);
+ val &= ~RXTXCTRL_TX_ATTEN_MASK;
+ val |= FIELD_PREP(RXTXCTRL_TX_ATTEN_MASK, 0x3);
+ val &= ~RXTXCTRL_TX_BOOST_MASK;
+ val |= FIELD_PREP(RXTXCTRL_TX_BOOST_MASK, 0x5);
+ val &= ~RXTXCTRL_TX_EDGERATE_MASK;
+ val |= FIELD_PREP(RXTXCTRL_TX_EDGERATE_MASK, 0x0);
+ writel(val, priv->base + RXTXCTRL);
+
+ return 0;
+}
+
+static int uniphier_ahciphy_pro4_power_on(struct uniphier_ahciphy_priv *priv)
+{
+ u32 val;
+ int ret;
+
+ /* enable reference clock for phy */
+ val = readl(priv->base + CKCTRL0);
+ val &= ~CKCTRL0_CK_OFF;
+ writel(val, priv->base + CKCTRL0);
+
+ /* enable TX clock */
+ val = readl(priv->base + RXTXCTRL);
+ val |= RXTXCTRL_TX_CKO_EN;
+ writel(val, priv->base + RXTXCTRL);
+
+ /* wait until RX is ready */
+ ret = readl_poll_timeout(priv->base + RSTPWR, val,
+ !(val & RSTPWR_RX_EN_VAL), 200, 2000);
+ if (ret) {
+ dev_err(priv->dev, "Failed to check whether Rx is ready\n");
+ goto out_disable_clock;
+ }
+
+ /* release all reset */
+ ret = reset_control_deassert(priv->rst_pm);
+ if (ret) {
+ dev_err(priv->dev, "Failed to release PM reset\n");
+ goto out_disable_clock;
+ }
+
+ ret = reset_control_deassert(priv->rst_tx);
+ if (ret) {
+ dev_err(priv->dev, "Failed to release Tx reset\n");
+ goto out_reset_pm_assert;
+ }
+
+ ret = reset_control_deassert(priv->rst_rx);
+ if (ret) {
+ dev_err(priv->dev, "Failed to release Rx reset\n");
+ goto out_reset_tx_assert;
+ }
+
+ return 0;
+
+out_reset_tx_assert:
+ reset_control_assert(priv->rst_tx);
+out_reset_pm_assert:
+ reset_control_assert(priv->rst_pm);
+
+out_disable_clock:
+ /* disable TX clock */
+ val = readl(priv->base + RXTXCTRL);
+ val &= ~RXTXCTRL_TX_CKO_EN;
+ writel(val, priv->base + RXTXCTRL);
+
+ /* disable reference clock for phy */
+ val = readl(priv->base + CKCTRL0);
+ val |= CKCTRL0_CK_OFF;
+ writel(val, priv->base + CKCTRL0);
+
+ return ret;
+}
+
+static int uniphier_ahciphy_pro4_power_off(struct uniphier_ahciphy_priv *priv)
+{
+ u32 val;
+
+ reset_control_assert(priv->rst_rx);
+ reset_control_assert(priv->rst_tx);
+ reset_control_assert(priv->rst_pm);
+
+ /* disable TX clock */
+ val = readl(priv->base + RXTXCTRL);
+ val &= ~RXTXCTRL_TX_CKO_EN;
+ writel(val, priv->base + RXTXCTRL);
+
+ /* disable reference clock for phy */
+ val = readl(priv->base + CKCTRL0);
+ val |= CKCTRL0_CK_OFF;
+ writel(val, priv->base + CKCTRL0);
+
+ return 0;
+}
+
static void uniphier_ahciphy_pxs2_enable(struct uniphier_ahciphy_priv *priv,
bool enable)
{
@@ -142,14 +285,22 @@ static int uniphier_ahciphy_init(struct phy *phy)
struct uniphier_ahciphy_priv *priv = phy_get_drvdata(phy);
int ret;
- ret = clk_prepare_enable(priv->clk_parent);
+ ret = clk_prepare_enable(priv->clk_parent_gio);
if (ret)
return ret;
- ret = reset_control_deassert(priv->rst_parent);
+ ret = clk_prepare_enable(priv->clk_parent);
+ if (ret)
+ goto out_clk_gio_disable;
+
+ ret = reset_control_deassert(priv->rst_parent_gio);
if (ret)
goto out_clk_disable;
+ ret = reset_control_deassert(priv->rst_parent);
+ if (ret)
+ goto out_rst_gio_assert;
+
if (priv->data->init) {
ret = priv->data->init(priv);
if (ret)
@@ -160,8 +311,12 @@ static int uniphier_ahciphy_init(struct phy *phy)
out_rst_assert:
reset_control_assert(priv->rst_parent);
+out_rst_gio_assert:
+ reset_control_assert(priv->rst_parent_gio);
out_clk_disable:
clk_disable_unprepare(priv->clk_parent);
+out_clk_gio_disable:
+ clk_disable_unprepare(priv->clk_parent_gio);
return ret;
}
@@ -171,7 +326,9 @@ static int uniphier_ahciphy_exit(struct phy *phy)
struct uniphier_ahciphy_priv *priv = phy_get_drvdata(phy);
reset_control_assert(priv->rst_parent);
+ reset_control_assert(priv->rst_parent_gio);
clk_disable_unprepare(priv->clk_parent);
+ clk_disable_unprepare(priv->clk_parent_gio);
return 0;
}
@@ -265,6 +422,28 @@ static int uniphier_ahciphy_probe(struct platform_device *pdev)
if (IS_ERR(priv->rst))
return PTR_ERR(priv->rst);
+ if (priv->data->is_legacy) {
+ priv->clk_parent_gio = devm_clk_get(dev, "gio");
+ if (IS_ERR(priv->clk_parent_gio))
+ return PTR_ERR(priv->clk_parent_gio);
+ priv->rst_parent_gio =
+ devm_reset_control_get_shared(dev, "gio");
+ if (IS_ERR(priv->rst_parent_gio))
+ return PTR_ERR(priv->rst_parent_gio);
+
+ priv->rst_pm = devm_reset_control_get_shared(dev, "pm");
+ if (IS_ERR(priv->rst_pm))
+ return PTR_ERR(priv->rst_pm);
+
+ priv->rst_tx = devm_reset_control_get_shared(dev, "tx");
+ if (IS_ERR(priv->rst_tx))
+ return PTR_ERR(priv->rst_tx);
+
+ priv->rst_rx = devm_reset_control_get_shared(dev, "rx");
+ if (IS_ERR(priv->rst_rx))
+ return PTR_ERR(priv->rst_rx);
+ }
+
phy = devm_phy_create(dev, dev->of_node, &uniphier_ahciphy_ops);
if (IS_ERR(phy)) {
dev_err(dev, "failed to create phy\n");
@@ -279,9 +458,18 @@ static int uniphier_ahciphy_probe(struct platform_device *pdev)
return 0;
}
+static const struct uniphier_ahciphy_soc_data uniphier_pro4_data = {
+ .init = uniphier_ahciphy_pro4_init,
+ .power_on = uniphier_ahciphy_pro4_power_on,
+ .power_off = uniphier_ahciphy_pro4_power_off,
+ .is_legacy = true,
+ .is_phy_clk = false,
+};
+
static const struct uniphier_ahciphy_soc_data uniphier_pxs2_data = {
.power_on = uniphier_ahciphy_pxs2_power_on,
.power_off = uniphier_ahciphy_pxs2_power_off,
+ .is_legacy = false,
.is_ready_high = false,
.is_phy_clk = false,
};
@@ -290,12 +478,17 @@ static const struct uniphier_ahciphy_soc_data uniphier_pxs3_data = {
.init = uniphier_ahciphy_pxs3_init,
.power_on = uniphier_ahciphy_pxs2_power_on,
.power_off = uniphier_ahciphy_pxs2_power_off,
+ .is_legacy = false,
.is_ready_high = true,
.is_phy_clk = true,
};
static const struct of_device_id uniphier_ahciphy_match[] = {
{
+ .compatible = "socionext,uniphier-pro4-ahci-phy",
+ .data = &uniphier_pro4_data,
+ },
+ {
.compatible = "socionext,uniphier-pxs2-ahci-phy",
.data = &uniphier_pxs2_data,
},
diff --git a/drivers/phy/socionext/phy-uniphier-pcie.c b/drivers/phy/socionext/phy-uniphier-pcie.c
index 6bdbd1f214dd..ebca296ef123 100644
--- a/drivers/phy/socionext/phy-uniphier-pcie.c
+++ b/drivers/phy/socionext/phy-uniphier-pcie.c
@@ -27,6 +27,7 @@
#define TESTI_DAT_MASK GENMASK(13, 6)
#define TESTI_ADR_MASK GENMASK(5, 1)
#define TESTI_WR_EN BIT(0)
+#define TESTIO_PHY_SHIFT 16
#define PCL_PHY_TEST_O 0x2004
#define TESTO_DAT_MASK GENMASK(7, 0)
@@ -39,6 +40,10 @@
#define SG_USBPCIESEL 0x590
#define SG_USBPCIESEL_PCIE BIT(0)
+/* SC */
+#define SC_US3SRCSEL 0x2244
+#define SC_US3SRCSEL_2LANE GENMASK(9, 8)
+
#define PCL_PHY_R00 0
#define RX_EQ_ADJ_EN BIT(3) /* enable for EQ adjustment */
#define PCL_PHY_R06 6
@@ -47,6 +52,9 @@
#define PCL_PHY_R26 26
#define VCO_CTRL GENMASK(7, 4) /* Tx VCO adjustment value */
#define VCO_CTRL_INIT_VAL 5
+#define PCL_PHY_R28 28
+#define VCOPLL_CLMP GENMASK(3, 2) /* Tx VCOPLL clamp mode */
+#define VCOPLL_CLMP_VAL 0
struct uniphier_pciephy_priv {
void __iomem *base;
@@ -58,43 +66,57 @@ struct uniphier_pciephy_priv {
struct uniphier_pciephy_soc_data {
bool is_legacy;
+ bool is_dual_phy;
void (*set_phymode)(struct regmap *regmap);
};
static void uniphier_pciephy_testio_write(struct uniphier_pciephy_priv *priv,
- u32 data)
+ int id, u32 data)
{
+ if (id)
+ data <<= TESTIO_PHY_SHIFT;
+
/* need to read TESTO twice after accessing TESTI */
writel(data, priv->base + PCL_PHY_TEST_I);
readl(priv->base + PCL_PHY_TEST_O);
readl(priv->base + PCL_PHY_TEST_O);
}
+static u32 uniphier_pciephy_testio_read(struct uniphier_pciephy_priv *priv, int id)
+{
+ u32 val = readl(priv->base + PCL_PHY_TEST_O);
+
+ if (id)
+ val >>= TESTIO_PHY_SHIFT;
+
+ return val & TESTO_DAT_MASK;
+}
+
static void uniphier_pciephy_set_param(struct uniphier_pciephy_priv *priv,
- u32 reg, u32 mask, u32 param)
+ int id, u32 reg, u32 mask, u32 param)
{
u32 val;
/* read previous data */
val = FIELD_PREP(TESTI_DAT_MASK, 1);
val |= FIELD_PREP(TESTI_ADR_MASK, reg);
- uniphier_pciephy_testio_write(priv, val);
- val = readl(priv->base + PCL_PHY_TEST_O) & TESTO_DAT_MASK;
+ uniphier_pciephy_testio_write(priv, id, val);
+ val = uniphier_pciephy_testio_read(priv, id);
/* update value */
val &= ~mask;
val |= mask & param;
val = FIELD_PREP(TESTI_DAT_MASK, val);
val |= FIELD_PREP(TESTI_ADR_MASK, reg);
- uniphier_pciephy_testio_write(priv, val);
- uniphier_pciephy_testio_write(priv, val | TESTI_WR_EN);
- uniphier_pciephy_testio_write(priv, val);
+ uniphier_pciephy_testio_write(priv, id, val);
+ uniphier_pciephy_testio_write(priv, id, val | TESTI_WR_EN);
+ uniphier_pciephy_testio_write(priv, id, val);
/* read current data as dummy */
val = FIELD_PREP(TESTI_DAT_MASK, 1);
val |= FIELD_PREP(TESTI_ADR_MASK, reg);
- uniphier_pciephy_testio_write(priv, val);
- readl(priv->base + PCL_PHY_TEST_O);
+ uniphier_pciephy_testio_write(priv, id, val);
+ uniphier_pciephy_testio_read(priv, id);
}
static void uniphier_pciephy_assert(struct uniphier_pciephy_priv *priv)
@@ -120,7 +142,7 @@ static int uniphier_pciephy_init(struct phy *phy)
{
struct uniphier_pciephy_priv *priv = phy_get_drvdata(phy);
u32 val;
- int ret;
+ int ret, id;
ret = clk_prepare_enable(priv->clk);
if (ret)
@@ -148,12 +170,16 @@ static int uniphier_pciephy_init(struct phy *phy)
if (priv->data->is_legacy)
return 0;
- uniphier_pciephy_set_param(priv, PCL_PHY_R00,
+ for (id = 0; id < (priv->data->is_dual_phy ? 2 : 1); id++) {
+ uniphier_pciephy_set_param(priv, id, PCL_PHY_R00,
RX_EQ_ADJ_EN, RX_EQ_ADJ_EN);
- uniphier_pciephy_set_param(priv, PCL_PHY_R06, RX_EQ_ADJ,
+ uniphier_pciephy_set_param(priv, id, PCL_PHY_R06, RX_EQ_ADJ,
FIELD_PREP(RX_EQ_ADJ, RX_EQ_ADJ_VAL));
- uniphier_pciephy_set_param(priv, PCL_PHY_R26, VCO_CTRL,
+ uniphier_pciephy_set_param(priv, id, PCL_PHY_R26, VCO_CTRL,
FIELD_PREP(VCO_CTRL, VCO_CTRL_INIT_VAL));
+ uniphier_pciephy_set_param(priv, id, PCL_PHY_R28, VCOPLL_CLMP,
+ FIELD_PREP(VCOPLL_CLMP, VCOPLL_CLMP_VAL));
+ }
usleep_range(1, 10);
uniphier_pciephy_deassert(priv);
@@ -261,17 +287,31 @@ static void uniphier_pciephy_ld20_setmode(struct regmap *regmap)
SG_USBPCIESEL_PCIE, SG_USBPCIESEL_PCIE);
}
+static void uniphier_pciephy_nx1_setmode(struct regmap *regmap)
+{
+ regmap_update_bits(regmap, SC_US3SRCSEL,
+ SC_US3SRCSEL_2LANE, SC_US3SRCSEL_2LANE);
+}
+
static const struct uniphier_pciephy_soc_data uniphier_pro5_data = {
.is_legacy = true,
};
static const struct uniphier_pciephy_soc_data uniphier_ld20_data = {
.is_legacy = false,
+ .is_dual_phy = false,
.set_phymode = uniphier_pciephy_ld20_setmode,
};
static const struct uniphier_pciephy_soc_data uniphier_pxs3_data = {
.is_legacy = false,
+ .is_dual_phy = false,
+};
+
+static const struct uniphier_pciephy_soc_data uniphier_nx1_data = {
+ .is_legacy = false,
+ .is_dual_phy = true,
+ .set_phymode = uniphier_pciephy_nx1_setmode,
};
static const struct of_device_id uniphier_pciephy_match[] = {
@@ -287,6 +327,10 @@ static const struct of_device_id uniphier_pciephy_match[] = {
.compatible = "socionext,uniphier-pxs3-pcie-phy",
.data = &uniphier_pxs3_data,
},
+ {
+ .compatible = "socionext,uniphier-nx1-pcie-phy",
+ .data = &uniphier_nx1_data,
+ },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, uniphier_pciephy_match);
diff --git a/drivers/phy/socionext/phy-uniphier-usb3hs.c b/drivers/phy/socionext/phy-uniphier-usb3hs.c
index a9bc74121f38..8c8673df0084 100644
--- a/drivers/phy/socionext/phy-uniphier-usb3hs.c
+++ b/drivers/phy/socionext/phy-uniphier-usb3hs.c
@@ -447,6 +447,10 @@ static const struct of_device_id uniphier_u3hsphy_match[] = {
.compatible = "socionext,uniphier-pxs3-usb3-hsphy",
.data = &uniphier_pxs3_data,
},
+ {
+ .compatible = "socionext,uniphier-nx1-usb3-hsphy",
+ .data = &uniphier_pxs3_data,
+ },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, uniphier_u3hsphy_match);
diff --git a/drivers/phy/socionext/phy-uniphier-usb3ss.c b/drivers/phy/socionext/phy-uniphier-usb3ss.c
index 6700645bcbe6..f402ed8732fd 100644
--- a/drivers/phy/socionext/phy-uniphier-usb3ss.c
+++ b/drivers/phy/socionext/phy-uniphier-usb3ss.c
@@ -22,11 +22,13 @@
#include <linux/reset.h>
#define SSPHY_TESTI 0x0
-#define SSPHY_TESTO 0x4
#define TESTI_DAT_MASK GENMASK(13, 6)
#define TESTI_ADR_MASK GENMASK(5, 1)
#define TESTI_WR_EN BIT(0)
+#define SSPHY_TESTO 0x4
+#define TESTO_DAT_MASK GENMASK(7, 0)
+
#define PHY_F(regno, msb, lsb) { (regno), (msb), (lsb) }
#define CDR_CPD_TRIM PHY_F(7, 3, 0) /* RxPLL charge pump current */
@@ -84,12 +86,12 @@ static void uniphier_u3ssphy_set_param(struct uniphier_u3ssphy_priv *priv,
val = FIELD_PREP(TESTI_DAT_MASK, 1);
val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no);
uniphier_u3ssphy_testio_write(priv, val);
- val = readl(priv->base + SSPHY_TESTO);
+ val = readl(priv->base + SSPHY_TESTO) & TESTO_DAT_MASK;
/* update value */
- val &= ~FIELD_PREP(TESTI_DAT_MASK, field_mask);
+ val &= ~field_mask;
data = field_mask & (p->value << p->field.lsb);
- val = FIELD_PREP(TESTI_DAT_MASK, data);
+ val = FIELD_PREP(TESTI_DAT_MASK, data | val);
val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no);
uniphier_u3ssphy_testio_write(priv, val);
uniphier_u3ssphy_testio_write(priv, val | TESTI_WR_EN);
@@ -328,6 +330,10 @@ static const struct of_device_id uniphier_u3ssphy_match[] = {
.compatible = "socionext,uniphier-pxs3-usb3-ssphy",
.data = &uniphier_ld20_data,
},
+ {
+ .compatible = "socionext,uniphier-nx1-usb3-ssphy",
+ .data = &uniphier_ld20_data,
+ },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, uniphier_u3ssphy_match);
diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c
index e4f4a9be5132..2ce9bfd783d4 100644
--- a/drivers/phy/st/phy-stm32-usbphyc.c
+++ b/drivers/phy/st/phy-stm32-usbphyc.c
@@ -672,17 +672,15 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
usbphyc->vdda1v1 = devm_regulator_get(dev, "vdda1v1");
if (IS_ERR(usbphyc->vdda1v1)) {
- ret = PTR_ERR(usbphyc->vdda1v1);
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "failed to get vdda1v1 supply: %d\n", ret);
+ ret = dev_err_probe(dev, PTR_ERR(usbphyc->vdda1v1),
+ "failed to get vdda1v1 supply\n");
goto clk_disable;
}
usbphyc->vdda1v8 = devm_regulator_get(dev, "vdda1v8");
if (IS_ERR(usbphyc->vdda1v8)) {
- ret = PTR_ERR(usbphyc->vdda1v8);
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "failed to get vdda1v8 supply: %d\n", ret);
+ ret = dev_err_probe(dev, PTR_ERR(usbphyc->vdda1v8),
+ "failed to get vdda1v8 supply\n");
goto clk_disable;
}
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index 963de5913e50..aa5237eacd29 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -455,7 +455,7 @@ tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type,
name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
if (!name) {
of_node_put(ports);
- return ERR_PTR(-ENOMEM);
+ return NULL;
}
np = of_get_child_by_name(ports, name);
kfree(name);
diff --git a/drivers/phy/ti/phy-omap-control.c b/drivers/phy/ti/phy-omap-control.c
index 47482f106fab..76c5595f0859 100644
--- a/drivers/phy/ti/phy-omap-control.c
+++ b/drivers/phy/ti/phy-omap-control.c
@@ -26,7 +26,7 @@ void omap_control_pcie_pcs(struct device *dev, u8 delay)
u32 val;
struct omap_control_phy *control_phy;
- if (IS_ERR(dev) || !dev) {
+ if (IS_ERR_OR_NULL(dev)) {
pr_err("%s: invalid device\n", __func__);
return;
}
@@ -61,7 +61,7 @@ void omap_control_phy_power(struct device *dev, int on)
unsigned long rate;
struct omap_control_phy *control_phy;
- if (IS_ERR(dev) || !dev) {
+ if (IS_ERR_OR_NULL(dev)) {
pr_err("%s: invalid device\n", __func__);
return;
}
@@ -202,7 +202,7 @@ void omap_control_usb_set_mode(struct device *dev,
{
struct omap_control_phy *ctrl_phy;
- if (IS_ERR(dev) || !dev)
+ if (IS_ERR_OR_NULL(dev))
return;
ctrl_phy = dev_get_drvdata(dev);
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 82fa6148216c..098180fb1cfc 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -880,14 +880,14 @@ static int dispatch_proc_show(struct seq_file *m, void *v)
static int dispatch_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, dispatch_proc_show, PDE_DATA(inode));
+ return single_open(file, dispatch_proc_show, pde_data(inode));
}
static ssize_t dispatch_proc_write(struct file *file,
const char __user *userbuf,
size_t count, loff_t *pos)
{
- struct ibm_struct *ibm = PDE_DATA(file_inode(file));
+ struct ibm_struct *ibm = pde_data(file_inode(file));
char *kernbuf;
int ret;
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 352508d30467..f113dec98e21 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -1368,7 +1368,7 @@ static int lcd_proc_show(struct seq_file *m, void *v)
static int lcd_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, lcd_proc_show, PDE_DATA(inode));
+ return single_open(file, lcd_proc_show, pde_data(inode));
}
static int set_lcd_brightness(struct toshiba_acpi_dev *dev, int value)
@@ -1404,7 +1404,7 @@ static int set_lcd_status(struct backlight_device *bd)
static ssize_t lcd_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file));
+ struct toshiba_acpi_dev *dev = pde_data(file_inode(file));
char cmd[42];
size_t len;
int levels;
@@ -1469,13 +1469,13 @@ static int video_proc_show(struct seq_file *m, void *v)
static int video_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, video_proc_show, PDE_DATA(inode));
+ return single_open(file, video_proc_show, pde_data(inode));
}
static ssize_t video_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file));
+ struct toshiba_acpi_dev *dev = pde_data(file_inode(file));
char *buffer;
char *cmd;
int lcd_out = -1, crt_out = -1, tv_out = -1;
@@ -1580,13 +1580,13 @@ static int fan_proc_show(struct seq_file *m, void *v)
static int fan_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, fan_proc_show, PDE_DATA(inode));
+ return single_open(file, fan_proc_show, pde_data(inode));
}
static ssize_t fan_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file));
+ struct toshiba_acpi_dev *dev = pde_data(file_inode(file));
char cmd[42];
size_t len;
int value;
@@ -1628,13 +1628,13 @@ static int keys_proc_show(struct seq_file *m, void *v)
static int keys_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, keys_proc_show, PDE_DATA(inode));
+ return single_open(file, keys_proc_show, pde_data(inode));
}
static ssize_t keys_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file));
+ struct toshiba_acpi_dev *dev = pde_data(file_inode(file));
char cmd[42];
size_t len;
int value;
diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c
index 1ae458c02656..55ae72a2818b 100644
--- a/drivers/pnp/isapnp/proc.c
+++ b/drivers/pnp/isapnp/proc.c
@@ -22,7 +22,7 @@ static loff_t isapnp_proc_bus_lseek(struct file *file, loff_t off, int whence)
static ssize_t isapnp_proc_bus_read(struct file *file, char __user * buf,
size_t nbytes, loff_t * ppos)
{
- struct pnp_dev *dev = PDE_DATA(file_inode(file));
+ struct pnp_dev *dev = pde_data(file_inode(file));
int pos = *ppos;
int cnt, size = 256;
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 669ef4700c1a..f7e86ae9f72f 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -160,7 +160,7 @@ static int pnp_dock_thread(void *unused)
* No dock to manage
*/
case PNP_FUNCTION_NOT_SUPPORTED:
- complete_and_exit(&unload_sem, 0);
+ kthread_complete_and_exit(&unload_sem, 0);
case PNP_SYSTEM_NOT_DOCKED:
d = 0;
break;
@@ -170,7 +170,7 @@ static int pnp_dock_thread(void *unused)
default:
pnpbios_print_status("pnp_dock_thread", status);
printk(KERN_WARNING "PnPBIOS: disabling dock monitoring.\n");
- complete_and_exit(&unload_sem, 0);
+ kthread_complete_and_exit(&unload_sem, 0);
}
if (d != docked) {
if (pnp_dock_event(d, &now) == 0) {
@@ -183,7 +183,7 @@ static int pnp_dock_thread(void *unused)
}
}
}
- complete_and_exit(&unload_sem, 0);
+ kthread_complete_and_exit(&unload_sem, 0);
}
static int pnpbios_get_resources(struct pnp_dev *dev)
diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c
index a806830e3a40..0f0d819b157f 100644
--- a/drivers/pnp/pnpbios/proc.c
+++ b/drivers/pnp/pnpbios/proc.c
@@ -173,13 +173,13 @@ static int pnpbios_proc_show(struct seq_file *m, void *v)
static int pnpbios_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, pnpbios_proc_show, PDE_DATA(inode));
+ return single_open(file, pnpbios_proc_show, pde_data(inode));
}
static ssize_t pnpbios_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
- void *data = PDE_DATA(file_inode(file));
+ void *data = pde_data(file_inode(file));
struct pnp_bios_node *node;
int boot = (long)data >> 8;
u8 nodenum = (long)data;
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 93772ab8d7e3..c7552df32082 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -548,6 +548,73 @@ static void pwm_apply_state_debug(struct pwm_device *pwm,
}
}
+static int pwm_apply_legacy(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+ struct pwm_state initial_state = pwm->state;
+
+ if (state->polarity != pwm->state.polarity) {
+ if (!chip->ops->set_polarity)
+ return -EINVAL;
+
+ /*
+ * Changing the polarity of a running PWM is only allowed when
+ * the PWM driver implements ->apply().
+ */
+ if (pwm->state.enabled) {
+ chip->ops->disable(chip, pwm);
+
+ /*
+ * Update pwm->state already here in case
+ * .set_polarity() or another callback depend on that.
+ */
+ pwm->state.enabled = false;
+ }
+
+ err = chip->ops->set_polarity(chip, pwm, state->polarity);
+ if (err)
+ goto rollback;
+
+ pwm->state.polarity = state->polarity;
+ }
+
+ if (!state->enabled) {
+ if (pwm->state.enabled)
+ chip->ops->disable(chip, pwm);
+
+ return 0;
+ }
+
+ /*
+ * We cannot skip calling ->config even if state->period ==
+ * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle
+ * because we might have exited early in the last call to
+ * pwm_apply_state because of !state->enabled and so the two values in
+ * pwm->state might not be configured in hardware.
+ */
+ err = chip->ops->config(pwm->chip, pwm,
+ state->duty_cycle,
+ state->period);
+ if (err)
+ goto rollback;
+
+ pwm->state.period = state->period;
+ pwm->state.duty_cycle = state->duty_cycle;
+
+ if (!pwm->state.enabled) {
+ err = chip->ops->enable(chip, pwm);
+ if (err)
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ pwm->state = initial_state;
+ return err;
+}
+
/**
* pwm_apply_state() - atomically apply a new state to a PWM device
* @pwm: PWM device
@@ -580,70 +647,22 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state)
state->usage_power == pwm->state.usage_power)
return 0;
- if (chip->ops->apply) {
+ if (chip->ops->apply)
err = chip->ops->apply(chip, pwm, state);
- if (err)
- return err;
-
- trace_pwm_apply(pwm, state);
-
- pwm->state = *state;
-
- /*
- * only do this after pwm->state was applied as some
- * implementations of .get_state depend on this
- */
- pwm_apply_state_debug(pwm, state);
- } else {
- /*
- * FIXME: restore the initial state in case of error.
- */
- if (state->polarity != pwm->state.polarity) {
- if (!chip->ops->set_polarity)
- return -EINVAL;
+ else
+ err = pwm_apply_legacy(chip, pwm, state);
+ if (err)
+ return err;
- /*
- * Changing the polarity of a running PWM is
- * only allowed when the PWM driver implements
- * ->apply().
- */
- if (pwm->state.enabled) {
- chip->ops->disable(chip, pwm);
- pwm->state.enabled = false;
- }
+ trace_pwm_apply(pwm, state);
- err = chip->ops->set_polarity(chip, pwm,
- state->polarity);
- if (err)
- return err;
+ pwm->state = *state;
- pwm->state.polarity = state->polarity;
- }
-
- if (state->period != pwm->state.period ||
- state->duty_cycle != pwm->state.duty_cycle) {
- err = chip->ops->config(pwm->chip, pwm,
- state->duty_cycle,
- state->period);
- if (err)
- return err;
-
- pwm->state.duty_cycle = state->duty_cycle;
- pwm->state.period = state->period;
- }
-
- if (state->enabled != pwm->state.enabled) {
- if (state->enabled) {
- err = chip->ops->enable(chip, pwm);
- if (err)
- return err;
- } else {
- chip->ops->disable(chip, pwm);
- }
-
- pwm->state.enabled = state->enabled;
- }
- }
+ /*
+ * only do this after pwm->state was applied as some
+ * implementations of .get_state depend on this
+ */
+ pwm_apply_state_debug(pwm, state);
return 0;
}
diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c
index f97f82548293..5996049f66ec 100644
--- a/drivers/pwm/pwm-img.c
+++ b/drivers/pwm/pwm-img.c
@@ -128,11 +128,9 @@ static int img_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
duty = DIV_ROUND_UP(timebase * duty_ns, period_ns);
- ret = pm_runtime_get_sync(chip->dev);
- if (ret < 0) {
- pm_runtime_put_autosuspend(chip->dev);
+ ret = pm_runtime_resume_and_get(chip->dev);
+ if (ret < 0)
return ret;
- }
val = img_pwm_readl(pwm_chip, PWM_CTRL_CFG);
val &= ~(PWM_CTRL_CFG_DIV_MASK << PWM_CTRL_CFG_DIV_SHIFT(pwm->hwpwm));
@@ -184,10 +182,33 @@ static void img_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
pm_runtime_put_autosuspend(chip->dev);
}
+static int img_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+ if (!state->enabled) {
+ if (pwm->state.enabled)
+ img_pwm_disable(chip, pwm);
+
+ return 0;
+ }
+
+ err = img_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+ if (err)
+ return err;
+
+ if (!pwm->state.enabled)
+ err = img_pwm_enable(chip, pwm);
+
+ return err;
+}
+
static const struct pwm_ops img_pwm_ops = {
- .config = img_pwm_config,
- .enable = img_pwm_enable,
- .disable = img_pwm_disable,
+ .apply = img_pwm_apply,
.owner = THIS_MODULE,
};
diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c
index 203194f2c92e..86567add79db 100644
--- a/drivers/pwm/pwm-twl.c
+++ b/drivers/pwm/pwm-twl.c
@@ -58,9 +58,9 @@ static inline struct twl_pwm_chip *to_twl(struct pwm_chip *chip)
}
static int twl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
- int duty_ns, int period_ns)
+ u64 duty_ns, u64 period_ns)
{
- int duty_cycle = DIV_ROUND_UP(duty_ns * TWL_PWM_MAX, period_ns) + 1;
+ int duty_cycle = DIV64_U64_ROUND_UP(duty_ns * TWL_PWM_MAX, period_ns) + 1;
u8 pwm_config[2] = { 1, 0 };
int base, ret;
@@ -279,19 +279,65 @@ out:
mutex_unlock(&twl->mutex);
}
+static int twl4030_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+ if (!state->enabled) {
+ if (pwm->state.enabled)
+ twl4030_pwm_disable(chip, pwm);
+
+ return 0;
+ }
+
+ err = twl_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+ if (err)
+ return err;
+
+ if (!pwm->state.enabled)
+ err = twl4030_pwm_enable(chip, pwm);
+
+ return err;
+}
+
+static int twl6030_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+ if (!state->enabled) {
+ if (pwm->state.enabled)
+ twl6030_pwm_disable(chip, pwm);
+
+ return 0;
+ }
+
+ err = twl_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+ if (err)
+ return err;
+
+ if (!pwm->state.enabled)
+ err = twl6030_pwm_enable(chip, pwm);
+
+ return err;
+}
+
static const struct pwm_ops twl4030_pwm_ops = {
- .config = twl_pwm_config,
- .enable = twl4030_pwm_enable,
- .disable = twl4030_pwm_disable,
+ .apply = twl4030_pwm_apply,
.request = twl4030_pwm_request,
.free = twl4030_pwm_free,
.owner = THIS_MODULE,
};
static const struct pwm_ops twl6030_pwm_ops = {
- .config = twl_pwm_config,
- .enable = twl6030_pwm_enable,
- .disable = twl6030_pwm_disable,
+ .apply = twl6030_pwm_apply,
.owner = THIS_MODULE,
};
diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c
index 480bfc29782f..7170a315535b 100644
--- a/drivers/pwm/pwm-vt8500.c
+++ b/drivers/pwm/pwm-vt8500.c
@@ -70,7 +70,7 @@ static inline void vt8500_pwm_busy_wait(struct vt8500_chip *vt8500, int nr, u8 b
}
static int vt8500_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
- int duty_ns, int period_ns)
+ u64 duty_ns, u64 period_ns)
{
struct vt8500_chip *vt8500 = to_vt8500_chip(chip);
unsigned long long c;
@@ -102,8 +102,8 @@ static int vt8500_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
}
c = (unsigned long long)pv * duty_ns;
- do_div(c, period_ns);
- dc = c;
+
+ dc = div64_u64(c, period_ns);
writel(prescale, vt8500->base + REG_SCALAR(pwm->hwpwm));
vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_SCALAR_UPDATE);
@@ -176,11 +176,54 @@ static int vt8500_pwm_set_polarity(struct pwm_chip *chip,
return 0;
}
+static int vt8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int err;
+ bool enabled = pwm->state.enabled;
+
+ if (state->polarity != pwm->state.polarity) {
+ /*
+ * Changing the polarity of a running PWM is only allowed when
+ * the PWM driver implements ->apply().
+ */
+ if (enabled) {
+ vt8500_pwm_disable(chip, pwm);
+
+ enabled = false;
+ }
+
+ err = vt8500_pwm_set_polarity(chip, pwm, state->polarity);
+ if (err)
+ return err;
+ }
+
+ if (!state->enabled) {
+ if (enabled)
+ vt8500_pwm_disable(chip, pwm);
+
+ return 0;
+ }
+
+ /*
+ * We cannot skip calling ->config even if state->period ==
+ * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle
+ * because we might have exited early in the last call to
+ * pwm_apply_state because of !state->enabled and so the two values in
+ * pwm->state might not be configured in hardware.
+ */
+ err = vt8500_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+ if (err)
+ return err;
+
+ if (!enabled)
+ err = vt8500_pwm_enable(chip, pwm);
+
+ return err;
+}
+
static const struct pwm_ops vt8500_pwm_ops = {
- .enable = vt8500_pwm_enable,
- .disable = vt8500_pwm_disable,
- .config = vt8500_pwm_config,
- .set_polarity = vt8500_pwm_set_polarity,
+ .apply = vt8500_pwm_apply,
.owner = THIS_MODULE,
};
diff --git a/drivers/rapidio/switches/Kconfig b/drivers/rapidio/switches/Kconfig
index 3e18f9c51e29..02771ba3e54f 100644
--- a/drivers/rapidio/switches/Kconfig
+++ b/drivers/rapidio/switches/Kconfig
@@ -2,22 +2,11 @@
#
# RapidIO switches configuration
#
-config RAPIDIO_TSI57X
- tristate "IDT Tsi57x SRIO switches support"
- help
- Includes support for IDT Tsi57x family of serial RapidIO switches.
-
config RAPIDIO_CPS_XX
tristate "IDT CPS-xx SRIO switches support"
help
Includes support for IDT CPS-16/12/10/8 serial RapidIO switches.
-config RAPIDIO_TSI568
- tristate "Tsi568 SRIO switch support"
- default n
- help
- Includes support for IDT Tsi568 serial RapidIO switch.
-
config RAPIDIO_CPS_GEN2
tristate "IDT CPS Gen.2 SRIO switch support"
default n
diff --git a/drivers/rapidio/switches/Makefile b/drivers/rapidio/switches/Makefile
index 69e7de31e41c..ef1749a79c2b 100644
--- a/drivers/rapidio/switches/Makefile
+++ b/drivers/rapidio/switches/Makefile
@@ -3,8 +3,6 @@
# Makefile for RIO switches
#
-obj-$(CONFIG_RAPIDIO_TSI57X) += tsi57x.o
obj-$(CONFIG_RAPIDIO_CPS_XX) += idtcps.o
-obj-$(CONFIG_RAPIDIO_TSI568) += tsi568.o
obj-$(CONFIG_RAPIDIO_CPS_GEN2) += idt_gen2.o
obj-$(CONFIG_RAPIDIO_RXS_GEN3) += idt_gen3.o
diff --git a/drivers/rapidio/switches/tsi568.c b/drivers/rapidio/switches/tsi568.c
deleted file mode 100644
index 103b48a24980..000000000000
--- a/drivers/rapidio/switches/tsi568.c
+++ /dev/null
@@ -1,195 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RapidIO Tsi568 switch support
- *
- * Copyright 2009-2010 Integrated Device Technology, Inc.
- * Alexandre Bounine <alexandre.bounine@idt.com>
- * - Added EM support
- * - Modified switch operations initialization.
- *
- * Copyright 2005 MontaVista Software, Inc.
- * Matt Porter <mporter@kernel.crashing.org>
- */
-
-#include <linux/rio.h>
-#include <linux/rio_drv.h>
-#include <linux/rio_ids.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-#include "../rio.h"
-
-/* Global (broadcast) route registers */
-#define SPBC_ROUTE_CFG_DESTID 0x10070
-#define SPBC_ROUTE_CFG_PORT 0x10074
-
-/* Per port route registers */
-#define SPP_ROUTE_CFG_DESTID(n) (0x11070 + 0x100*n)
-#define SPP_ROUTE_CFG_PORT(n) (0x11074 + 0x100*n)
-
-#define TSI568_SP_MODE(n) (0x11004 + 0x100*n)
-#define TSI568_SP_MODE_PW_DIS 0x08000000
-
-static int
-tsi568_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
- u16 table, u16 route_destid, u8 route_port)
-{
- if (table == RIO_GLOBAL_TABLE) {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_DESTID, route_destid);
- rio_mport_write_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_PORT, route_port);
- } else {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_DESTID(table),
- route_destid);
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_PORT(table), route_port);
- }
-
- udelay(10);
-
- return 0;
-}
-
-static int
-tsi568_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
- u16 table, u16 route_destid, u8 *route_port)
-{
- int ret = 0;
- u32 result;
-
- if (table == RIO_GLOBAL_TABLE) {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_DESTID, route_destid);
- rio_mport_read_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_PORT, &result);
- } else {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_DESTID(table),
- route_destid);
- rio_mport_read_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_PORT(table), &result);
- }
-
- *route_port = result;
- if (*route_port > 15)
- ret = -1;
-
- return ret;
-}
-
-static int
-tsi568_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount,
- u16 table)
-{
- u32 route_idx;
- u32 lut_size;
-
- lut_size = (mport->sys_size) ? 0x1ff : 0xff;
-
- if (table == RIO_GLOBAL_TABLE) {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_DESTID, 0x80000000);
- for (route_idx = 0; route_idx <= lut_size; route_idx++)
- rio_mport_write_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_PORT,
- RIO_INVALID_ROUTE);
- } else {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_DESTID(table),
- 0x80000000);
- for (route_idx = 0; route_idx <= lut_size; route_idx++)
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_PORT(table),
- RIO_INVALID_ROUTE);
- }
-
- return 0;
-}
-
-static int
-tsi568_em_init(struct rio_dev *rdev)
-{
- u32 regval;
- int portnum;
-
- pr_debug("TSI568 %s [%d:%d]\n", __func__, rdev->destid, rdev->hopcount);
-
- /* Make sure that Port-Writes are disabled (for all ports) */
- for (portnum = 0;
- portnum < RIO_GET_TOTAL_PORTS(rdev->swpinfo); portnum++) {
- rio_read_config_32(rdev, TSI568_SP_MODE(portnum), &regval);
- rio_write_config_32(rdev, TSI568_SP_MODE(portnum),
- regval | TSI568_SP_MODE_PW_DIS);
- }
-
- return 0;
-}
-
-static struct rio_switch_ops tsi568_switch_ops = {
- .owner = THIS_MODULE,
- .add_entry = tsi568_route_add_entry,
- .get_entry = tsi568_route_get_entry,
- .clr_table = tsi568_route_clr_table,
- .set_domain = NULL,
- .get_domain = NULL,
- .em_init = tsi568_em_init,
- .em_handle = NULL,
-};
-
-static int tsi568_probe(struct rio_dev *rdev, const struct rio_device_id *id)
-{
- pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev));
-
- spin_lock(&rdev->rswitch->lock);
-
- if (rdev->rswitch->ops) {
- spin_unlock(&rdev->rswitch->lock);
- return -EINVAL;
- }
-
- rdev->rswitch->ops = &tsi568_switch_ops;
- spin_unlock(&rdev->rswitch->lock);
- return 0;
-}
-
-static void tsi568_remove(struct rio_dev *rdev)
-{
- pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev));
- spin_lock(&rdev->rswitch->lock);
- if (rdev->rswitch->ops != &tsi568_switch_ops) {
- spin_unlock(&rdev->rswitch->lock);
- return;
- }
- rdev->rswitch->ops = NULL;
- spin_unlock(&rdev->rswitch->lock);
-}
-
-static const struct rio_device_id tsi568_id_table[] = {
- {RIO_DEVICE(RIO_DID_TSI568, RIO_VID_TUNDRA)},
- { 0, } /* terminate list */
-};
-
-static struct rio_driver tsi568_driver = {
- .name = "tsi568",
- .id_table = tsi568_id_table,
- .probe = tsi568_probe,
- .remove = tsi568_remove,
-};
-
-static int __init tsi568_init(void)
-{
- return rio_register_driver(&tsi568_driver);
-}
-
-static void __exit tsi568_exit(void)
-{
- rio_unregister_driver(&tsi568_driver);
-}
-
-device_initcall(tsi568_init);
-module_exit(tsi568_exit);
-
-MODULE_DESCRIPTION("IDT Tsi568 Serial RapidIO switch driver");
-MODULE_AUTHOR("Integrated Device Technology, Inc.");
-MODULE_LICENSE("GPL");
diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c
deleted file mode 100644
index 271762046f8c..000000000000
--- a/drivers/rapidio/switches/tsi57x.c
+++ /dev/null
@@ -1,365 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RapidIO Tsi57x switch family support
- *
- * Copyright 2009-2010 Integrated Device Technology, Inc.
- * Alexandre Bounine <alexandre.bounine@idt.com>
- * - Added EM support
- * - Modified switch operations initialization.
- *
- * Copyright 2005 MontaVista Software, Inc.
- * Matt Porter <mporter@kernel.crashing.org>
- */
-
-#include <linux/rio.h>
-#include <linux/rio_drv.h>
-#include <linux/rio_ids.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-#include "../rio.h"
-
-/* Global (broadcast) route registers */
-#define SPBC_ROUTE_CFG_DESTID 0x10070
-#define SPBC_ROUTE_CFG_PORT 0x10074
-
-/* Per port route registers */
-#define SPP_ROUTE_CFG_DESTID(n) (0x11070 + 0x100*n)
-#define SPP_ROUTE_CFG_PORT(n) (0x11074 + 0x100*n)
-
-#define TSI578_SP_MODE(n) (0x11004 + n*0x100)
-#define TSI578_SP_MODE_GLBL 0x10004
-#define TSI578_SP_MODE_PW_DIS 0x08000000
-#define TSI578_SP_MODE_LUT_512 0x01000000
-
-#define TSI578_SP_CTL_INDEP(n) (0x13004 + n*0x100)
-#define TSI578_SP_LUT_PEINF(n) (0x13010 + n*0x100)
-#define TSI578_SP_CS_TX(n) (0x13014 + n*0x100)
-#define TSI578_SP_INT_STATUS(n) (0x13018 + n*0x100)
-
-#define TSI578_GLBL_ROUTE_BASE 0x10078
-
-static int
-tsi57x_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
- u16 table, u16 route_destid, u8 route_port)
-{
- if (table == RIO_GLOBAL_TABLE) {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_DESTID, route_destid);
- rio_mport_write_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_PORT, route_port);
- } else {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_DESTID(table), route_destid);
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_PORT(table), route_port);
- }
-
- udelay(10);
-
- return 0;
-}
-
-static int
-tsi57x_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
- u16 table, u16 route_destid, u8 *route_port)
-{
- int ret = 0;
- u32 result;
-
- if (table == RIO_GLOBAL_TABLE) {
- /* Use local RT of the ingress port to avoid possible
- race condition */
- rio_mport_read_config_32(mport, destid, hopcount,
- RIO_SWP_INFO_CAR, &result);
- table = (result & RIO_SWP_INFO_PORT_NUM_MASK);
- }
-
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_DESTID(table), route_destid);
- rio_mport_read_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_PORT(table), &result);
-
- *route_port = (u8)result;
- if (*route_port > 15)
- ret = -1;
-
- return ret;
-}
-
-static int
-tsi57x_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount,
- u16 table)
-{
- u32 route_idx;
- u32 lut_size;
-
- lut_size = (mport->sys_size) ? 0x1ff : 0xff;
-
- if (table == RIO_GLOBAL_TABLE) {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_DESTID, 0x80000000);
- for (route_idx = 0; route_idx <= lut_size; route_idx++)
- rio_mport_write_config_32(mport, destid, hopcount,
- SPBC_ROUTE_CFG_PORT,
- RIO_INVALID_ROUTE);
- } else {
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_DESTID(table), 0x80000000);
- for (route_idx = 0; route_idx <= lut_size; route_idx++)
- rio_mport_write_config_32(mport, destid, hopcount,
- SPP_ROUTE_CFG_PORT(table) , RIO_INVALID_ROUTE);
- }
-
- return 0;
-}
-
-static int
-tsi57x_set_domain(struct rio_mport *mport, u16 destid, u8 hopcount,
- u8 sw_domain)
-{
- u32 regval;
-
- /*
- * Switch domain configuration operates only at global level
- */
-
- /* Turn off flat (LUT_512) mode */
- rio_mport_read_config_32(mport, destid, hopcount,
- TSI578_SP_MODE_GLBL, &regval);
- rio_mport_write_config_32(mport, destid, hopcount, TSI578_SP_MODE_GLBL,
- regval & ~TSI578_SP_MODE_LUT_512);
- /* Set switch domain base */
- rio_mport_write_config_32(mport, destid, hopcount,
- TSI578_GLBL_ROUTE_BASE,
- (u32)(sw_domain << 24));
- return 0;
-}
-
-static int
-tsi57x_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount,
- u8 *sw_domain)
-{
- u32 regval;
-
- /*
- * Switch domain configuration operates only at global level
- */
- rio_mport_read_config_32(mport, destid, hopcount,
- TSI578_GLBL_ROUTE_BASE, &regval);
-
- *sw_domain = (u8)(regval >> 24);
-
- return 0;
-}
-
-static int
-tsi57x_em_init(struct rio_dev *rdev)
-{
- u32 regval;
- int portnum;
-
- pr_debug("TSI578 %s [%d:%d]\n", __func__, rdev->destid, rdev->hopcount);
-
- for (portnum = 0;
- portnum < RIO_GET_TOTAL_PORTS(rdev->swpinfo); portnum++) {
- /* Make sure that Port-Writes are enabled (for all ports) */
- rio_read_config_32(rdev,
- TSI578_SP_MODE(portnum), &regval);
- rio_write_config_32(rdev,
- TSI578_SP_MODE(portnum),
- regval & ~TSI578_SP_MODE_PW_DIS);
-
- /* Clear all pending interrupts */
- rio_read_config_32(rdev,
- RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum),
- &regval);
- rio_write_config_32(rdev,
- RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum),
- regval & 0x07120214);
-
- rio_read_config_32(rdev,
- TSI578_SP_INT_STATUS(portnum), &regval);
- rio_write_config_32(rdev,
- TSI578_SP_INT_STATUS(portnum),
- regval & 0x000700bd);
-
- /* Enable all interrupts to allow ports to send a port-write */
- rio_read_config_32(rdev,
- TSI578_SP_CTL_INDEP(portnum), &regval);
- rio_write_config_32(rdev,
- TSI578_SP_CTL_INDEP(portnum),
- regval | 0x000b0000);
-
- /* Skip next (odd) port if the current port is in x4 mode */
- rio_read_config_32(rdev,
- RIO_DEV_PORT_N_CTL_CSR(rdev, portnum),
- &regval);
- if ((regval & RIO_PORT_N_CTL_PWIDTH) == RIO_PORT_N_CTL_PWIDTH_4)
- portnum++;
- }
-
- /* set TVAL = ~50us */
- rio_write_config_32(rdev,
- rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x9a << 8);
-
- return 0;
-}
-
-static int
-tsi57x_em_handler(struct rio_dev *rdev, u8 portnum)
-{
- struct rio_mport *mport = rdev->net->hport;
- u32 intstat, err_status;
- int sendcount, checkcount;
- u8 route_port;
- u32 regval;
-
- rio_read_config_32(rdev,
- RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum),
- &err_status);
-
- if ((err_status & RIO_PORT_N_ERR_STS_PORT_OK) &&
- (err_status & (RIO_PORT_N_ERR_STS_OUT_ES |
- RIO_PORT_N_ERR_STS_INP_ES))) {
- /* Remove any queued packets by locking/unlocking port */
- rio_read_config_32(rdev,
- RIO_DEV_PORT_N_CTL_CSR(rdev, portnum),
- &regval);
- if (!(regval & RIO_PORT_N_CTL_LOCKOUT)) {
- rio_write_config_32(rdev,
- RIO_DEV_PORT_N_CTL_CSR(rdev, portnum),
- regval | RIO_PORT_N_CTL_LOCKOUT);
- udelay(50);
- rio_write_config_32(rdev,
- RIO_DEV_PORT_N_CTL_CSR(rdev, portnum),
- regval);
- }
-
- /* Read from link maintenance response register to clear
- * valid bit
- */
- rio_read_config_32(rdev,
- RIO_DEV_PORT_N_MNT_RSP_CSR(rdev, portnum),
- &regval);
-
- /* Send a Packet-Not-Accepted/Link-Request-Input-Status control
- * symbol to recover from IES/OES
- */
- sendcount = 3;
- while (sendcount) {
- rio_write_config_32(rdev,
- TSI578_SP_CS_TX(portnum), 0x40fc8000);
- checkcount = 3;
- while (checkcount--) {
- udelay(50);
- rio_read_config_32(rdev,
- RIO_DEV_PORT_N_MNT_RSP_CSR(rdev,
- portnum),
- &regval);
- if (regval & RIO_PORT_N_MNT_RSP_RVAL)
- goto exit_es;
- }
-
- sendcount--;
- }
- }
-
-exit_es:
- /* Clear implementation specific error status bits */
- rio_read_config_32(rdev, TSI578_SP_INT_STATUS(portnum), &intstat);
- pr_debug("TSI578[%x:%x] SP%d_INT_STATUS=0x%08x\n",
- rdev->destid, rdev->hopcount, portnum, intstat);
-
- if (intstat & 0x10000) {
- rio_read_config_32(rdev,
- TSI578_SP_LUT_PEINF(portnum), &regval);
- regval = (mport->sys_size) ? (regval >> 16) : (regval >> 24);
- route_port = rdev->rswitch->route_table[regval];
- pr_debug("RIO: TSI578[%s] P%d LUT Parity Error (destID=%d)\n",
- rio_name(rdev), portnum, regval);
- tsi57x_route_add_entry(mport, rdev->destid, rdev->hopcount,
- RIO_GLOBAL_TABLE, regval, route_port);
- }
-
- rio_write_config_32(rdev, TSI578_SP_INT_STATUS(portnum),
- intstat & 0x000700bd);
-
- return 0;
-}
-
-static struct rio_switch_ops tsi57x_switch_ops = {
- .owner = THIS_MODULE,
- .add_entry = tsi57x_route_add_entry,
- .get_entry = tsi57x_route_get_entry,
- .clr_table = tsi57x_route_clr_table,
- .set_domain = tsi57x_set_domain,
- .get_domain = tsi57x_get_domain,
- .em_init = tsi57x_em_init,
- .em_handle = tsi57x_em_handler,
-};
-
-static int tsi57x_probe(struct rio_dev *rdev, const struct rio_device_id *id)
-{
- pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev));
-
- spin_lock(&rdev->rswitch->lock);
-
- if (rdev->rswitch->ops) {
- spin_unlock(&rdev->rswitch->lock);
- return -EINVAL;
- }
- rdev->rswitch->ops = &tsi57x_switch_ops;
-
- if (rdev->do_enum) {
- /* Ensure that default routing is disabled on startup */
- rio_write_config_32(rdev, RIO_STD_RTE_DEFAULT_PORT,
- RIO_INVALID_ROUTE);
- }
-
- spin_unlock(&rdev->rswitch->lock);
- return 0;
-}
-
-static void tsi57x_remove(struct rio_dev *rdev)
-{
- pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev));
- spin_lock(&rdev->rswitch->lock);
- if (rdev->rswitch->ops != &tsi57x_switch_ops) {
- spin_unlock(&rdev->rswitch->lock);
- return;
- }
- rdev->rswitch->ops = NULL;
- spin_unlock(&rdev->rswitch->lock);
-}
-
-static const struct rio_device_id tsi57x_id_table[] = {
- {RIO_DEVICE(RIO_DID_TSI572, RIO_VID_TUNDRA)},
- {RIO_DEVICE(RIO_DID_TSI574, RIO_VID_TUNDRA)},
- {RIO_DEVICE(RIO_DID_TSI577, RIO_VID_TUNDRA)},
- {RIO_DEVICE(RIO_DID_TSI578, RIO_VID_TUNDRA)},
- { 0, } /* terminate list */
-};
-
-static struct rio_driver tsi57x_driver = {
- .name = "tsi57x",
- .id_table = tsi57x_id_table,
- .probe = tsi57x_probe,
- .remove = tsi57x_remove,
-};
-
-static int __init tsi57x_init(void)
-{
- return rio_register_driver(&tsi57x_driver);
-}
-
-static void __exit tsi57x_exit(void)
-{
- rio_unregister_driver(&tsi57x_driver);
-}
-
-device_initcall(tsi57x_init);
-module_exit(tsi57x_exit);
-
-MODULE_DESCRIPTION("IDT Tsi57x Serial RapidIO switch family driver");
-MODULE_AUTHOR("Integrated Device Technology, Inc.");
-MODULE_LICENSE("GPL");
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index f2e961f998ca..3ddd426fc969 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -283,6 +283,17 @@ config QCOM_WCNSS_PIL
verified and booted with the help of the Peripheral Authentication
System (PAS) in TrustZone.
+config RCAR_REMOTEPROC
+ tristate "Renesas R-Car Gen3 remoteproc support"
+ depends on ARCH_RENESAS || COMPILE_TEST
+ help
+ Say y here to support R-Car realtime processor via the
+ remote processor framework. An ELF firmware can be loaded
+ thanks to sysfs remoteproc entries. The remote processor
+ can be started and stopped.
+ This can be either built-in or a loadable module.
+ If compiled as module (M), the module name is rcar_rproc.
+
config ST_REMOTEPROC
tristate "ST remoteproc support"
depends on ARCH_STI
diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
index 0ac256b6c977..5478c7cb9e07 100644
--- a/drivers/remoteproc/Makefile
+++ b/drivers/remoteproc/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_QCOM_SYSMON) += qcom_sysmon.o
obj-$(CONFIG_QCOM_WCNSS_PIL) += qcom_wcnss_pil.o
qcom_wcnss_pil-y += qcom_wcnss.o
qcom_wcnss_pil-y += qcom_wcnss_iris.o
+obj-$(CONFIG_RCAR_REMOTEPROC) += rcar_rproc.o
obj-$(CONFIG_ST_REMOTEPROC) += st_remoteproc.o
obj-$(CONFIG_ST_SLIM_REMOTEPROC) += st_slim_rproc.o
obj-$(CONFIG_STM32_RPROC) += stm32_rproc.o
diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index ff8170dbbc3c..7a096f1891e6 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -34,7 +34,8 @@
#define IMX7D_M4_START (IMX7D_ENABLE_M4 | IMX7D_SW_M4P_RST \
| IMX7D_SW_M4C_RST)
-#define IMX7D_M4_STOP IMX7D_SW_M4C_NON_SCLR_RST
+#define IMX7D_M4_STOP (IMX7D_ENABLE_M4 | IMX7D_SW_M4C_RST | \
+ IMX7D_SW_M4C_NON_SCLR_RST)
/* Address: 0x020D8000 */
#define IMX6SX_SRC_SCR 0x00
@@ -45,7 +46,8 @@
#define IMX6SX_M4_START (IMX6SX_ENABLE_M4 | IMX6SX_SW_M4P_RST \
| IMX6SX_SW_M4C_RST)
-#define IMX6SX_M4_STOP IMX6SX_SW_M4C_NON_SCLR_RST
+#define IMX6SX_M4_STOP (IMX6SX_ENABLE_M4 | IMX6SX_SW_M4C_RST | \
+ IMX6SX_SW_M4C_NON_SCLR_RST)
#define IMX6SX_M4_RST_MASK (IMX6SX_ENABLE_M4 | IMX6SX_SW_M4P_RST \
| IMX6SX_SW_M4C_NON_SCLR_RST \
| IMX6SX_SW_M4C_RST)
@@ -684,7 +686,7 @@ static int imx_rproc_detect_mode(struct imx_rproc *priv)
return ret;
}
- if (!(val & dcfg->src_stop))
+ if ((val & dcfg->src_mask) != dcfg->src_stop)
priv->rproc->state = RPROC_DETACHED;
return 0;
@@ -804,6 +806,7 @@ static int imx_rproc_remove(struct platform_device *pdev)
clk_disable_unprepare(priv->clk);
rproc_del(rproc);
imx_rproc_free_mbox(rproc);
+ destroy_workqueue(priv->workqueue);
rproc_free(rproc);
return 0;
diff --git a/drivers/remoteproc/ingenic_rproc.c b/drivers/remoteproc/ingenic_rproc.c
index a356738160a4..9902cce28692 100644
--- a/drivers/remoteproc/ingenic_rproc.c
+++ b/drivers/remoteproc/ingenic_rproc.c
@@ -218,14 +218,13 @@ static int ingenic_rproc_probe(struct platform_device *pdev)
if (vpu->irq < 0)
return vpu->irq;
- ret = devm_request_irq(dev, vpu->irq, vpu_interrupt, 0, "VPU", rproc);
+ ret = devm_request_irq(dev, vpu->irq, vpu_interrupt, IRQF_NO_AUTOEN,
+ "VPU", rproc);
if (ret < 0) {
dev_err(dev, "Failed to request IRQ\n");
return ret;
}
- disable_irq(vpu->irq);
-
ret = devm_rproc_add(dev, rproc);
if (ret) {
dev_err(dev, "Failed to register remote processor\n");
diff --git a/drivers/remoteproc/mtk_scp_ipi.c b/drivers/remoteproc/mtk_scp_ipi.c
index 6dc955ecab80..00f041ebcde6 100644
--- a/drivers/remoteproc/mtk_scp_ipi.c
+++ b/drivers/remoteproc/mtk_scp_ipi.c
@@ -23,7 +23,7 @@
*
* Register an ipi function to receive ipi interrupt from SCP.
*
- * Returns 0 if ipi registers successfully, -error on error.
+ * Return: 0 if ipi registers successfully, -error on error.
*/
int scp_ipi_register(struct mtk_scp *scp,
u32 id,
@@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(scp_ipi_unlock);
* When the processing completes, IPI handler registered
* by scp_ipi_register will be called in interrupt context.
*
- * Returns 0 if sending data successfully, -error on error.
+ * Return: 0 if sending data successfully, -error on error.
**/
int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len,
unsigned int wait)
diff --git a/drivers/remoteproc/qcom_pil_info.c b/drivers/remoteproc/qcom_pil_info.c
index 7c007dd7b200..aca21560e20b 100644
--- a/drivers/remoteproc/qcom_pil_info.c
+++ b/drivers/remoteproc/qcom_pil_info.c
@@ -104,7 +104,7 @@ int qcom_pil_info_store(const char *image, phys_addr_t base, size_t size)
return -ENOMEM;
found_unused:
- memcpy_toio(entry, image, PIL_RELOC_NAME_LEN);
+ memcpy_toio(entry, image, strnlen(image, PIL_RELOC_NAME_LEN));
found_existing:
/* Use two writel() as base is only aligned to 4 bytes on odd entries */
writel(base, entry + PIL_RELOC_NAME_LEN);
diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c
index 03857dc9cdc1..184bb7cdf95a 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -524,6 +524,23 @@ static const struct adsp_data sdm845_adsp_resource_init = {
.ssctl_id = 0x14,
};
+static const struct adsp_data sm6350_adsp_resource = {
+ .crash_reason_smem = 423,
+ .firmware_name = "adsp.mdt",
+ .pas_id = 1,
+ .has_aggre2_clk = false,
+ .auto_boot = true,
+ .proxy_pd_names = (char*[]){
+ "lcx",
+ "lmx",
+ NULL
+ },
+ .load_state = "adsp",
+ .ssr_name = "lpass",
+ .sysmon_name = "adsp",
+ .ssctl_id = 0x14,
+};
+
static const struct adsp_data sm8150_adsp_resource = {
.crash_reason_smem = 423,
.firmware_name = "adsp.mdt",
@@ -612,6 +629,23 @@ static const struct adsp_data sdm845_cdsp_resource_init = {
.ssctl_id = 0x17,
};
+static const struct adsp_data sm6350_cdsp_resource = {
+ .crash_reason_smem = 601,
+ .firmware_name = "cdsp.mdt",
+ .pas_id = 18,
+ .has_aggre2_clk = false,
+ .auto_boot = true,
+ .proxy_pd_names = (char*[]){
+ "cx",
+ "mx",
+ NULL
+ },
+ .load_state = "cdsp",
+ .ssr_name = "cdsp",
+ .sysmon_name = "cdsp",
+ .ssctl_id = 0x17,
+};
+
static const struct adsp_data sm8150_cdsp_resource = {
.crash_reason_smem = 601,
.firmware_name = "cdsp.mdt",
@@ -652,6 +686,7 @@ static const struct adsp_data sm8350_cdsp_resource = {
.auto_boot = true,
.proxy_pd_names = (char*[]){
"cx",
+ "mxc",
NULL
},
.load_state = "cdsp",
@@ -804,6 +839,9 @@ static const struct of_device_id adsp_of_match[] = {
{ .compatible = "qcom,sdm845-adsp-pas", .data = &sdm845_adsp_resource_init},
{ .compatible = "qcom,sdm845-cdsp-pas", .data = &sdm845_cdsp_resource_init},
{ .compatible = "qcom,sdx55-mpss-pas", .data = &sdx55_mpss_resource},
+ { .compatible = "qcom,sm6350-adsp-pas", .data = &sm6350_adsp_resource},
+ { .compatible = "qcom,sm6350-cdsp-pas", .data = &sm6350_cdsp_resource},
+ { .compatible = "qcom,sm6350-mpss-pas", .data = &mpss_resource_init},
{ .compatible = "qcom,sm8150-adsp-pas", .data = &sm8150_adsp_resource},
{ .compatible = "qcom,sm8150-cdsp-pas", .data = &sm8150_cdsp_resource},
{ .compatible = "qcom,sm8150-mpss-pas", .data = &mpss_resource_init},
diff --git a/drivers/remoteproc/rcar_rproc.c b/drivers/remoteproc/rcar_rproc.c
new file mode 100644
index 000000000000..aa86154109c7
--- /dev/null
+++ b/drivers/remoteproc/rcar_rproc.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) IoT.bzh 2021
+ */
+
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/pm_runtime.h>
+#include <linux/remoteproc.h>
+#include <linux/reset.h>
+#include <linux/soc/renesas/rcar-rst.h>
+
+#include "remoteproc_internal.h"
+
+struct rcar_rproc {
+ struct reset_control *rst;
+};
+
+static int rcar_rproc_mem_alloc(struct rproc *rproc,
+ struct rproc_mem_entry *mem)
+{
+ struct device *dev = &rproc->dev;
+ void *va;
+
+ dev_dbg(dev, "map memory: %pa+%zx\n", &mem->dma, mem->len);
+ va = ioremap_wc(mem->dma, mem->len);
+ if (!va) {
+ dev_err(dev, "Unable to map memory region: %pa+%zx\n",
+ &mem->dma, mem->len);
+ return -ENOMEM;
+ }
+
+ /* Update memory entry va */
+ mem->va = va;
+
+ return 0;
+}
+
+static int rcar_rproc_mem_release(struct rproc *rproc,
+ struct rproc_mem_entry *mem)
+{
+ dev_dbg(&rproc->dev, "unmap memory: %pa\n", &mem->dma);
+ iounmap(mem->va);
+
+ return 0;
+}
+
+static int rcar_rproc_prepare(struct rproc *rproc)
+{
+ struct device *dev = rproc->dev.parent;
+ struct device_node *np = dev->of_node;
+ struct of_phandle_iterator it;
+ struct rproc_mem_entry *mem;
+ struct reserved_mem *rmem;
+ u32 da;
+
+ /* Register associated reserved memory regions */
+ of_phandle_iterator_init(&it, np, "memory-region", NULL, 0);
+ while (of_phandle_iterator_next(&it) == 0) {
+
+ rmem = of_reserved_mem_lookup(it.node);
+ if (!rmem) {
+ dev_err(&rproc->dev,
+ "unable to acquire memory-region\n");
+ return -EINVAL;
+ }
+
+ if (rmem->base > U32_MAX)
+ return -EINVAL;
+
+ /* No need to translate pa to da, R-Car use same map */
+ da = rmem->base;
+ mem = rproc_mem_entry_init(dev, NULL,
+ rmem->base,
+ rmem->size, da,
+ rcar_rproc_mem_alloc,
+ rcar_rproc_mem_release,
+ it.node->name);
+
+ if (!mem)
+ return -ENOMEM;
+
+ rproc_add_carveout(rproc, mem);
+ }
+
+ return 0;
+}
+
+static int rcar_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
+{
+ int ret;
+
+ ret = rproc_elf_load_rsc_table(rproc, fw);
+ if (ret)
+ dev_info(&rproc->dev, "No resource table in elf\n");
+
+ return 0;
+}
+
+static int rcar_rproc_start(struct rproc *rproc)
+{
+ struct rcar_rproc *priv = rproc->priv;
+ int err;
+
+ if (!rproc->bootaddr)
+ return -EINVAL;
+
+ err = rcar_rst_set_rproc_boot_addr(rproc->bootaddr);
+ if (err) {
+ dev_err(&rproc->dev, "failed to set rproc boot addr\n");
+ return err;
+ }
+
+ err = reset_control_deassert(priv->rst);
+ if (err)
+ dev_err(&rproc->dev, "failed to deassert reset\n");
+
+ return err;
+}
+
+static int rcar_rproc_stop(struct rproc *rproc)
+{
+ struct rcar_rproc *priv = rproc->priv;
+ int err;
+
+ err = reset_control_assert(priv->rst);
+ if (err)
+ dev_err(&rproc->dev, "failed to assert reset\n");
+
+ return err;
+}
+
+static struct rproc_ops rcar_rproc_ops = {
+ .prepare = rcar_rproc_prepare,
+ .start = rcar_rproc_start,
+ .stop = rcar_rproc_stop,
+ .load = rproc_elf_load_segments,
+ .parse_fw = rcar_rproc_parse_fw,
+ .find_loaded_rsc_table = rproc_elf_find_loaded_rsc_table,
+ .sanity_check = rproc_elf_sanity_check,
+ .get_boot_addr = rproc_elf_get_boot_addr,
+
+};
+
+static int rcar_rproc_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct rcar_rproc *priv;
+ struct rproc *rproc;
+ int ret;
+
+ rproc = devm_rproc_alloc(dev, np->name, &rcar_rproc_ops,
+ NULL, sizeof(*priv));
+ if (!rproc)
+ return -ENOMEM;
+
+ priv = rproc->priv;
+
+ priv->rst = devm_reset_control_get_exclusive(dev, NULL);
+ if (IS_ERR(priv->rst)) {
+ ret = PTR_ERR(priv->rst);
+ dev_err_probe(dev, ret, "fail to acquire rproc reset\n");
+ return ret;
+ }
+
+ pm_runtime_enable(dev);
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret) {
+ dev_err(dev, "failed to power up\n");
+ return ret;
+ }
+
+ dev_set_drvdata(dev, rproc);
+
+ /* Manually start the rproc */
+ rproc->auto_boot = false;
+
+ ret = devm_rproc_add(dev, rproc);
+ if (ret) {
+ dev_err(dev, "rproc_add failed\n");
+ goto pm_disable;
+ }
+
+ return 0;
+
+pm_disable:
+ pm_runtime_disable(dev);
+
+ return ret;
+}
+
+static int rcar_rproc_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+
+ pm_runtime_disable(dev);
+
+ return 0;
+}
+
+static const struct of_device_id rcar_rproc_of_match[] = {
+ { .compatible = "renesas,rcar-cr7" },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, rcar_rproc_of_match);
+
+static struct platform_driver rcar_rproc_driver = {
+ .probe = rcar_rproc_probe,
+ .remove = rcar_rproc_remove,
+ .driver = {
+ .name = "rcar-rproc",
+ .of_match_table = rcar_rproc_of_match,
+ },
+};
+
+module_platform_driver(rcar_rproc_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Renesas R-Car Gen3 remote processor control driver");
+MODULE_AUTHOR("Julien Massot <julien.massot@iot.bzh>");
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 775df165eb45..69f51acf235e 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -577,8 +577,8 @@ static int rproc_handle_vdev(struct rproc *rproc, void *ptr,
dma_get_mask(rproc->dev.parent));
if (ret) {
dev_warn(dev,
- "Failed to set DMA mask %llx. Trying to continue... %x\n",
- dma_get_mask(rproc->dev.parent), ret);
+ "Failed to set DMA mask %llx. Trying to continue... (%pe)\n",
+ dma_get_mask(rproc->dev.parent), ERR_PTR(ret));
}
/* parse the vrings */
diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c
index c892f433a323..4b093420d98a 100644
--- a/drivers/remoteproc/remoteproc_coredump.c
+++ b/drivers/remoteproc/remoteproc_coredump.c
@@ -166,7 +166,7 @@ static void rproc_copy_segment(struct rproc *rproc, void *dest,
memset(dest, 0xff, size);
} else {
if (is_iomem)
- memcpy_fromio(dest, ptr, size);
+ memcpy_fromio(dest, (void const __iomem *)ptr, size);
else
memcpy(dest, ptr, size);
}
diff --git a/drivers/remoteproc/st_slim_rproc.c b/drivers/remoteproc/st_slim_rproc.c
index 22096adc1ad3..4ed9467897e5 100644
--- a/drivers/remoteproc/st_slim_rproc.c
+++ b/drivers/remoteproc/st_slim_rproc.c
@@ -216,7 +216,7 @@ static const struct rproc_ops slim_rproc_ops = {
* obtains and enables any clocks required by the SLIM core and also
* ioremaps the various IO.
*
- * Returns st_slim_rproc pointer or PTR_ERR() on error.
+ * Return: st_slim_rproc pointer or PTR_ERR() on error.
*/
struct st_slim_rproc *st_slim_rproc_alloc(struct platform_device *pdev,
diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c
index b643efcf995a..7d782ed9e589 100644
--- a/drivers/remoteproc/stm32_rproc.c
+++ b/drivers/remoteproc/stm32_rproc.c
@@ -494,7 +494,7 @@ static int stm32_rproc_stop(struct rproc *rproc)
int err, idx;
/* request shutdown of the remote processor */
- if (rproc->state != RPROC_OFFLINE) {
+ if (rproc->state != RPROC_OFFLINE && rproc->state != RPROC_CRASHED) {
idx = stm32_rproc_mbox_idx(rproc, STM32_MBX_SHUTDOWN);
if (idx >= 0 && ddata->mb[idx].chan) {
err = mbox_send_message(ddata->mb[idx].chan, "detach");
diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
index c352fa277c8d..939c5d90b562 100644
--- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
@@ -767,6 +767,7 @@ static const struct k3_dsp_dev_data c71_data = {
static const struct of_device_id k3_dsp_of_match[] = {
{ .compatible = "ti,j721e-c66-dsp", .data = &c66_data, },
{ .compatible = "ti,j721e-c71-dsp", .data = &c71_data, },
+ { .compatible = "ti,j721s2-c71-dsp", .data = &c71_data, },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, k3_dsp_of_match);
diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c
index 6499302d00c3..969531c05b13 100644
--- a/drivers/remoteproc/ti_k3_r5_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c
@@ -1535,7 +1535,7 @@ static const struct k3_r5_soc_data am65_j721e_soc_data = {
.single_cpu_mode = false,
};
-static const struct k3_r5_soc_data j7200_soc_data = {
+static const struct k3_r5_soc_data j7200_j721s2_soc_data = {
.tcm_is_double = true,
.tcm_ecc_autoinit = true,
.single_cpu_mode = false,
@@ -1550,8 +1550,9 @@ static const struct k3_r5_soc_data am64_soc_data = {
static const struct of_device_id k3_r5_of_match[] = {
{ .compatible = "ti,am654-r5fss", .data = &am65_j721e_soc_data, },
{ .compatible = "ti,j721e-r5fss", .data = &am65_j721e_soc_data, },
- { .compatible = "ti,j7200-r5fss", .data = &j7200_soc_data, },
+ { .compatible = "ti,j7200-r5fss", .data = &j7200_j721s2_soc_data, },
{ .compatible = "ti,am64-r5fss", .data = &am64_soc_data, },
+ { .compatible = "ti,j721s2-r5fss", .data = &j7200_j721s2_soc_data, },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, k3_r5_of_match);
diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c
index 3f377a795b33..1030cfa80e04 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -427,7 +427,7 @@ static void qcom_glink_handle_intent_req_ack(struct qcom_glink *glink,
* Allocates a local channel id and sends a RPM_CMD_OPEN message to the remote.
* Will return with refcount held, regardless of outcome.
*
- * Returns 0 on success, negative errno otherwise.
+ * Return: 0 on success, negative errno otherwise.
*/
static int qcom_glink_send_open_req(struct qcom_glink *glink,
struct glink_channel *channel)
diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
index 8da1b5cb31b3..540e027f08c4 100644
--- a/drivers/rpmsg/qcom_smd.c
+++ b/drivers/rpmsg/qcom_smd.c
@@ -1467,7 +1467,7 @@ ATTRIBUTE_GROUPS(qcom_smd_edge);
* @parent: parent device for the edge
* @node: device_node describing the edge
*
- * Returns an edge reference, or negative ERR_PTR() on failure.
+ * Return: an edge reference, or negative ERR_PTR() on failure.
*/
struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
struct device_node *node)
diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index b5907b80727c..d6214cb66026 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -9,6 +9,9 @@
* Based on rpmsg performance statistics driver by Michal Simek, which in turn
* was based on TI & Google OMX rpmsg driver.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/fs.h>
@@ -550,7 +553,7 @@ static int rpmsg_chrdev_init(void)
ret = alloc_chrdev_region(&rpmsg_major, 0, RPMSG_DEV_MAX, "rpmsg");
if (ret < 0) {
- pr_err("rpmsg: failed to allocate char dev region\n");
+ pr_err("failed to allocate char dev region\n");
return ret;
}
@@ -563,7 +566,7 @@ static int rpmsg_chrdev_init(void)
ret = register_rpmsg_driver(&rpmsg_chrdev_driver);
if (ret < 0) {
- pr_err("rpmsgchr: failed to register rpmsg driver\n");
+ pr_err("failed to register rpmsg driver\n");
class_destroy(rpmsg_class);
unregister_chrdev_region(rpmsg_major, RPMSG_DEV_MAX);
}
diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index d3eb60059ef1..d9e612f4f0f2 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -26,7 +26,7 @@
* @rpdev: rpmsg device
* @chinfo: channel_info to bind
*
- * Returns a pointer to the new rpmsg device on success, or NULL on error.
+ * Return: a pointer to the new rpmsg device on success, or NULL on error.
*/
struct rpmsg_device *rpmsg_create_channel(struct rpmsg_device *rpdev,
struct rpmsg_channel_info *chinfo)
@@ -48,7 +48,7 @@ EXPORT_SYMBOL(rpmsg_create_channel);
* @rpdev: rpmsg device
* @chinfo: channel_info to bind
*
- * Returns 0 on success or an appropriate error value.
+ * Return: 0 on success or an appropriate error value.
*/
int rpmsg_release_channel(struct rpmsg_device *rpdev,
struct rpmsg_channel_info *chinfo)
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(rpmsg_release_channel);
* dynamically assign them an available rpmsg address (drivers should have
* a very good reason why not to always use RPMSG_ADDR_ANY here).
*
- * Returns a pointer to the endpoint on success, or NULL on error.
+ * Return: a pointer to the endpoint on success, or NULL on error.
*/
struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev,
rpmsg_rx_cb_t cb, void *priv,
@@ -146,7 +146,7 @@ EXPORT_SYMBOL(rpmsg_destroy_ept);
*
* Can only be called from process context (for now).
*
- * Returns 0 on success and an appropriate error value on failure.
+ * Return: 0 on success and an appropriate error value on failure.
*/
int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
{
@@ -175,7 +175,7 @@ EXPORT_SYMBOL(rpmsg_send);
*
* Can only be called from process context (for now).
*
- * Returns 0 on success and an appropriate error value on failure.
+ * Return: 0 on success and an appropriate error value on failure.
*/
int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
{
@@ -206,7 +206,7 @@ EXPORT_SYMBOL(rpmsg_sendto);
*
* Can only be called from process context (for now).
*
- * Returns 0 on success and an appropriate error value on failure.
+ * Return: 0 on success and an appropriate error value on failure.
*/
int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
void *data, int len)
@@ -235,7 +235,7 @@ EXPORT_SYMBOL(rpmsg_send_offchannel);
*
* Can only be called from process context (for now).
*
- * Returns 0 on success and an appropriate error value on failure.
+ * Return: 0 on success and an appropriate error value on failure.
*/
int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
{
@@ -263,7 +263,7 @@ EXPORT_SYMBOL(rpmsg_trysend);
*
* Can only be called from process context (for now).
*
- * Returns 0 on success and an appropriate error value on failure.
+ * Return: 0 on success and an appropriate error value on failure.
*/
int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
{
@@ -282,7 +282,7 @@ EXPORT_SYMBOL(rpmsg_trysendto);
* @filp: file for poll_wait()
* @wait: poll_table for poll_wait()
*
- * Returns mask representing the current state of the endpoint's send buffers
+ * Return: mask representing the current state of the endpoint's send buffers
*/
__poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp,
poll_table *wait)
@@ -313,7 +313,7 @@ EXPORT_SYMBOL(rpmsg_poll);
*
* Can only be called from process context (for now).
*
- * Returns 0 on success and an appropriate error value on failure.
+ * Return: 0 on success and an appropriate error value on failure.
*/
int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
void *data, int len)
@@ -540,13 +540,25 @@ static int rpmsg_dev_probe(struct device *dev)
err = rpdrv->probe(rpdev);
if (err) {
dev_err(dev, "%s: failed: %d\n", __func__, err);
- if (ept)
- rpmsg_destroy_ept(ept);
- goto out;
+ goto destroy_ept;
}
- if (ept && rpdev->ops->announce_create)
+ if (ept && rpdev->ops->announce_create) {
err = rpdev->ops->announce_create(rpdev);
+ if (err) {
+ dev_err(dev, "failed to announce creation\n");
+ goto remove_rpdev;
+ }
+ }
+
+ return 0;
+
+remove_rpdev:
+ if (rpdrv->remove)
+ rpdrv->remove(rpdev);
+destroy_ept:
+ if (ept)
+ rpmsg_destroy_ept(ept);
out:
return err;
}
@@ -623,7 +635,7 @@ EXPORT_SYMBOL(rpmsg_unregister_device);
* @rpdrv: pointer to a struct rpmsg_driver
* @owner: owning module/driver
*
- * Returns 0 on success, and an appropriate error value on failure.
+ * Return: 0 on success, and an appropriate error value on failure.
*/
int __register_rpmsg_driver(struct rpmsg_driver *rpdrv, struct module *owner)
{
@@ -637,7 +649,7 @@ EXPORT_SYMBOL(__register_rpmsg_driver);
* unregister_rpmsg_driver() - unregister an rpmsg driver from the rpmsg bus
* @rpdrv: pointer to a struct rpmsg_driver
*
- * Returns 0 on success, and an appropriate error value on failure.
+ * Return: 0 on success, and an appropriate error value on failure.
*/
void unregister_rpmsg_driver(struct rpmsg_driver *rpdrv)
{
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 9c112aa65040..ac764e04c898 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -547,7 +547,7 @@ static void rpmsg_downref_sleepers(struct virtproc_info *vrp)
* should use the appropriate rpmsg_{try}send{to, _offchannel} API
* (see include/linux/rpmsg.h).
*
- * Returns 0 on success and an appropriate error value on failure.
+ * Return: 0 on success and an appropriate error value on failure.
*/
static int rpmsg_send_offchannel_raw(struct rpmsg_device *rpdev,
u32 src, u32 dst,
@@ -1024,7 +1024,7 @@ static void rpmsg_remove(struct virtio_device *vdev)
size_t total_buf_space = vrp->num_bufs * vrp->buf_size;
int ret;
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
ret = device_for_each_child(&vdev->dev, NULL, rpmsg_remove_device);
if (ret)
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 058e56a10ab8..d85a3c31347c 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1216,6 +1216,17 @@ config RTC_DRV_V3020
This driver can also be built as a module. If so, the module
will be called rtc-v3020.
+config RTC_DRV_GAMECUBE
+ tristate "Nintendo GameCube, Wii and Wii U RTC"
+ depends on GAMECUBE || WII || COMPILE_TEST
+ select REGMAP
+ help
+ If you say yes here you will get support for the RTC subsystem
+ of the Nintendo GameCube, Wii and Wii U.
+
+ This driver can also be built as a module. If so, the module
+ will be called "rtc-gamecube".
+
config RTC_DRV_WM831X
tristate "Wolfson Microelectronics WM831x RTC"
depends on MFD_WM831X
@@ -1444,6 +1455,19 @@ config RTC_DRV_SH
To compile this driver as a module, choose M here: the
module will be called rtc-sh.
+config RTC_DRV_SUNPLUS
+ tristate "Sunplus SP7021 RTC"
+ depends on SOC_SP7021
+ help
+ Say 'yes' to get support for the real-time clock present in
+ Sunplus SP7021 - a SoC for industrial applications. It provides
+ RTC status check, timer/alarm functionalities, user data
+ reservation with the battery over 2.5V, RTC power status check
+ and battery charge.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-sunplus.
+
config RTC_DRV_VR41XX
tristate "NEC VR41XX"
depends on CPU_VR41XX || COMPILE_TEST
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 678a8ef4abae..e92f3e943245 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_RTC_DRV_MT7622) += rtc-mt7622.o
obj-$(CONFIG_RTC_DRV_MV) += rtc-mv.o
obj-$(CONFIG_RTC_DRV_MXC) += rtc-mxc.o
obj-$(CONFIG_RTC_DRV_MXC_V2) += rtc-mxc_v2.o
+obj-$(CONFIG_RTC_DRV_GAMECUBE) += rtc-gamecube.o
obj-$(CONFIG_RTC_DRV_NTXEC) += rtc-ntxec.o
obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o
obj-$(CONFIG_RTC_DRV_OPAL) += rtc-opal.o
@@ -165,6 +166,7 @@ obj-$(CONFIG_RTC_DRV_STM32) += rtc-stm32.o
obj-$(CONFIG_RTC_DRV_STMP) += rtc-stmp3xxx.o
obj-$(CONFIG_RTC_DRV_SUN4V) += rtc-sun4v.o
obj-$(CONFIG_RTC_DRV_SUN6I) += rtc-sun6i.o
+obj-$(CONFIG_RTC_DRV_SUNPLUS) += rtc-sunplus.o
obj-$(CONFIG_RTC_DRV_SUNXI) += rtc-sunxi.o
obj-$(CONFIG_RTC_DRV_TEGRA) += rtc-tegra.o
obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o
diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c
index e104972a28fd..69325aeede1a 100644
--- a/drivers/rtc/dev.c
+++ b/drivers/rtc/dev.c
@@ -391,14 +391,14 @@ static long rtc_dev_ioctl(struct file *file,
}
switch(param.param) {
- long offset;
case RTC_PARAM_FEATURES:
if (param.index != 0)
err = -EINVAL;
param.uvalue = rtc->features[0];
break;
- case RTC_PARAM_CORRECTION:
+ case RTC_PARAM_CORRECTION: {
+ long offset;
mutex_unlock(&rtc->ops_lock);
if (param.index != 0)
return -EINVAL;
@@ -407,7 +407,7 @@ static long rtc_dev_ioctl(struct file *file,
if (err == 0)
param.svalue = offset;
break;
-
+ }
default:
if (rtc->ops->param_get)
err = rtc->ops->param_get(rtc->dev.parent, &param);
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 4eb53412b808..7c006c2b125f 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -222,6 +222,8 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
static int cmos_read_time(struct device *dev, struct rtc_time *t)
{
+ int ret;
+
/*
* If pm_trace abused the RTC for storage, set the timespec to 0,
* which tells the caller that this RTC value is unusable.
@@ -229,7 +231,12 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t)
if (!pm_trace_rtc_valid())
return -EIO;
- mc146818_get_time(t);
+ ret = mc146818_get_time(t);
+ if (ret < 0) {
+ dev_err_ratelimited(dev, "unable to read current time\n");
+ return ret;
+ }
+
return 0;
}
@@ -242,10 +249,46 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t)
return mc146818_set_time(t);
}
+struct cmos_read_alarm_callback_param {
+ struct cmos_rtc *cmos;
+ struct rtc_time *time;
+ unsigned char rtc_control;
+};
+
+static void cmos_read_alarm_callback(unsigned char __always_unused seconds,
+ void *param_in)
+{
+ struct cmos_read_alarm_callback_param *p =
+ (struct cmos_read_alarm_callback_param *)param_in;
+ struct rtc_time *time = p->time;
+
+ time->tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
+ time->tm_min = CMOS_READ(RTC_MINUTES_ALARM);
+ time->tm_hour = CMOS_READ(RTC_HOURS_ALARM);
+
+ if (p->cmos->day_alrm) {
+ /* ignore upper bits on readback per ACPI spec */
+ time->tm_mday = CMOS_READ(p->cmos->day_alrm) & 0x3f;
+ if (!time->tm_mday)
+ time->tm_mday = -1;
+
+ if (p->cmos->mon_alrm) {
+ time->tm_mon = CMOS_READ(p->cmos->mon_alrm);
+ if (!time->tm_mon)
+ time->tm_mon = -1;
+ }
+ }
+
+ p->rtc_control = CMOS_READ(RTC_CONTROL);
+}
+
static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
{
struct cmos_rtc *cmos = dev_get_drvdata(dev);
- unsigned char rtc_control;
+ struct cmos_read_alarm_callback_param p = {
+ .cmos = cmos,
+ .time = &t->time,
+ };
/* This not only a rtc_op, but also called directly */
if (!is_valid_irq(cmos->irq))
@@ -256,28 +299,18 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
* the future.
*/
- spin_lock_irq(&rtc_lock);
- t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
- t->time.tm_min = CMOS_READ(RTC_MINUTES_ALARM);
- t->time.tm_hour = CMOS_READ(RTC_HOURS_ALARM);
-
- if (cmos->day_alrm) {
- /* ignore upper bits on readback per ACPI spec */
- t->time.tm_mday = CMOS_READ(cmos->day_alrm) & 0x3f;
- if (!t->time.tm_mday)
- t->time.tm_mday = -1;
-
- if (cmos->mon_alrm) {
- t->time.tm_mon = CMOS_READ(cmos->mon_alrm);
- if (!t->time.tm_mon)
- t->time.tm_mon = -1;
- }
- }
-
- rtc_control = CMOS_READ(RTC_CONTROL);
- spin_unlock_irq(&rtc_lock);
+ /* Some Intel chipsets disconnect the alarm registers when the clock
+ * update is in progress - during this time reads return bogus values
+ * and writes may fail silently. See for example "7th Generation Intel®
+ * Processor Family I/O for U/Y Platforms [...] Datasheet", section
+ * 27.7.1
+ *
+ * Use the mc146818_avoid_UIP() function to avoid this.
+ */
+ if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p))
+ return -EIO;
- if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
if (((unsigned)t->time.tm_sec) < 0x60)
t->time.tm_sec = bcd2bin(t->time.tm_sec);
else
@@ -306,7 +339,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
}
}
- t->enabled = !!(rtc_control & RTC_AIE);
+ t->enabled = !!(p.rtc_control & RTC_AIE);
t->pending = 0;
return 0;
@@ -437,10 +470,57 @@ static int cmos_validate_alarm(struct device *dev, struct rtc_wkalrm *t)
return 0;
}
+struct cmos_set_alarm_callback_param {
+ struct cmos_rtc *cmos;
+ unsigned char mon, mday, hrs, min, sec;
+ struct rtc_wkalrm *t;
+};
+
+/* Note: this function may be executed by mc146818_avoid_UIP() more then
+ * once
+ */
+static void cmos_set_alarm_callback(unsigned char __always_unused seconds,
+ void *param_in)
+{
+ struct cmos_set_alarm_callback_param *p =
+ (struct cmos_set_alarm_callback_param *)param_in;
+
+ /* next rtc irq must not be from previous alarm setting */
+ cmos_irq_disable(p->cmos, RTC_AIE);
+
+ /* update alarm */
+ CMOS_WRITE(p->hrs, RTC_HOURS_ALARM);
+ CMOS_WRITE(p->min, RTC_MINUTES_ALARM);
+ CMOS_WRITE(p->sec, RTC_SECONDS_ALARM);
+
+ /* the system may support an "enhanced" alarm */
+ if (p->cmos->day_alrm) {
+ CMOS_WRITE(p->mday, p->cmos->day_alrm);
+ if (p->cmos->mon_alrm)
+ CMOS_WRITE(p->mon, p->cmos->mon_alrm);
+ }
+
+ if (use_hpet_alarm()) {
+ /*
+ * FIXME the HPET alarm glue currently ignores day_alrm
+ * and mon_alrm ...
+ */
+ hpet_set_alarm_time(p->t->time.tm_hour, p->t->time.tm_min,
+ p->t->time.tm_sec);
+ }
+
+ if (p->t->enabled)
+ cmos_irq_enable(p->cmos, RTC_AIE);
+}
+
static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
{
struct cmos_rtc *cmos = dev_get_drvdata(dev);
- unsigned char mon, mday, hrs, min, sec, rtc_control;
+ struct cmos_set_alarm_callback_param p = {
+ .cmos = cmos,
+ .t = t
+ };
+ unsigned char rtc_control;
int ret;
/* This not only a rtc_op, but also called directly */
@@ -451,52 +531,33 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
if (ret < 0)
return ret;
- mon = t->time.tm_mon + 1;
- mday = t->time.tm_mday;
- hrs = t->time.tm_hour;
- min = t->time.tm_min;
- sec = t->time.tm_sec;
+ p.mon = t->time.tm_mon + 1;
+ p.mday = t->time.tm_mday;
+ p.hrs = t->time.tm_hour;
+ p.min = t->time.tm_min;
+ p.sec = t->time.tm_sec;
+ spin_lock_irq(&rtc_lock);
rtc_control = CMOS_READ(RTC_CONTROL);
+ spin_unlock_irq(&rtc_lock);
+
if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
/* Writing 0xff means "don't care" or "match all". */
- mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
- mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
- hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
- min = (min < 60) ? bin2bcd(min) : 0xff;
- sec = (sec < 60) ? bin2bcd(sec) : 0xff;
- }
-
- spin_lock_irq(&rtc_lock);
-
- /* next rtc irq must not be from previous alarm setting */
- cmos_irq_disable(cmos, RTC_AIE);
-
- /* update alarm */
- CMOS_WRITE(hrs, RTC_HOURS_ALARM);
- CMOS_WRITE(min, RTC_MINUTES_ALARM);
- CMOS_WRITE(sec, RTC_SECONDS_ALARM);
-
- /* the system may support an "enhanced" alarm */
- if (cmos->day_alrm) {
- CMOS_WRITE(mday, cmos->day_alrm);
- if (cmos->mon_alrm)
- CMOS_WRITE(mon, cmos->mon_alrm);
+ p.mon = (p.mon <= 12) ? bin2bcd(p.mon) : 0xff;
+ p.mday = (p.mday >= 1 && p.mday <= 31) ? bin2bcd(p.mday) : 0xff;
+ p.hrs = (p.hrs < 24) ? bin2bcd(p.hrs) : 0xff;
+ p.min = (p.min < 60) ? bin2bcd(p.min) : 0xff;
+ p.sec = (p.sec < 60) ? bin2bcd(p.sec) : 0xff;
}
- if (use_hpet_alarm()) {
- /*
- * FIXME the HPET alarm glue currently ignores day_alrm
- * and mon_alrm ...
- */
- hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min,
- t->time.tm_sec);
- }
-
- if (t->enabled)
- cmos_irq_enable(cmos, RTC_AIE);
-
- spin_unlock_irq(&rtc_lock);
+ /*
+ * Some Intel chipsets disconnect the alarm registers when the clock
+ * update is in progress - during this time writes fail silently.
+ *
+ * Use mc146818_avoid_UIP() to avoid this.
+ */
+ if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p))
+ return -EIO;
cmos->alarm_expires = rtc_tm_to_time64(&t->time);
@@ -790,16 +851,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
- spin_lock_irq(&rtc_lock);
-
- /* Ensure that the RTC is accessible. Bit 6 must be 0! */
- if ((CMOS_READ(RTC_VALID) & 0x40) != 0) {
- spin_unlock_irq(&rtc_lock);
- dev_warn(dev, "not accessible\n");
+ if (!mc146818_does_rtc_work()) {
+ dev_warn(dev, "broken or not accessible\n");
retval = -ENXIO;
goto cleanup1;
}
+ spin_lock_irq(&rtc_lock);
+
if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
/* force periodic irq to CMOS reset default of 1024Hz;
*
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
index d4b72a9fa2ba..ee2efb496174 100644
--- a/drivers/rtc/rtc-da9063.c
+++ b/drivers/rtc/rtc-da9063.c
@@ -475,12 +475,14 @@ static int da9063_rtc_probe(struct platform_device *pdev)
da9063_data_to_tm(data, &rtc->alarm_time, rtc);
rtc->rtc_sync = false;
- /*
- * TODO: some models have alarms on a minute boundary but still support
- * real hardware interrupts. Add this once the core supports it.
- */
- if (config->rtc_data_start != RTC_SEC)
- rtc->rtc_dev->uie_unsupported = 1;
+ if (config->rtc_data_start != RTC_SEC) {
+ set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtc_dev->features);
+ /*
+ * TODO: some models have alarms on a minute boundary but still
+ * support real hardware interrupts.
+ */
+ clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtc_dev->features);
+ }
irq_alarm = platform_get_irq_byname(pdev, "ALARM");
if (irq_alarm < 0)
@@ -494,6 +496,8 @@ static int da9063_rtc_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n",
irq_alarm, ret);
+ device_init_wakeup(&pdev->dev, true);
+
return devm_rtc_register_device(rtc->rtc_dev);
}
diff --git a/drivers/rtc/rtc-ftrtc010.c b/drivers/rtc/rtc-ftrtc010.c
index ad3add5db4c8..53bb08fe1cd4 100644
--- a/drivers/rtc/rtc-ftrtc010.c
+++ b/drivers/rtc/rtc-ftrtc010.c
@@ -141,11 +141,9 @@ static int ftrtc010_rtc_probe(struct platform_device *pdev)
}
}
- res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (!res)
- return -ENODEV;
-
- rtc->rtc_irq = res->start;
+ rtc->rtc_irq = platform_get_irq(pdev, 0);
+ if (rtc->rtc_irq < 0)
+ return rtc->rtc_irq;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
diff --git a/drivers/rtc/rtc-gamecube.c b/drivers/rtc/rtc-gamecube.c
new file mode 100644
index 000000000000..f717b36f4738
--- /dev/null
+++ b/drivers/rtc/rtc-gamecube.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Nintendo GameCube, Wii and Wii U RTC driver
+ *
+ * This driver is for the MX23L4005, more specifically its real-time clock and
+ * SRAM storage. The value returned by the RTC counter must be added with the
+ * offset stored in a bias register in SRAM (on the GameCube and Wii) or in
+ * /config/rtc.xml (on the Wii U). The latter being very impractical to access
+ * from Linux, this driver assumes the bootloader has read it and stored it in
+ * SRAM like for the other two consoles.
+ *
+ * This device sits on a bus named EXI (which is similar to SPI), channel 0,
+ * device 1. This driver assumes no other user of the EXI bus, which is
+ * currently the case but would have to be reworked to add support for other
+ * GameCube hardware exposed on this bus.
+ *
+ * References:
+ * - https://wiiubrew.org/wiki/Hardware/RTC
+ * - https://wiibrew.org/wiki/MX23L4005
+ *
+ * Copyright (C) 2018 rw-r-r-0644
+ * Copyright (C) 2021 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
+ *
+ * Based on rtc-gcn.c
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2005,2008,2009 Albert Herranz
+ * Based on gamecube_time.c from Torben Nielsen.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+#include <linux/time.h>
+
+/* EXI registers */
+#define EXICSR 0
+#define EXICR 12
+#define EXIDATA 16
+
+/* EXI register values */
+#define EXICSR_DEV 0x380
+ #define EXICSR_DEV1 0x100
+#define EXICSR_CLK 0x070
+ #define EXICSR_CLK_1MHZ 0x000
+ #define EXICSR_CLK_2MHZ 0x010
+ #define EXICSR_CLK_4MHZ 0x020
+ #define EXICSR_CLK_8MHZ 0x030
+ #define EXICSR_CLK_16MHZ 0x040
+ #define EXICSR_CLK_32MHZ 0x050
+#define EXICSR_INT 0x008
+ #define EXICSR_INTSET 0x008
+
+#define EXICR_TSTART 0x001
+#define EXICR_TRSMODE 0x002
+ #define EXICR_TRSMODE_IMM 0x000
+#define EXICR_TRSTYPE 0x00C
+ #define EXICR_TRSTYPE_R 0x000
+ #define EXICR_TRSTYPE_W 0x004
+#define EXICR_TLEN 0x030
+ #define EXICR_TLEN32 0x030
+
+/* EXI registers values to access the RTC */
+#define RTC_EXICSR (EXICSR_DEV1 | EXICSR_CLK_8MHZ | EXICSR_INTSET)
+#define RTC_EXICR_W (EXICR_TSTART | EXICR_TRSMODE_IMM | EXICR_TRSTYPE_W | EXICR_TLEN32)
+#define RTC_EXICR_R (EXICR_TSTART | EXICR_TRSMODE_IMM | EXICR_TRSTYPE_R | EXICR_TLEN32)
+#define RTC_EXIDATA_W 0x80000000
+
+/* RTC registers */
+#define RTC_COUNTER 0x200000
+#define RTC_SRAM 0x200001
+#define RTC_SRAM_BIAS 0x200004
+#define RTC_SNAPSHOT 0x204000
+#define RTC_ONTMR 0x210000
+#define RTC_OFFTMR 0x210001
+#define RTC_TEST0 0x210004
+#define RTC_TEST1 0x210005
+#define RTC_TEST2 0x210006
+#define RTC_TEST3 0x210007
+#define RTC_CONTROL0 0x21000c
+#define RTC_CONTROL1 0x21000d
+
+/* RTC flags */
+#define RTC_CONTROL0_UNSTABLE_POWER 0x00000800
+#define RTC_CONTROL0_LOW_BATTERY 0x00000200
+
+struct priv {
+ struct regmap *regmap;
+ void __iomem *iob;
+ u32 rtc_bias;
+};
+
+static int exi_read(void *context, u32 reg, u32 *data)
+{
+ struct priv *d = (struct priv *)context;
+ void __iomem *iob = d->iob;
+
+ /* The spin loops here loop about 15~16 times each, so there is no need
+ * to use a more expensive sleep method.
+ */
+
+ /* Write register offset */
+ iowrite32be(RTC_EXICSR, iob + EXICSR);
+ iowrite32be(reg << 8, iob + EXIDATA);
+ iowrite32be(RTC_EXICR_W, iob + EXICR);
+ while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+ cpu_relax();
+
+ /* Read data */
+ iowrite32be(RTC_EXICSR, iob + EXICSR);
+ iowrite32be(RTC_EXICR_R, iob + EXICR);
+ while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+ cpu_relax();
+ *data = ioread32be(iob + EXIDATA);
+
+ /* Clear channel parameters */
+ iowrite32be(0, iob + EXICSR);
+
+ return 0;
+}
+
+static int exi_write(void *context, u32 reg, u32 data)
+{
+ struct priv *d = (struct priv *)context;
+ void __iomem *iob = d->iob;
+
+ /* The spin loops here loop about 15~16 times each, so there is no need
+ * to use a more expensive sleep method.
+ */
+
+ /* Write register offset */
+ iowrite32be(RTC_EXICSR, iob + EXICSR);
+ iowrite32be(RTC_EXIDATA_W | (reg << 8), iob + EXIDATA);
+ iowrite32be(RTC_EXICR_W, iob + EXICR);
+ while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+ cpu_relax();
+
+ /* Write data */
+ iowrite32be(RTC_EXICSR, iob + EXICSR);
+ iowrite32be(data, iob + EXIDATA);
+ iowrite32be(RTC_EXICR_W, iob + EXICR);
+ while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+ cpu_relax();
+
+ /* Clear channel parameters */
+ iowrite32be(0, iob + EXICSR);
+
+ return 0;
+}
+
+static const struct regmap_bus exi_bus = {
+ /* TODO: is that true? Not that it matters here, but still. */
+ .fast_io = true,
+ .reg_read = exi_read,
+ .reg_write = exi_write,
+};
+
+static int gamecube_rtc_read_time(struct device *dev, struct rtc_time *t)
+{
+ struct priv *d = dev_get_drvdata(dev);
+ int ret;
+ u32 counter;
+ time64_t timestamp;
+
+ ret = regmap_read(d->regmap, RTC_COUNTER, &counter);
+ if (ret)
+ return ret;
+
+ /* Add the counter and the bias to obtain the timestamp */
+ timestamp = (time64_t)d->rtc_bias + counter;
+ rtc_time64_to_tm(timestamp, t);
+
+ return 0;
+}
+
+static int gamecube_rtc_set_time(struct device *dev, struct rtc_time *t)
+{
+ struct priv *d = dev_get_drvdata(dev);
+ time64_t timestamp;
+
+ /* Subtract the timestamp and the bias to obtain the counter value */
+ timestamp = rtc_tm_to_time64(t);
+ return regmap_write(d->regmap, RTC_COUNTER, timestamp - d->rtc_bias);
+}
+
+static int gamecube_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+ struct priv *d = dev_get_drvdata(dev);
+ int value;
+ int control0;
+ int ret;
+
+ switch (cmd) {
+ case RTC_VL_READ:
+ ret = regmap_read(d->regmap, RTC_CONTROL0, &control0);
+ if (ret)
+ return ret;
+
+ value = 0;
+ if (control0 & RTC_CONTROL0_UNSTABLE_POWER)
+ value |= RTC_VL_DATA_INVALID;
+ if (control0 & RTC_CONTROL0_LOW_BATTERY)
+ value |= RTC_VL_BACKUP_LOW;
+ return put_user(value, (unsigned int __user *)arg);
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
+static const struct rtc_class_ops gamecube_rtc_ops = {
+ .read_time = gamecube_rtc_read_time,
+ .set_time = gamecube_rtc_set_time,
+ .ioctl = gamecube_rtc_ioctl,
+};
+
+static int gamecube_rtc_read_offset_from_sram(struct priv *d)
+{
+ struct device_node *np;
+ int ret;
+ struct resource res;
+ void __iomem *hw_srnprot;
+ u32 old;
+
+ np = of_find_compatible_node(NULL, NULL, "nintendo,latte-srnprot");
+ if (!np)
+ np = of_find_compatible_node(NULL, NULL,
+ "nintendo,hollywood-srnprot");
+ if (!np) {
+ pr_info("HW_SRNPROT not found, assuming a GameCube\n");
+ return regmap_read(d->regmap, RTC_SRAM_BIAS, &d->rtc_bias);
+ }
+
+ ret = of_address_to_resource(np, 0, &res);
+ if (ret) {
+ pr_err("no io memory range found\n");
+ return -1;
+ }
+
+ hw_srnprot = ioremap(res.start, resource_size(&res));
+ old = ioread32be(hw_srnprot);
+
+ /* TODO: figure out why we use this magic constant. I obtained it by
+ * reading the leftover value after boot, after IOSU already ran.
+ *
+ * On my Wii U, setting this register to 1 prevents the console from
+ * rebooting properly, so wiiubrew.org must be missing something.
+ *
+ * See https://wiiubrew.org/wiki/Hardware/Latte_registers
+ */
+ if (old != 0x7bf)
+ iowrite32be(0x7bf, hw_srnprot);
+
+ /* Get the offset from RTC SRAM.
+ *
+ * Its default location on the GameCube and on the Wii is in the SRAM,
+ * while on the Wii U the bootloader needs to fill it with the contents
+ * of /config/rtc.xml on the SLC (the eMMC). We don’t do that from
+ * Linux since it requires implementing a proprietary filesystem and do
+ * file decryption, instead we require the bootloader to fill the same
+ * SRAM address as on previous consoles.
+ */
+ ret = regmap_read(d->regmap, RTC_SRAM_BIAS, &d->rtc_bias);
+ if (ret) {
+ pr_err("failed to get the RTC bias\n");
+ return -1;
+ }
+
+ /* Reset SRAM access to how it was before, our job here is done. */
+ if (old != 0x7bf)
+ iowrite32be(old, hw_srnprot);
+ iounmap(hw_srnprot);
+
+ return 0;
+}
+
+static const struct regmap_range rtc_rd_ranges[] = {
+ regmap_reg_range(0x200000, 0x200010),
+ regmap_reg_range(0x204000, 0x204000),
+ regmap_reg_range(0x210000, 0x210001),
+ regmap_reg_range(0x210004, 0x210007),
+ regmap_reg_range(0x21000c, 0x21000d),
+};
+
+static const struct regmap_access_table rtc_rd_regs = {
+ .yes_ranges = rtc_rd_ranges,
+ .n_yes_ranges = ARRAY_SIZE(rtc_rd_ranges),
+};
+
+static const struct regmap_range rtc_wr_ranges[] = {
+ regmap_reg_range(0x200000, 0x200010),
+ regmap_reg_range(0x204000, 0x204000),
+ regmap_reg_range(0x210000, 0x210001),
+ regmap_reg_range(0x21000d, 0x21000d),
+};
+
+static const struct regmap_access_table rtc_wr_regs = {
+ .yes_ranges = rtc_wr_ranges,
+ .n_yes_ranges = ARRAY_SIZE(rtc_wr_ranges),
+};
+
+static const struct regmap_config gamecube_rtc_regmap_config = {
+ .reg_bits = 24,
+ .val_bits = 32,
+ .rd_table = &rtc_rd_regs,
+ .wr_table = &rtc_wr_regs,
+ .max_register = 0x21000d,
+ .name = "gamecube-rtc",
+};
+
+static int gamecube_rtc_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct rtc_device *rtc;
+ struct priv *d;
+ int ret;
+
+ d = devm_kzalloc(dev, sizeof(struct priv), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->iob = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(d->iob))
+ return PTR_ERR(d->iob);
+
+ d->regmap = devm_regmap_init(dev, &exi_bus, d,
+ &gamecube_rtc_regmap_config);
+ if (IS_ERR(d->regmap))
+ return PTR_ERR(d->regmap);
+
+ ret = gamecube_rtc_read_offset_from_sram(d);
+ if (ret)
+ return ret;
+ dev_dbg(dev, "SRAM bias: 0x%x", d->rtc_bias);
+
+ dev_set_drvdata(dev, d);
+
+ rtc = devm_rtc_allocate_device(dev);
+ if (IS_ERR(rtc))
+ return PTR_ERR(rtc);
+
+ /* We can represent further than that, but it depends on the stored
+ * bias and we can’t modify it persistently on all supported consoles,
+ * so here we pretend to be limited to 2106.
+ */
+ rtc->range_min = 0;
+ rtc->range_max = U32_MAX;
+ rtc->ops = &gamecube_rtc_ops;
+
+ devm_rtc_register_device(rtc);
+
+ return 0;
+}
+
+static const struct of_device_id gamecube_rtc_of_match[] = {
+ {.compatible = "nintendo,latte-exi" },
+ {.compatible = "nintendo,hollywood-exi" },
+ {.compatible = "nintendo,flipper-exi" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, gamecube_rtc_of_match);
+
+static struct platform_driver gamecube_rtc_driver = {
+ .probe = gamecube_rtc_probe,
+ .driver = {
+ .name = "rtc-gamecube",
+ .of_match_table = gamecube_rtc_of_match,
+ },
+};
+module_platform_driver(gamecube_rtc_driver);
+
+MODULE_AUTHOR("Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>");
+MODULE_DESCRIPTION("Nintendo GameCube, Wii and Wii U RTC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
index dcfaf09946ee..ae9f131b43c0 100644
--- a/drivers/rtc/rtc-mc146818-lib.c
+++ b/drivers/rtc/rtc-mc146818-lib.c
@@ -8,48 +8,100 @@
#include <linux/acpi.h>
#endif
-unsigned int mc146818_get_time(struct rtc_time *time)
+/*
+ * Execute a function while the UIP (Update-in-progress) bit of the RTC is
+ * unset.
+ *
+ * Warning: callback may be executed more then once.
+ */
+bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
+ void *param)
{
- unsigned char ctrl;
+ int i;
unsigned long flags;
- unsigned char century = 0;
- bool retry;
+ unsigned char seconds;
-#ifdef CONFIG_MACH_DECSTATION
- unsigned int real_year;
-#endif
+ for (i = 0; i < 10; i++) {
+ spin_lock_irqsave(&rtc_lock, flags);
-again:
- spin_lock_irqsave(&rtc_lock, flags);
- /* Ensure that the RTC is accessible. Bit 6 must be 0! */
- if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
- spin_unlock_irqrestore(&rtc_lock, flags);
- memset(time, 0xff, sizeof(*time));
- return 0;
- }
+ /*
+ * Check whether there is an update in progress during which the
+ * readout is unspecified. The maximum update time is ~2ms. Poll
+ * every msec for completion.
+ *
+ * Store the second value before checking UIP so a long lasting
+ * NMI which happens to hit after the UIP check cannot make
+ * an update cycle invisible.
+ */
+ seconds = CMOS_READ(RTC_SECONDS);
- /*
- * Check whether there is an update in progress during which the
- * readout is unspecified. The maximum update time is ~2ms. Poll
- * every msec for completion.
- *
- * Store the second value before checking UIP so a long lasting NMI
- * which happens to hit after the UIP check cannot make an update
- * cycle invisible.
- */
- time->tm_sec = CMOS_READ(RTC_SECONDS);
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ mdelay(1);
+ continue;
+ }
- if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
- spin_unlock_irqrestore(&rtc_lock, flags);
- mdelay(1);
- goto again;
- }
+ /* Revalidate the above readout */
+ if (seconds != CMOS_READ(RTC_SECONDS)) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ continue;
+ }
- /* Revalidate the above readout */
- if (time->tm_sec != CMOS_READ(RTC_SECONDS)) {
+ if (callback)
+ callback(seconds, param);
+
+ /*
+ * Check for the UIP bit again. If it is set now then
+ * the above values may contain garbage.
+ */
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ mdelay(1);
+ continue;
+ }
+
+ /*
+ * A NMI might have interrupted the above sequence so check
+ * whether the seconds value has changed which indicates that
+ * the NMI took longer than the UIP bit was set. Unlikely, but
+ * possible and there is also virt...
+ */
+ if (seconds != CMOS_READ(RTC_SECONDS)) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ continue;
+ }
spin_unlock_irqrestore(&rtc_lock, flags);
- goto again;
+
+ return true;
}
+ return false;
+}
+EXPORT_SYMBOL_GPL(mc146818_avoid_UIP);
+
+/*
+ * If the UIP (Update-in-progress) bit of the RTC is set for more then
+ * 10ms, the RTC is apparently broken or not present.
+ */
+bool mc146818_does_rtc_work(void)
+{
+ return mc146818_avoid_UIP(NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(mc146818_does_rtc_work);
+
+struct mc146818_get_time_callback_param {
+ struct rtc_time *time;
+ unsigned char ctrl;
+#ifdef CONFIG_ACPI
+ unsigned char century;
+#endif
+#ifdef CONFIG_MACH_DECSTATION
+ unsigned int real_year;
+#endif
+};
+
+static void mc146818_get_time_callback(unsigned char seconds, void *param_in)
+{
+ struct mc146818_get_time_callback_param *p = param_in;
/*
* Only the values that we read from the RTC are set. We leave
@@ -57,39 +109,39 @@ again:
* RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
* by the RTC when initially set to a non-zero value.
*/
- time->tm_min = CMOS_READ(RTC_MINUTES);
- time->tm_hour = CMOS_READ(RTC_HOURS);
- time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
- time->tm_mon = CMOS_READ(RTC_MONTH);
- time->tm_year = CMOS_READ(RTC_YEAR);
+ p->time->tm_sec = seconds;
+ p->time->tm_min = CMOS_READ(RTC_MINUTES);
+ p->time->tm_hour = CMOS_READ(RTC_HOURS);
+ p->time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
+ p->time->tm_mon = CMOS_READ(RTC_MONTH);
+ p->time->tm_year = CMOS_READ(RTC_YEAR);
#ifdef CONFIG_MACH_DECSTATION
- real_year = CMOS_READ(RTC_DEC_YEAR);
+ p->real_year = CMOS_READ(RTC_DEC_YEAR);
#endif
#ifdef CONFIG_ACPI
if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
- acpi_gbl_FADT.century)
- century = CMOS_READ(acpi_gbl_FADT.century);
+ acpi_gbl_FADT.century) {
+ p->century = CMOS_READ(acpi_gbl_FADT.century);
+ } else {
+ p->century = 0;
+ }
#endif
- ctrl = CMOS_READ(RTC_CONTROL);
- /*
- * Check for the UIP bit again. If it is set now then
- * the above values may contain garbage.
- */
- retry = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
- /*
- * A NMI might have interrupted the above sequence so check whether
- * the seconds value has changed which indicates that the NMI took
- * longer than the UIP bit was set. Unlikely, but possible and
- * there is also virt...
- */
- retry |= time->tm_sec != CMOS_READ(RTC_SECONDS);
- spin_unlock_irqrestore(&rtc_lock, flags);
+ p->ctrl = CMOS_READ(RTC_CONTROL);
+}
- if (retry)
- goto again;
+int mc146818_get_time(struct rtc_time *time)
+{
+ struct mc146818_get_time_callback_param p = {
+ .time = time
+ };
+
+ if (!mc146818_avoid_UIP(mc146818_get_time_callback, &p)) {
+ memset(time, 0, sizeof(*time));
+ return -EIO;
+ }
- if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ if (!(p.ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
{
time->tm_sec = bcd2bin(time->tm_sec);
time->tm_min = bcd2bin(time->tm_min);
@@ -97,15 +149,19 @@ again:
time->tm_mday = bcd2bin(time->tm_mday);
time->tm_mon = bcd2bin(time->tm_mon);
time->tm_year = bcd2bin(time->tm_year);
- century = bcd2bin(century);
+#ifdef CONFIG_ACPI
+ p.century = bcd2bin(p.century);
+#endif
}
#ifdef CONFIG_MACH_DECSTATION
- time->tm_year += real_year - 72;
+ time->tm_year += p.real_year - 72;
#endif
- if (century > 20)
- time->tm_year += (century - 19) * 100;
+#ifdef CONFIG_ACPI
+ if (p.century > 19)
+ time->tm_year += (p.century - 19) * 100;
+#endif
/*
* Account for differences between how the RTC uses the values
@@ -116,7 +172,7 @@ again:
time->tm_mon--;
- return RTC_24H;
+ return 0;
}
EXPORT_SYMBOL_GPL(mc146818_get_time);
diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
index 56c58b055dff..81a5b1f2e68c 100644
--- a/drivers/rtc/rtc-pcf2127.c
+++ b/drivers/rtc/rtc-pcf2127.c
@@ -748,7 +748,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
/*
* Enable timestamp function and store timestamp of first trigger
- * event until TSF1 and TFS2 interrupt flags are cleared.
+ * event until TSF1 and TSF2 interrupt flags are cleared.
*/
ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_TS_CTRL,
PCF2127_BIT_TS_CTRL_TSOFF |
diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
index 15e50bb10cf0..df2b072c394d 100644
--- a/drivers/rtc/rtc-pcf85063.c
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -514,21 +514,56 @@ static struct clk *pcf85063_clkout_register_clk(struct pcf85063 *pcf85063)
}
#endif
-static const struct pcf85063_config pcf85063tp_config = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x0a,
+enum pcf85063_type {
+ PCF85063,
+ PCF85063TP,
+ PCF85063A,
+ RV8263,
+ PCF85063_LAST_ID
+};
+
+static struct pcf85063_config pcf85063_cfg[] = {
+ [PCF85063] = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x0a,
+ },
+ },
+ [PCF85063TP] = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x0a,
+ },
+ },
+ [PCF85063A] = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x11,
+ },
+ .has_alarms = 1,
+ },
+ [RV8263] = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x11,
+ },
+ .has_alarms = 1,
+ .force_cap_7000 = 1,
},
};
+static const struct i2c_device_id pcf85063_ids[];
+
static int pcf85063_probe(struct i2c_client *client)
{
struct pcf85063 *pcf85063;
unsigned int tmp;
int err;
- const struct pcf85063_config *config = &pcf85063tp_config;
- const void *data = of_device_get_match_data(&client->dev);
+ const struct pcf85063_config *config;
struct nvmem_config nvmem_cfg = {
.name = "pcf85063_nvram",
.reg_read = pcf85063_nvmem_read,
@@ -544,8 +579,17 @@ static int pcf85063_probe(struct i2c_client *client)
if (!pcf85063)
return -ENOMEM;
- if (data)
- config = data;
+ if (client->dev.of_node) {
+ config = of_device_get_match_data(&client->dev);
+ if (!config)
+ return -ENODEV;
+ } else {
+ enum pcf85063_type type =
+ i2c_match_id(pcf85063_ids, client)->driver_data;
+ if (type >= PCF85063_LAST_ID)
+ return -ENODEV;
+ config = &pcf85063_cfg[type];
+ }
pcf85063->regmap = devm_regmap_init_i2c(client, &config->regmap);
if (IS_ERR(pcf85063->regmap))
@@ -604,31 +648,21 @@ static int pcf85063_probe(struct i2c_client *client)
return devm_rtc_register_device(pcf85063->rtc);
}
-#ifdef CONFIG_OF
-static const struct pcf85063_config pcf85063a_config = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x11,
- },
- .has_alarms = 1,
-};
-
-static const struct pcf85063_config rv8263_config = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x11,
- },
- .has_alarms = 1,
- .force_cap_7000 = 1,
+static const struct i2c_device_id pcf85063_ids[] = {
+ { "pcf85063", PCF85063 },
+ { "pcf85063tp", PCF85063TP },
+ { "pcf85063a", PCF85063A },
+ { "rv8263", RV8263 },
+ {}
};
+MODULE_DEVICE_TABLE(i2c, pcf85063_ids);
+#ifdef CONFIG_OF
static const struct of_device_id pcf85063_of_match[] = {
- { .compatible = "nxp,pcf85063", .data = &pcf85063tp_config },
- { .compatible = "nxp,pcf85063tp", .data = &pcf85063tp_config },
- { .compatible = "nxp,pcf85063a", .data = &pcf85063a_config },
- { .compatible = "microcrystal,rv8263", .data = &rv8263_config },
+ { .compatible = "nxp,pcf85063", .data = &pcf85063_cfg[PCF85063] },
+ { .compatible = "nxp,pcf85063tp", .data = &pcf85063_cfg[PCF85063TP] },
+ { .compatible = "nxp,pcf85063a", .data = &pcf85063_cfg[PCF85063A] },
+ { .compatible = "microcrystal,rv8263", .data = &pcf85063_cfg[RV8263] },
{}
};
MODULE_DEVICE_TABLE(of, pcf85063_of_match);
@@ -640,6 +674,7 @@ static struct i2c_driver pcf85063_driver = {
.of_match_table = of_match_ptr(pcf85063_of_match),
},
.probe_new = pcf85063_probe,
+ .id_table = pcf85063_ids,
};
module_i2c_driver(pcf85063_driver);
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index d2f1d8f754bf..cf8119b6d320 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -330,6 +330,10 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
if (sa1100_rtc->irq_alarm < 0)
return -ENXIO;
+ sa1100_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
+ if (IS_ERR(sa1100_rtc->rtc))
+ return PTR_ERR(sa1100_rtc->rtc);
+
pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start,
resource_size(pxa_rtc->ress));
if (!pxa_rtc->base) {
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 80980414890c..cb15983383f5 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -28,8 +28,10 @@
#define RS5C372_REG_MONTH 5
#define RS5C372_REG_YEAR 6
#define RS5C372_REG_TRIM 7
-# define RS5C372_TRIM_XSL 0x80
+# define RS5C372_TRIM_XSL 0x80 /* only if RS5C372[a|b] */
# define RS5C372_TRIM_MASK 0x7F
+# define R2221TL_TRIM_DEV (1 << 7) /* only if R2221TL */
+# define RS5C372_TRIM_DECR (1 << 6)
#define RS5C_REG_ALARM_A_MIN 8 /* or ALARM_W */
#define RS5C_REG_ALARM_A_HOURS 9
@@ -324,8 +326,12 @@ static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim)
struct rs5c372 *rs5c372 = i2c_get_clientdata(client);
u8 tmp = rs5c372->regs[RS5C372_REG_TRIM];
- if (osc)
- *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768;
+ if (osc) {
+ if (rs5c372->type == rtc_rs5c372a || rs5c372->type == rtc_rs5c372b)
+ *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768;
+ else
+ *osc = 32768;
+ }
if (trim) {
dev_dbg(&client->dev, "%s: raw trim=%x\n", __func__, tmp);
@@ -485,6 +491,176 @@ static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
#define rs5c372_rtc_proc NULL
#endif
+#ifdef CONFIG_RTC_INTF_DEV
+static int rs5c372_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+ struct rs5c372 *rs5c = i2c_get_clientdata(to_i2c_client(dev));
+ unsigned char ctrl2;
+ int addr;
+ unsigned int flags;
+
+ dev_dbg(dev, "%s: cmd=%x\n", __func__, cmd);
+
+ addr = RS5C_ADDR(RS5C_REG_CTRL2);
+ ctrl2 = i2c_smbus_read_byte_data(rs5c->client, addr);
+
+ switch (cmd) {
+ case RTC_VL_READ:
+ flags = 0;
+
+ switch (rs5c->type) {
+ case rtc_r2025sd:
+ case rtc_r2221tl:
+ if ((rs5c->type == rtc_r2025sd && !(ctrl2 & R2x2x_CTRL2_XSTP)) ||
+ (rs5c->type == rtc_r2221tl && (ctrl2 & R2x2x_CTRL2_XSTP))) {
+ flags |= RTC_VL_DATA_INVALID;
+ }
+ if (ctrl2 & R2x2x_CTRL2_VDET)
+ flags |= RTC_VL_BACKUP_LOW;
+ break;
+ default:
+ if (ctrl2 & RS5C_CTRL2_XSTP)
+ flags |= RTC_VL_DATA_INVALID;
+ break;
+ }
+
+ return put_user(flags, (unsigned int __user *)arg);
+ case RTC_VL_CLR:
+ /* clear VDET bit */
+ if (rs5c->type == rtc_r2025sd || rs5c->type == rtc_r2221tl) {
+ ctrl2 &= ~R2x2x_CTRL2_VDET;
+ if (i2c_smbus_write_byte_data(rs5c->client, addr, ctrl2) < 0) {
+ dev_dbg(&rs5c->client->dev, "%s: write error in line %i\n",
+ __func__, __LINE__);
+ return -EIO;
+ }
+ }
+ return 0;
+ default:
+ return -ENOIOCTLCMD;
+ }
+ return 0;
+}
+#else
+#define rs5c372_ioctl NULL
+#endif
+
+static int rs5c372_read_offset(struct device *dev, long *offset)
+{
+ struct rs5c372 *rs5c = i2c_get_clientdata(to_i2c_client(dev));
+ u8 val = rs5c->regs[RS5C372_REG_TRIM];
+ long ppb_per_step = 0;
+ bool decr = val & RS5C372_TRIM_DECR;
+
+ switch (rs5c->type) {
+ case rtc_r2221tl:
+ ppb_per_step = val & R2221TL_TRIM_DEV ? 1017 : 3051;
+ break;
+ case rtc_rs5c372a:
+ case rtc_rs5c372b:
+ ppb_per_step = val & RS5C372_TRIM_XSL ? 3125 : 3051;
+ break;
+ default:
+ ppb_per_step = 3051;
+ break;
+ }
+
+ /* Only bits[0:5] repsents the time counts */
+ val &= 0x3F;
+
+ /* If bits[1:5] are all 0, it means no increment or decrement */
+ if (!(val & 0x3E)) {
+ *offset = 0;
+ } else {
+ if (decr)
+ *offset = -(((~val) & 0x3F) + 1) * ppb_per_step;
+ else
+ *offset = (val - 1) * ppb_per_step;
+ }
+
+ return 0;
+}
+
+static int rs5c372_set_offset(struct device *dev, long offset)
+{
+ struct rs5c372 *rs5c = i2c_get_clientdata(to_i2c_client(dev));
+ int addr = RS5C_ADDR(RS5C372_REG_TRIM);
+ u8 val = 0;
+ u8 tmp = 0;
+ long ppb_per_step = 3051;
+ long steps = LONG_MIN;
+
+ switch (rs5c->type) {
+ case rtc_rs5c372a:
+ case rtc_rs5c372b:
+ tmp = rs5c->regs[RS5C372_REG_TRIM];
+ if (tmp & RS5C372_TRIM_XSL) {
+ ppb_per_step = 3125;
+ val |= RS5C372_TRIM_XSL;
+ }
+ break;
+ case rtc_r2221tl:
+ /*
+ * Check if it is possible to use high resolution mode (DEV=1).
+ * In this mode, the minimum resolution is 2 / (32768 * 20 * 3),
+ * which is about 1017 ppb.
+ */
+ steps = DIV_ROUND_CLOSEST(offset, 1017);
+ if (steps >= -0x3E && steps <= 0x3E) {
+ ppb_per_step = 1017;
+ val |= R2221TL_TRIM_DEV;
+ } else {
+ /*
+ * offset is out of the range of high resolution mode.
+ * Try to use low resolution mode (DEV=0). In this mode,
+ * the minimum resolution is 2 / (32768 * 20), which is
+ * about 3051 ppb.
+ */
+ steps = LONG_MIN;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (steps == LONG_MIN) {
+ steps = DIV_ROUND_CLOSEST(offset, ppb_per_step);
+ if (steps > 0x3E || steps < -0x3E)
+ return -ERANGE;
+ }
+
+ if (steps > 0) {
+ val |= steps + 1;
+ } else {
+ val |= RS5C372_TRIM_DECR;
+ val |= (~(-steps - 1)) & 0x3F;
+ }
+
+ if (!steps || !(val & 0x3E)) {
+ /*
+ * if offset is too small, set oscillation adjustment register
+ * or time trimming register with its default value whic means
+ * no increment or decrement. But for rs5c372[a|b], the XSL bit
+ * should be kept unchanged.
+ */
+ if (rs5c->type == rtc_rs5c372a || rs5c->type == rtc_rs5c372b)
+ val &= RS5C372_TRIM_XSL;
+ else
+ val = 0;
+ }
+
+ dev_dbg(&rs5c->client->dev, "write 0x%x for offset %ld\n", val, offset);
+
+ if (i2c_smbus_write_byte_data(rs5c->client, addr, val) < 0) {
+ dev_err(&rs5c->client->dev, "failed to write 0x%x to reg %d\n", val, addr);
+ return -EIO;
+ }
+
+ rs5c->regs[RS5C372_REG_TRIM] = val;
+
+ return 0;
+}
+
static const struct rtc_class_ops rs5c372_rtc_ops = {
.proc = rs5c372_rtc_proc,
.read_time = rs5c372_rtc_read_time,
@@ -492,6 +668,9 @@ static const struct rtc_class_ops rs5c372_rtc_ops = {
.read_alarm = rs5c_read_alarm,
.set_alarm = rs5c_set_alarm,
.alarm_irq_enable = rs5c_rtc_alarm_irq_enable,
+ .ioctl = rs5c372_ioctl,
+ .read_offset = rs5c372_read_offset,
+ .set_offset = rs5c372_set_offset,
};
#if IS_ENABLED(CONFIG_RTC_INTF_SYSFS)
diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 0d5ed38bf60c..f69e0b1137cd 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -55,6 +55,7 @@
enum rv8803_type {
rv_8803,
+ rx_8804,
rx_8900
};
@@ -601,6 +602,7 @@ static int rv8803_probe(struct i2c_client *client,
static const struct i2c_device_id rv8803_id[] = {
{ "rv8803", rv_8803 },
+ { "rv8804", rx_8804 },
{ "rx8803", rv_8803 },
{ "rx8900", rx_8900 },
{ }
@@ -617,6 +619,10 @@ static const __maybe_unused struct of_device_id rv8803_of_match[] = {
.data = (void *)rv_8803
},
{
+ .compatible = "epson,rx8804",
+ .data = (void *)rx_8804
+ },
+ {
.compatible = "epson,rx8900",
.data = (void *)rx_8900
},
diff --git a/drivers/rtc/rtc-sunplus.c b/drivers/rtc/rtc-sunplus.c
new file mode 100644
index 000000000000..e8e2ab1103fc
--- /dev/null
+++ b/drivers/rtc/rtc-sunplus.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * The RTC driver for Sunplus SP7021
+ *
+ * Copyright (C) 2019 Sunplus Technology Inc., All rights reseerved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/rtc.h>
+
+#define RTC_REG_NAME "rtc"
+
+#define RTC_CTRL 0x40
+#define TIMER_FREEZE_MASK_BIT BIT(5 + 16)
+#define TIMER_FREEZE BIT(5)
+#define DIS_SYS_RST_RTC_MASK_BIT BIT(4 + 16)
+#define DIS_SYS_RST_RTC BIT(4)
+#define RTC32K_MODE_RESET_MASK_BIT BIT(3 + 16)
+#define RTC32K_MODE_RESET BIT(3)
+#define ALARM_EN_OVERDUE_MASK_BIT BIT(2 + 16)
+#define ALARM_EN_OVERDUE BIT(2)
+#define ALARM_EN_PMC_MASK_BIT BIT(1 + 16)
+#define ALARM_EN_PMC BIT(1)
+#define ALARM_EN_MASK_BIT BIT(0 + 16)
+#define ALARM_EN BIT(0)
+#define RTC_TIMER_OUT 0x44
+#define RTC_DIVIDER 0x48
+#define RTC_TIMER_SET 0x4c
+#define RTC_ALARM_SET 0x50
+#define RTC_USER_DATA 0x54
+#define RTC_RESET_RECORD 0x58
+#define RTC_BATT_CHARGE_CTRL 0x5c
+#define BAT_CHARGE_RSEL_MASK_BIT GENMASK(3 + 16, 2 + 16)
+#define BAT_CHARGE_RSEL_MASK GENMASK(3, 2)
+#define BAT_CHARGE_RSEL_2K_OHM FIELD_PREP(BAT_CHARGE_RSEL_MASK, 0)
+#define BAT_CHARGE_RSEL_250_OHM FIELD_PREP(BAT_CHARGE_RSEL_MASK, 1)
+#define BAT_CHARGE_RSEL_50_OHM FIELD_PREP(BAT_CHARGE_RSEL_MASK, 2)
+#define BAT_CHARGE_RSEL_0_OHM FIELD_PREP(BAT_CHARGE_RSEL_MASK, 3)
+#define BAT_CHARGE_DSEL_MASK_BIT BIT(1 + 16)
+#define BAT_CHARGE_DSEL_MASK GENMASK(1, 1)
+#define BAT_CHARGE_DSEL_ON FIELD_PREP(BAT_CHARGE_DSEL_MASK, 0)
+#define BAT_CHARGE_DSEL_OFF FIELD_PREP(BAT_CHARGE_DSEL_MASK, 1)
+#define BAT_CHARGE_EN_MASK_BIT BIT(0 + 16)
+#define BAT_CHARGE_EN BIT(0)
+#define RTC_TRIM_CTRL 0x60
+
+struct sunplus_rtc {
+ struct rtc_device *rtc;
+ struct resource *res;
+ struct clk *rtcclk;
+ struct reset_control *rstc;
+ void __iomem *reg_base;
+ int irq;
+};
+
+static void sp_get_seconds(struct device *dev, unsigned long *secs)
+{
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+ *secs = (unsigned long)readl(sp_rtc->reg_base + RTC_TIMER_OUT);
+}
+
+static void sp_set_seconds(struct device *dev, unsigned long secs)
+{
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+ writel((u32)secs, sp_rtc->reg_base + RTC_TIMER_SET);
+}
+
+static int sp_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+ unsigned long secs;
+
+ sp_get_seconds(dev, &secs);
+ rtc_time64_to_tm(secs, tm);
+
+ return 0;
+}
+
+static int sp_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+ unsigned long secs;
+
+ secs = rtc_tm_to_time64(tm);
+ dev_dbg(dev, "%s, secs = %lu\n", __func__, secs);
+ sp_set_seconds(dev, secs);
+
+ return 0;
+}
+
+static int sp_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+ unsigned long alarm_time;
+
+ alarm_time = rtc_tm_to_time64(&alrm->time);
+ dev_dbg(dev, "%s, alarm_time: %u\n", __func__, (u32)(alarm_time));
+ writel((u32)alarm_time, sp_rtc->reg_base + RTC_ALARM_SET);
+
+ return 0;
+}
+
+static int sp_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+ unsigned int alarm_time;
+
+ alarm_time = readl(sp_rtc->reg_base + RTC_ALARM_SET);
+ dev_dbg(dev, "%s, alarm_time: %u\n", __func__, alarm_time);
+
+ if (alarm_time == 0)
+ alrm->enabled = 0;
+ else
+ alrm->enabled = 1;
+
+ rtc_time64_to_tm((unsigned long)(alarm_time), &alrm->time);
+
+ return 0;
+}
+
+static int sp_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+ if (enabled)
+ writel((TIMER_FREEZE_MASK_BIT | DIS_SYS_RST_RTC_MASK_BIT |
+ RTC32K_MODE_RESET_MASK_BIT | ALARM_EN_OVERDUE_MASK_BIT |
+ ALARM_EN_PMC_MASK_BIT | ALARM_EN_MASK_BIT) |
+ (DIS_SYS_RST_RTC | ALARM_EN_OVERDUE | ALARM_EN_PMC | ALARM_EN),
+ sp_rtc->reg_base + RTC_CTRL);
+ else
+ writel((ALARM_EN_OVERDUE_MASK_BIT | ALARM_EN_PMC_MASK_BIT | ALARM_EN_MASK_BIT) |
+ 0x0, sp_rtc->reg_base + RTC_CTRL);
+
+ return 0;
+}
+
+static const struct rtc_class_ops sp_rtc_ops = {
+ .read_time = sp_rtc_read_time,
+ .set_time = sp_rtc_set_time,
+ .set_alarm = sp_rtc_set_alarm,
+ .read_alarm = sp_rtc_read_alarm,
+ .alarm_irq_enable = sp_rtc_alarm_irq_enable,
+};
+
+static irqreturn_t sp_rtc_irq_handler(int irq, void *dev_id)
+{
+ struct platform_device *plat_dev = dev_id;
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(&plat_dev->dev);
+
+ rtc_update_irq(sp_rtc->rtc, 1, RTC_IRQF | RTC_AF);
+ dev_dbg(&plat_dev->dev, "[RTC] ALARM INT\n");
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * -------------------------------------------------------------------------------------
+ * bat_charge_rsel bat_charge_dsel bat_charge_en Remarks
+ * x x 0 Disable
+ * 0 0 1 0.86mA (2K Ohm with diode)
+ * 1 0 1 1.81mA (250 Ohm with diode)
+ * 2 0 1 2.07mA (50 Ohm with diode)
+ * 3 0 1 16.0mA (0 Ohm with diode)
+ * 0 1 1 1.36mA (2K Ohm without diode)
+ * 1 1 1 3.99mA (250 Ohm without diode)
+ * 2 1 1 4.41mA (50 Ohm without diode)
+ * 3 1 1 16.0mA (0 Ohm without diode)
+ * -------------------------------------------------------------------------------------
+ */
+static void sp_rtc_set_trickle_charger(struct device dev)
+{
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(&dev);
+ u32 ohms, rsel;
+ u32 chargeable;
+
+ if (of_property_read_u32(dev.of_node, "trickle-resistor-ohms", &ohms) ||
+ of_property_read_u32(dev.of_node, "aux-voltage-chargeable", &chargeable)) {
+ dev_warn(&dev, "battery charger disabled\n");
+ return;
+ }
+
+ switch (ohms) {
+ case 2000:
+ rsel = BAT_CHARGE_RSEL_2K_OHM;
+ break;
+ case 250:
+ rsel = BAT_CHARGE_RSEL_250_OHM;
+ break;
+ case 50:
+ rsel = BAT_CHARGE_RSEL_50_OHM;
+ break;
+ case 0:
+ rsel = BAT_CHARGE_RSEL_0_OHM;
+ break;
+ default:
+ dev_err(&dev, "invalid charger resistor value (%d)\n", ohms);
+ return;
+ }
+
+ writel(BAT_CHARGE_RSEL_MASK_BIT | rsel, sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+
+ switch (chargeable) {
+ case 0:
+ writel(BAT_CHARGE_DSEL_MASK_BIT | BAT_CHARGE_DSEL_OFF,
+ sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+ break;
+ case 1:
+ writel(BAT_CHARGE_DSEL_MASK_BIT | BAT_CHARGE_DSEL_ON,
+ sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+ break;
+ default:
+ dev_err(&dev, "invalid aux-voltage-chargeable value (%d)\n", chargeable);
+ return;
+ }
+
+ writel(BAT_CHARGE_EN_MASK_BIT | BAT_CHARGE_EN, sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+}
+
+static int sp_rtc_probe(struct platform_device *plat_dev)
+{
+ struct sunplus_rtc *sp_rtc;
+ int ret;
+
+ sp_rtc = devm_kzalloc(&plat_dev->dev, sizeof(*sp_rtc), GFP_KERNEL);
+ if (!sp_rtc)
+ return -ENOMEM;
+
+ sp_rtc->res = platform_get_resource_byname(plat_dev, IORESOURCE_MEM, RTC_REG_NAME);
+ sp_rtc->reg_base = devm_ioremap_resource(&plat_dev->dev, sp_rtc->res);
+ if (IS_ERR(sp_rtc->reg_base))
+ return dev_err_probe(&plat_dev->dev, PTR_ERR(sp_rtc->reg_base),
+ "%s devm_ioremap_resource fail\n", RTC_REG_NAME);
+ dev_dbg(&plat_dev->dev, "res = 0x%x, reg_base = 0x%lx\n",
+ sp_rtc->res->start, (unsigned long)sp_rtc->reg_base);
+
+ sp_rtc->irq = platform_get_irq(plat_dev, 0);
+ if (sp_rtc->irq < 0)
+ return dev_err_probe(&plat_dev->dev, sp_rtc->irq, "platform_get_irq failed\n");
+
+ ret = devm_request_irq(&plat_dev->dev, sp_rtc->irq, sp_rtc_irq_handler,
+ IRQF_TRIGGER_RISING, "rtc irq", plat_dev);
+ if (ret)
+ return dev_err_probe(&plat_dev->dev, ret, "devm_request_irq failed:\n");
+
+ sp_rtc->rtcclk = devm_clk_get(&plat_dev->dev, NULL);
+ if (IS_ERR(sp_rtc->rtcclk))
+ return dev_err_probe(&plat_dev->dev, PTR_ERR(sp_rtc->rtcclk),
+ "devm_clk_get fail\n");
+
+ sp_rtc->rstc = devm_reset_control_get_exclusive(&plat_dev->dev, NULL);
+ if (IS_ERR(sp_rtc->rstc))
+ return dev_err_probe(&plat_dev->dev, PTR_ERR(sp_rtc->rstc),
+ "failed to retrieve reset controller\n");
+
+ ret = clk_prepare_enable(sp_rtc->rtcclk);
+ if (ret)
+ goto free_clk;
+
+ ret = reset_control_deassert(sp_rtc->rstc);
+ if (ret)
+ goto free_reset_assert;
+
+ device_init_wakeup(&plat_dev->dev, 1);
+ dev_set_drvdata(&plat_dev->dev, sp_rtc);
+
+ sp_rtc->rtc = devm_rtc_allocate_device(&plat_dev->dev);
+ if (IS_ERR(sp_rtc->rtc)) {
+ ret = PTR_ERR(sp_rtc->rtc);
+ goto free_reset_assert;
+ }
+
+ sp_rtc->rtc->range_max = U32_MAX;
+ sp_rtc->rtc->range_min = 0;
+ sp_rtc->rtc->ops = &sp_rtc_ops;
+
+ ret = devm_rtc_register_device(sp_rtc->rtc);
+ if (ret)
+ goto free_reset_assert;
+
+ /* Setup trickle charger */
+ if (plat_dev->dev.of_node)
+ sp_rtc_set_trickle_charger(plat_dev->dev);
+
+ /* Keep RTC from system reset */
+ writel(DIS_SYS_RST_RTC_MASK_BIT | DIS_SYS_RST_RTC, sp_rtc->reg_base + RTC_CTRL);
+
+ return 0;
+
+free_reset_assert:
+ reset_control_assert(sp_rtc->rstc);
+free_clk:
+ clk_disable_unprepare(sp_rtc->rtcclk);
+
+ return ret;
+}
+
+static int sp_rtc_remove(struct platform_device *plat_dev)
+{
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(&plat_dev->dev);
+
+ device_init_wakeup(&plat_dev->dev, 0);
+ reset_control_assert(sp_rtc->rstc);
+ clk_disable_unprepare(sp_rtc->rtcclk);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int sp_rtc_suspend(struct device *dev)
+{
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+ if (device_may_wakeup(dev))
+ enable_irq_wake(sp_rtc->irq);
+
+ return 0;
+}
+
+static int sp_rtc_resume(struct device *dev)
+{
+ struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+ if (device_may_wakeup(dev))
+ disable_irq_wake(sp_rtc->irq);
+
+ return 0;
+}
+#endif
+
+static const struct of_device_id sp_rtc_of_match[] = {
+ { .compatible = "sunplus,sp7021-rtc" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sp_rtc_of_match);
+
+static SIMPLE_DEV_PM_OPS(sp_rtc_pm_ops, sp_rtc_suspend, sp_rtc_resume);
+
+static struct platform_driver sp_rtc_driver = {
+ .probe = sp_rtc_probe,
+ .remove = sp_rtc_remove,
+ .driver = {
+ .name = "sp7021-rtc",
+ .of_match_table = sp_rtc_of_match,
+ .pm = &sp_rtc_pm_ops,
+ },
+};
+module_platform_driver(sp_rtc_driver);
+
+MODULE_AUTHOR("Vincent Shih <vincent.sunplus@gmail.com>");
+MODULE_DESCRIPTION("Sunplus RTC driver");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 59f6b7b2a70a..b04d039da276 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -271,7 +271,7 @@ MODULE_PARM_DESC(msi, "IRQ handling."
" 0=PIC(default), 1=MSI, 2=MSI-X)");
module_param(startup_timeout, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(startup_timeout, "The duration of time in seconds to wait for"
- " adapter to have it's kernel up and\n"
+ " adapter to have its kernel up and\n"
"running. This is typically adjusted for large systems that do not"
" have a BIOS.");
module_param(aif_timeout, int, S_IRUGO|S_IWUSR);
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 5d566d2b2997..928099163f0f 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -755,11 +755,7 @@ ahd_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev,
static int
ahd_linux_abort(struct scsi_cmnd *cmd)
{
- int error;
-
- error = ahd_linux_queue_abort_cmd(cmd);
-
- return error;
+ return ahd_linux_queue_abort_cmd(cmd);
}
/*
diff --git a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c
index ae62fc3c9ee3..b08fc8839808 100644
--- a/drivers/scsi/elx/efct/efct_driver.c
+++ b/drivers/scsi/elx/efct/efct_driver.c
@@ -541,13 +541,10 @@ efct_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, efct);
- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0) {
- dev_warn(&pdev->dev, "trying DMA_BIT_MASK(32)\n");
- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) {
- dev_err(&pdev->dev, "setting DMA_BIT_MASK failed\n");
- rc = -1;
- goto dma_mask_out;
- }
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (rc) {
+ dev_err(&pdev->dev, "setting DMA_BIT_MASK failed\n");
+ goto dma_mask_out;
}
num_interrupts = efct_device_interrupts_required(efct);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index f46f679fe825..a05ec7aece5a 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1525,16 +1525,11 @@ static void hisi_sas_send_ata_reset_each_phy(struct hisi_hba *hisi_hba,
struct device *dev = hisi_hba->dev;
int s = sizeof(struct host_to_dev_fis);
int rc = TMF_RESP_FUNC_FAILED;
- struct asd_sas_phy *sas_phy;
struct ata_link *link;
u8 fis[20] = {0};
- u32 state;
int i;
- state = hisi_hba->hw->get_phys_state(hisi_hba);
for (i = 0; i < hisi_hba->n_phy; i++) {
- if (!(state & BIT(sas_phy->id)))
- continue;
if (!(sas_port->phy_mask & BIT(i)))
continue;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index cd26c0f8c281..1bc0db572d9e 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -17982,8 +17982,8 @@ lpfc_sli4_alloc_xri(struct lpfc_hba *phba)
* the driver starts at 0 each time.
*/
spin_lock_irq(&phba->hbalock);
- xri = find_next_zero_bit(phba->sli4_hba.xri_bmask,
- phba->sli4_hba.max_cfg_param.max_xri, 0);
+ xri = find_first_zero_bit(phba->sli4_hba.xri_bmask,
+ phba->sli4_hba.max_cfg_param.max_xri);
if (xri >= phba->sli4_hba.max_cfg_param.max_xri) {
spin_unlock_irq(&phba->hbalock);
return NO_XRI;
@@ -19660,7 +19660,7 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba)
max_rpi = phba->sli4_hba.max_cfg_param.max_rpi;
rpi_limit = phba->sli4_hba.next_rpi;
- rpi = find_next_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit, 0);
+ rpi = find_first_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit);
if (rpi >= rpi_limit)
rpi = LPFC_RPI_ALLOC_ERROR;
else {
@@ -20303,8 +20303,8 @@ next_priority:
* have been tested so that we can detect when we should
* change the priority level.
*/
- next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask,
- LPFC_SLI4_FCF_TBL_INDX_MAX, 0);
+ next_fcf_index = find_first_bit(phba->fcf.fcf_rr_bmask,
+ LPFC_SLI4_FCF_TBL_INDX_MAX);
}
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 0d31d7a5e335..bf987f3a7f3f 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -192,23 +192,21 @@ mega_query_adapter(adapter_t *adapter)
{
dma_addr_t prod_info_dma_handle;
mega_inquiry3 *inquiry3;
- u8 raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ u8 *raw_mbox = (u8 *)&mbox;
int retval;
/* Initialize adapter inquiry mailbox */
- mbox = (mbox_t *)raw_mbox;
-
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
/*
* Try to issue Inquiry3 command
* if not succeeded, then issue MEGA_MBOXCMD_ADAPTERINQ command and
* update enquiry3 structure
*/
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
inquiry3 = (mega_inquiry3 *)adapter->mega_buffer;
@@ -232,10 +230,10 @@ mega_query_adapter(adapter_t *adapter)
inq = &ext_inq->raid_inq;
- mbox->m_out.xferaddr = (u32)dma_handle;
+ mbox.xferaddr = (u32)dma_handle;
/*issue old 0x04 command to adapter */
- mbox->m_out.cmd = MEGA_MBOXCMD_ADPEXTINQ;
+ mbox.cmd = MEGA_MBOXCMD_ADPEXTINQ;
issue_scb_block(adapter, raw_mbox);
@@ -262,7 +260,7 @@ mega_query_adapter(adapter_t *adapter)
sizeof(mega_product_info),
DMA_FROM_DEVICE);
- mbox->m_out.xferaddr = prod_info_dma_handle;
+ mbox.xferaddr = prod_info_dma_handle;
raw_mbox[0] = FC_NEW_CONFIG; /* i.e. mbox->cmd=0xA1 */
raw_mbox[2] = NC_SUBOP_PRODUCT_INFO; /* i.e. 0x0E */
@@ -3569,16 +3567,14 @@ mega_n_to_m(void __user *arg, megacmd_t *mc)
static int
mega_is_bios_enabled(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
-
- mbox = (mbox_t *)raw_mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
raw_mbox[0] = IS_BIOS_ENABLED;
raw_mbox[2] = GET_BIOS;
@@ -3600,13 +3596,11 @@ mega_is_bios_enabled(adapter_t *adapter)
static void
mega_enum_raid_scsi(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
int i;
- mbox = (mbox_t *)raw_mbox;
-
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
/*
* issue command to find out what channels are raid/scsi
@@ -3616,7 +3610,7 @@ mega_enum_raid_scsi(adapter_t *adapter)
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
/*
* Non-ROMB firmware fail this command, so all channels
@@ -3655,23 +3649,21 @@ static void
mega_get_boot_drv(adapter_t *adapter)
{
struct private_bios_data *prv_bios_data;
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
u16 cksum = 0;
u8 *cksum_p;
u8 boot_pdrv;
int i;
- mbox = (mbox_t *)raw_mbox;
-
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
raw_mbox[0] = BIOS_PVT_DATA;
raw_mbox[2] = GET_BIOS_PVT_DATA;
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
adapter->boot_ldrv_enabled = 0;
adapter->boot_ldrv = 0;
@@ -3721,13 +3713,11 @@ mega_get_boot_drv(adapter_t *adapter)
static int
mega_support_random_del(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
int rval;
- mbox = (mbox_t *)raw_mbox;
-
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
/*
* issue command
@@ -3750,13 +3740,11 @@ mega_support_random_del(adapter_t *adapter)
static int
mega_support_ext_cdb(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
int rval;
- mbox = (mbox_t *)raw_mbox;
-
- memset(&mbox->m_out, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
/*
* issue command to find out if controller supports extended CDBs.
*/
@@ -3865,16 +3853,14 @@ mega_do_del_logdrv(adapter_t *adapter, int logdrv)
static void
mega_get_max_sgl(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
- mbox = (mbox_t *)raw_mbox;
-
- memset(mbox, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
raw_mbox[0] = MAIN_MISC_OPCODE;
raw_mbox[2] = GET_MAX_SG_SUPPORT;
@@ -3888,7 +3874,7 @@ mega_get_max_sgl(adapter_t *adapter)
}
else {
adapter->sglen = *((char *)adapter->mega_buffer);
-
+
/*
* Make sure this is not more than the resources we are
* planning to allocate
@@ -3910,16 +3896,14 @@ mega_get_max_sgl(adapter_t *adapter)
static int
mega_support_cluster(adapter_t *adapter)
{
- unsigned char raw_mbox[sizeof(struct mbox_out)];
- mbox_t *mbox;
-
- mbox = (mbox_t *)raw_mbox;
+ struct mbox_out mbox;
+ unsigned char *raw_mbox = (u8 *)&mbox;
- memset(mbox, 0, sizeof(raw_mbox));
+ memset(&mbox, 0, sizeof(mbox));
memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE);
- mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle;
+ mbox.xferaddr = (u32)adapter->buf_dma_handle;
/*
* Try to get the initiator id. This command will succeed iff the
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index c39dd4978c9d..15bdc21ead66 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -901,7 +901,7 @@ static const struct {
},
{ MPI3MR_RESET_FROM_SYSFS, "sysfs invocation" },
{ MPI3MR_RESET_FROM_SYSFS_TIMEOUT, "sysfs TM timeout" },
- { MPI3MR_RESET_FROM_FIRMWARE, "firmware asynchronus reset" },
+ { MPI3MR_RESET_FROM_FIRMWARE, "firmware asynchronous reset" },
};
/**
@@ -1242,7 +1242,7 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc)
ioc_state = mpi3mr_get_iocstate(mrioc);
if (ioc_state == MRIOC_STATE_READY) {
ioc_info(mrioc,
- "successfully transistioned to %s state\n",
+ "successfully transitioned to %s state\n",
mpi3mr_iocstate_name(ioc_state));
return 0;
}
@@ -2174,7 +2174,7 @@ out:
* mpi3mr_check_rh_fault_ioc - check reset history and fault
* controller
* @mrioc: Adapter instance reference
- * @reason_code, reason code for the fault.
+ * @reason_code: reason code for the fault.
*
* This routine will save snapdump and fault the controller with
* the given reason code if it is not already in the fault or
@@ -3633,7 +3633,6 @@ static int mpi3mr_enable_events(struct mpi3mr_ioc *mrioc)
/**
* mpi3mr_init_ioc - Initialize the controller
* @mrioc: Adapter instance reference
- * @init_type: Flag to indicate is the init_type
*
* This the controller initialization routine, executed either
* after soft reset or from pci probe callback.
@@ -3844,7 +3843,7 @@ retry_init:
if (mrioc->shost->nr_hw_queues > mrioc->num_op_reply_q) {
ioc_err(mrioc,
- "cannot create minimum number of operatioanl queues expected:%d created:%d\n",
+ "cannot create minimum number of operational queues expected:%d created:%d\n",
mrioc->shost->nr_hw_queues, mrioc->num_op_reply_q);
goto out_failed_noretry;
}
@@ -4174,7 +4173,7 @@ static void mpi3mr_issue_ioc_shutdown(struct mpi3mr_ioc *mrioc)
/**
* mpi3mr_cleanup_ioc - Cleanup controller
* @mrioc: Adapter instance reference
-
+ *
* controller cleanup handler, Message unit reset or soft reset
* and shutdown notification is issued to the controller.
*
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index a0af986633d2..949e98d523e2 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -77,8 +77,8 @@
#define MPT3SAS_DRIVER_NAME "mpt3sas"
#define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
#define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION "39.100.00.00"
-#define MPT3SAS_MAJOR_VERSION 39
+#define MPT3SAS_DRIVER_VERSION "40.100.00.00"
+#define MPT3SAS_MAJOR_VERSION 40
#define MPT3SAS_MINOR_VERSION 100
#define MPT3SAS_BUILD_VERSION 0
#define MPT3SAS_RELEASE_VERSION 00
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index 05b6c6a073c3..d92ca140d298 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -3533,11 +3533,31 @@ diag_trigger_master_store(struct device *cdev,
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+ struct SL_WH_MASTER_TRIGGER_T *master_tg;
unsigned long flags;
ssize_t rc;
+ bool set = 1;
- spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
rc = min(sizeof(struct SL_WH_MASTER_TRIGGER_T), count);
+
+ if (ioc->supports_trigger_pages) {
+ master_tg = kzalloc(sizeof(struct SL_WH_MASTER_TRIGGER_T),
+ GFP_KERNEL);
+ if (!master_tg)
+ return -ENOMEM;
+
+ memcpy(master_tg, buf, rc);
+ if (!master_tg->MasterData)
+ set = 0;
+ if (mpt3sas_config_update_driver_trigger_pg1(ioc, master_tg,
+ set)) {
+ kfree(master_tg);
+ return -EFAULT;
+ }
+ kfree(master_tg);
+ }
+
+ spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
memset(&ioc->diag_trigger_master, 0,
sizeof(struct SL_WH_MASTER_TRIGGER_T));
memcpy(&ioc->diag_trigger_master, buf, rc);
@@ -3589,11 +3609,31 @@ diag_trigger_event_store(struct device *cdev,
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+ struct SL_WH_EVENT_TRIGGERS_T *event_tg;
unsigned long flags;
ssize_t sz;
+ bool set = 1;
- spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
sz = min(sizeof(struct SL_WH_EVENT_TRIGGERS_T), count);
+ if (ioc->supports_trigger_pages) {
+ event_tg = kzalloc(sizeof(struct SL_WH_EVENT_TRIGGERS_T),
+ GFP_KERNEL);
+ if (!event_tg)
+ return -ENOMEM;
+
+ memcpy(event_tg, buf, sz);
+ if (!event_tg->ValidEntries)
+ set = 0;
+ if (mpt3sas_config_update_driver_trigger_pg2(ioc, event_tg,
+ set)) {
+ kfree(event_tg);
+ return -EFAULT;
+ }
+ kfree(event_tg);
+ }
+
+ spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
+
memset(&ioc->diag_trigger_event, 0,
sizeof(struct SL_WH_EVENT_TRIGGERS_T));
memcpy(&ioc->diag_trigger_event, buf, sz);
@@ -3644,11 +3684,31 @@ diag_trigger_scsi_store(struct device *cdev,
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+ struct SL_WH_SCSI_TRIGGERS_T *scsi_tg;
unsigned long flags;
ssize_t sz;
+ bool set = 1;
+
+ sz = min(sizeof(struct SL_WH_SCSI_TRIGGERS_T), count);
+ if (ioc->supports_trigger_pages) {
+ scsi_tg = kzalloc(sizeof(struct SL_WH_SCSI_TRIGGERS_T),
+ GFP_KERNEL);
+ if (!scsi_tg)
+ return -ENOMEM;
+
+ memcpy(scsi_tg, buf, sz);
+ if (!scsi_tg->ValidEntries)
+ set = 0;
+ if (mpt3sas_config_update_driver_trigger_pg3(ioc, scsi_tg,
+ set)) {
+ kfree(scsi_tg);
+ return -EFAULT;
+ }
+ kfree(scsi_tg);
+ }
spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
- sz = min(sizeof(ioc->diag_trigger_scsi), count);
+
memset(&ioc->diag_trigger_scsi, 0, sizeof(ioc->diag_trigger_scsi));
memcpy(&ioc->diag_trigger_scsi, buf, sz);
if (ioc->diag_trigger_scsi.ValidEntries > NUM_VALID_ENTRIES)
@@ -3698,11 +3758,30 @@ diag_trigger_mpi_store(struct device *cdev,
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
+ struct SL_WH_MPI_TRIGGERS_T *mpi_tg;
unsigned long flags;
ssize_t sz;
+ bool set = 1;
- spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count);
+ if (ioc->supports_trigger_pages) {
+ mpi_tg = kzalloc(sizeof(struct SL_WH_MPI_TRIGGERS_T),
+ GFP_KERNEL);
+ if (!mpi_tg)
+ return -ENOMEM;
+
+ memcpy(mpi_tg, buf, sz);
+ if (!mpi_tg->ValidEntries)
+ set = 0;
+ if (mpt3sas_config_update_driver_trigger_pg4(ioc, mpi_tg,
+ set)) {
+ kfree(mpi_tg);
+ return -EFAULT;
+ }
+ kfree(mpi_tg);
+ }
+
+ spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
memset(&ioc->diag_trigger_mpi, 0,
sizeof(ioc->diag_trigger_mpi));
memcpy(&ioc->diag_trigger_mpi, buf, sz);
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index 8b9e889bc306..92c818a8a84a 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -1557,6 +1557,9 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev, void *priv_data)
data->MmioAddress = (unsigned long)
ioremap(p_dev->resource[2]->start,
resource_size(p_dev->resource[2]));
+ if (!data->MmioAddress)
+ goto next_entry;
+
data->MmioLength = resource_size(p_dev->resource[2]);
}
/* If we got this far, we're cool! */
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index c9a16eef38c1..160ee8b228c9 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -1199,7 +1199,7 @@ int pm8001_abort_task(struct sas_task *task)
struct pm8001_device *pm8001_dev;
struct pm8001_tmf_task tmf_task;
int rc = TMF_RESP_FUNC_FAILED, ret;
- u32 phy_id;
+ u32 phy_id, port_id;
struct sas_task_slow slow_task;
if (unlikely(!task || !task->lldd_task || !task->dev))
@@ -1246,6 +1246,7 @@ int pm8001_abort_task(struct sas_task *task)
DECLARE_COMPLETION_ONSTACK(completion_reset);
DECLARE_COMPLETION_ONSTACK(completion);
struct pm8001_phy *phy = pm8001_ha->phy + phy_id;
+ port_id = phy->port->port_id;
/* 1. Set Device state as Recovery */
pm8001_dev->setds_completion = &completion;
@@ -1297,6 +1298,10 @@ int pm8001_abort_task(struct sas_task *task)
PORT_RESET_TMO);
if (phy->port_reset_status == PORT_RESET_TMO) {
pm8001_dev_gone_notify(dev);
+ PM8001_CHIP_DISP->hw_event_ack_req(
+ pm8001_ha, 0,
+ 0x07, /*HW_EVENT_PHY_DOWN ack*/
+ port_id, phy_id, 0, 0);
goto out;
}
}
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
index 83eec16d021d..a17da1cebce1 100644
--- a/drivers/scsi/pm8001/pm8001_sas.h
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -216,6 +216,9 @@ struct pm8001_dispatch {
u32 state);
int (*sas_re_init_req)(struct pm8001_hba_info *pm8001_ha);
int (*fatal_errors)(struct pm8001_hba_info *pm8001_ha);
+ void (*hw_event_ack_req)(struct pm8001_hba_info *pm8001_ha,
+ u32 Qnum, u32 SEA, u32 port_id, u32 phyId, u32 param0,
+ u32 param1);
};
struct pm8001_chip_info {
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index ad3c6da12715..bbf538fe15b3 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -3712,8 +3712,10 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
break;
case HW_EVENT_PORT_RESET_TIMER_TMO:
pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_RESET_TIMER_TMO\n");
- pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
- port_id, phy_id, 0, 0);
+ if (!pm8001_ha->phy[phy_id].reset_completion) {
+ pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
+ port_id, phy_id, 0, 0);
+ }
sas_phy_disconnected(sas_phy);
phy->phy_attached = 0;
sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR,
@@ -5055,4 +5057,5 @@ const struct pm8001_dispatch pm8001_80xx_dispatch = {
.fw_flash_update_req = pm8001_chip_fw_flash_update_req,
.set_dev_state_req = pm8001_chip_set_dev_state_req,
.fatal_errors = pm80xx_fatal_errors,
+ .hw_event_ack_req = pm80xx_hw_event_ack_req,
};
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 1bf7a22d4948..cdc66e2a9488 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -1415,6 +1415,8 @@ static void qedf_upload_connection(struct qedf_ctx *qedf,
*/
term_params = dma_alloc_coherent(&qedf->pdev->dev, QEDF_TERM_BUFF_SIZE,
&term_params_dma, GFP_KERNEL);
+ if (!term_params)
+ return;
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Uploading connection "
"port_id=%06x.\n", fcport->rdata->ids.port_id);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 35e381f6d371..0a70aa763a96 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2067,7 +2067,6 @@ void scsi_exit_queue(void)
* @sdev: SCSI device to be queried
* @pf: Page format bit (1 == standard, 0 == vendor specific)
* @sp: Save page bit (0 == don't save, 1 == save)
- * @modepage: mode page being requested
* @buffer: request buffer (may not be smaller than eight bytes)
* @len: length of request buffer.
* @timeout: command timeout
@@ -2080,10 +2079,9 @@ void scsi_exit_queue(void)
* status on error
*
*/
-int
-scsi_mode_select(struct scsi_device *sdev, int pf, int sp, int modepage,
- unsigned char *buffer, int len, int timeout, int retries,
- struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
+int scsi_mode_select(struct scsi_device *sdev, int pf, int sp,
+ unsigned char *buffer, int len, int timeout, int retries,
+ struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
{
unsigned char cmd[10];
unsigned char *real_buffer;
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index d6982d355739..95aee1ad1383 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -49,7 +49,7 @@ static DEFINE_MUTEX(global_host_template_mutex);
static ssize_t proc_scsi_host_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct Scsi_Host *shost = PDE_DATA(file_inode(file));
+ struct Scsi_Host *shost = pde_data(file_inode(file));
ssize_t ret = -ENOMEM;
char *page;
@@ -79,7 +79,7 @@ static int proc_scsi_show(struct seq_file *m, void *v)
static int proc_scsi_host_open(struct inode *inode, struct file *file)
{
- return single_open_size(file, proc_scsi_show, PDE_DATA(inode),
+ return single_open_size(file, proc_scsi_show, pde_data(inode),
4 * PAGE_SIZE);
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 0e73c3f2f381..62eb9921cc94 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -209,7 +209,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
*/
data.device_specific = 0;
- if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT,
+ if (scsi_mode_select(sdp, 1, sp, buffer_data, len, SD_TIMEOUT,
sdkp->max_retries, &data, &sshdr)) {
if (scsi_sense_valid(&sshdr))
sd_print_sense_hdr(sdkp, &sshdr);
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index ad12b3261845..6b43e97bd417 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -77,7 +77,7 @@ static int sg_proc_init(void);
#define SG_DEFAULT_TIMEOUT mult_frac(SG_DEFAULT_TIMEOUT_USER, HZ, USER_HZ)
-int sg_big_buff = SG_DEF_RESERVED_SIZE;
+static int sg_big_buff = SG_DEF_RESERVED_SIZE;
/* N.B. This variable is readable and writeable via
/proc/scsi/sg/def_reserved_size . Each time sg_open() is called a buffer
of this size (or less if there is not enough memory) will be reserved
@@ -1634,6 +1634,37 @@ MODULE_PARM_DESC(scatter_elem_sz, "scatter gather element "
MODULE_PARM_DESC(def_reserved_size, "size of buffer reserved for each fd");
MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))");
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+
+static struct ctl_table sg_sysctls[] = {
+ {
+ .procname = "sg-big-buff",
+ .data = &sg_big_buff,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {}
+};
+
+static struct ctl_table_header *hdr;
+static void register_sg_sysctls(void)
+{
+ if (!hdr)
+ hdr = register_sysctl("kernel", sg_sysctls);
+}
+
+static void unregister_sg_sysctls(void)
+{
+ if (hdr)
+ unregister_sysctl_table(hdr);
+}
+#else
+#define register_sg_sysctls() do { } while (0)
+#define unregister_sg_sysctls() do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
static int __init
init_sg(void)
{
@@ -1666,6 +1697,7 @@ init_sg(void)
return 0;
}
class_destroy(sg_sysfs_class);
+ register_sg_sysctls();
err_out:
unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS);
return rc;
@@ -1674,6 +1706,7 @@ err_out:
static void __exit
exit_sg(void)
{
+ unregister_sg_sysctls();
#ifdef CONFIG_SCSI_PROC_FS
remove_proc_subtree("scsi/sg", NULL);
#endif /* CONFIG_SCSI_PROC_FS */
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 20595c0ba0ae..9a0bba5a51a7 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -21,6 +21,8 @@
#include <linux/device.h>
#include <linux/hyperv.h>
#include <linux/blkdev.h>
+#include <linux/dma-mapping.h>
+
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
@@ -1336,6 +1338,7 @@ static void storvsc_on_channel_callback(void *context)
continue;
}
request = (struct storvsc_cmd_request *)scsi_cmd_priv(scmnd);
+ scsi_dma_unmap(scmnd);
}
storvsc_on_receive(stor_device, packet, request);
@@ -1749,9 +1752,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
struct hv_host_device *host_dev = shost_priv(host);
struct hv_device *dev = host_dev->dev;
struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);
- int i;
struct scatterlist *sgl;
- unsigned int sg_count;
struct vmscsi_request *vm_srb;
struct vmbus_packet_mpb_array *payload;
u32 payload_sz;
@@ -1824,17 +1825,17 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
memcpy(vm_srb->cdb, scmnd->cmnd, vm_srb->cdb_length);
sgl = (struct scatterlist *)scsi_sglist(scmnd);
- sg_count = scsi_sg_count(scmnd);
length = scsi_bufflen(scmnd);
payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb;
payload_sz = sizeof(cmd_request->mpb);
- if (sg_count) {
- unsigned int hvpgoff, hvpfns_to_add;
+ if (scsi_sg_count(scmnd)) {
unsigned long offset_in_hvpg = offset_in_hvpage(sgl->offset);
unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);
- u64 hvpfn;
+ struct scatterlist *sg;
+ unsigned long hvpfn, hvpfns_to_add;
+ int j, i = 0, sg_count;
if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
@@ -1848,21 +1849,24 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
payload->range.len = length;
payload->range.offset = offset_in_hvpg;
+ sg_count = scsi_dma_map(scmnd);
+ if (sg_count < 0) {
+ ret = SCSI_MLQUEUE_DEVICE_BUSY;
+ goto err_free_payload;
+ }
- for (i = 0; sgl != NULL; sgl = sg_next(sgl)) {
+ for_each_sg(sgl, sg, sg_count, j) {
/*
- * Init values for the current sgl entry. hvpgoff
- * and hvpfns_to_add are in units of Hyper-V size
- * pages. Handling the PAGE_SIZE != HV_HYP_PAGE_SIZE
- * case also handles values of sgl->offset that are
- * larger than PAGE_SIZE. Such offsets are handled
- * even on other than the first sgl entry, provided
- * they are a multiple of PAGE_SIZE.
+ * Init values for the current sgl entry. hvpfns_to_add
+ * is in units of Hyper-V size pages. Handling the
+ * PAGE_SIZE != HV_HYP_PAGE_SIZE case also handles
+ * values of sgl->offset that are larger than PAGE_SIZE.
+ * Such offsets are handled even on other than the first
+ * sgl entry, provided they are a multiple of PAGE_SIZE.
*/
- hvpgoff = HVPFN_DOWN(sgl->offset);
- hvpfn = page_to_hvpfn(sg_page(sgl)) + hvpgoff;
- hvpfns_to_add = HVPFN_UP(sgl->offset + sgl->length) -
- hvpgoff;
+ hvpfn = HVPFN_DOWN(sg_dma_address(sg));
+ hvpfns_to_add = HVPFN_UP(sg_dma_address(sg) +
+ sg_dma_len(sg)) - hvpfn;
/*
* Fill the next portion of the PFN array with
@@ -1872,7 +1876,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
* the PFN array is filled.
*/
while (hvpfns_to_add--)
- payload->range.pfn_array[i++] = hvpfn++;
+ payload->range.pfn_array[i++] = hvpfn++;
}
}
@@ -1884,13 +1888,18 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
put_cpu();
if (ret == -EAGAIN) {
- if (payload_sz > sizeof(cmd_request->mpb))
- kfree(payload);
/* no more space */
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ ret = SCSI_MLQUEUE_DEVICE_BUSY;
+ goto err_free_payload;
}
return 0;
+
+err_free_payload:
+ if (payload_sz > sizeof(cmd_request->mpb))
+ kfree(payload);
+
+ return ret;
}
static struct scsi_host_template scsi_driver = {
@@ -2016,6 +2025,7 @@ static int storvsc_probe(struct hv_device *device,
stor_device->vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
spin_lock_init(&stor_device->lock);
hv_set_drvdata(device, stor_device);
+ dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1);
stor_device->port_number = host->host_no;
ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc);
diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c
index 5393b5c9dd9c..86a938075f30 100644
--- a/drivers/scsi/ufs/ufs-mediatek.c
+++ b/drivers/scsi/ufs/ufs-mediatek.c
@@ -557,7 +557,7 @@ static void ufs_mtk_init_va09_pwr_ctrl(struct ufs_hba *hba)
struct ufs_mtk_host *host = ufshcd_get_variant(hba);
host->reg_va09 = regulator_get(hba->dev, "va09");
- if (!host->reg_va09)
+ if (IS_ERR(host->reg_va09))
dev_info(hba->dev, "failed to get va09");
else
host->caps |= UFS_MTK_CAP_VA09_PWR_CTRL;
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1049e41abd5b..460d2b440d2e 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7815,7 +7815,7 @@ static int ufshcd_quirk_tune_host_pa_tactivate(struct ufs_hba *hba)
peer_pa_tactivate_us = peer_pa_tactivate *
gran_to_us_table[peer_granularity - 1];
- if (pa_tactivate_us > peer_pa_tactivate_us) {
+ if (pa_tactivate_us >= peer_pa_tactivate_us) {
u32 new_peer_pa_tactivate;
new_peer_pa_tactivate = pa_tactivate_us /
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 65c642b24ecf..0e6110da69e7 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -778,7 +778,7 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
static void virtscsi_remove_vqs(struct virtio_device *vdev)
{
/* Stop all the virtqueues. */
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
}
diff --git a/drivers/soc/canaan/Kconfig b/drivers/soc/canaan/Kconfig
index 853096b7e84c..2527cf5757ec 100644
--- a/drivers/soc/canaan/Kconfig
+++ b/drivers/soc/canaan/Kconfig
@@ -5,7 +5,6 @@ config SOC_K210_SYSCTL
depends on RISCV && SOC_CANAAN && OF
default SOC_CANAAN
select PM
- select SYSCON
select MFD_SYSCON
help
Canaan Kendryte K210 SoC system controller driver.
diff --git a/drivers/soc/fsl/qbman/bman_portal.c b/drivers/soc/fsl/qbman/bman_portal.c
index acda8a5637c5..4d7b9caee1c4 100644
--- a/drivers/soc/fsl/qbman/bman_portal.c
+++ b/drivers/soc/fsl/qbman/bman_portal.c
@@ -155,7 +155,7 @@ static int bman_portal_probe(struct platform_device *pdev)
}
spin_lock(&bman_lock);
- cpu = cpumask_next_zero(-1, &portal_cpus);
+ cpu = cpumask_first_zero(&portal_cpus);
if (cpu >= nr_cpu_ids) {
__bman_portals_probed = 1;
/* unassigned portal, skip init */
diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c
index 96f74a1dc603..e23b60618c1a 100644
--- a/drivers/soc/fsl/qbman/qman_portal.c
+++ b/drivers/soc/fsl/qbman/qman_portal.c
@@ -248,7 +248,7 @@ static int qman_portal_probe(struct platform_device *pdev)
pcfg->pools = qm_get_pools_sdqcr();
spin_lock(&qman_lock);
- cpu = cpumask_next_zero(-1, &portal_cpus);
+ cpu = cpumask_first_zero(&portal_cpus);
if (cpu >= nr_cpu_ids) {
__qman_portals_probed = 1;
/* unassigned portal, skip init */
diff --git a/drivers/soc/ti/k3-ringacc.c b/drivers/soc/ti/k3-ringacc.c
index 56be39161489..31ab6c657fec 100644
--- a/drivers/soc/ti/k3-ringacc.c
+++ b/drivers/soc/ti/k3-ringacc.c
@@ -358,8 +358,8 @@ struct k3_ring *k3_ringacc_request_ring(struct k3_ringacc *ringacc,
goto out;
if (flags & K3_RINGACC_RING_USE_PROXY) {
- proxy_id = find_next_zero_bit(ringacc->proxy_inuse,
- ringacc->num_proxies, 0);
+ proxy_id = find_first_zero_bit(ringacc->proxy_inuse,
+ ringacc->num_proxies);
if (proxy_id == ringacc->num_proxies)
goto error;
}
diff --git a/drivers/soc/xilinx/Kconfig b/drivers/soc/xilinx/Kconfig
index 53af9115dc31..8a755a5c8836 100644
--- a/drivers/soc/xilinx/Kconfig
+++ b/drivers/soc/xilinx/Kconfig
@@ -25,4 +25,14 @@ config ZYNQMP_PM_DOMAINS
Say yes to enable device power management through PM domains
If in doubt, say N.
+config XLNX_EVENT_MANAGER
+ bool "Enable Xilinx Event Management Driver"
+ depends on ZYNQMP_FIRMWARE
+ default ZYNQMP_FIRMWARE
+ help
+ Say yes to enable event management support for Xilinx.
+ This driver uses firmware driver as an interface for event/power
+ management request to firmware.
+
+ If in doubt, say N.
endmenu
diff --git a/drivers/soc/xilinx/Makefile b/drivers/soc/xilinx/Makefile
index 9854e6f6086b..41e585bc9c67 100644
--- a/drivers/soc/xilinx/Makefile
+++ b/drivers/soc/xilinx/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ZYNQMP_POWER) += zynqmp_power.o
obj-$(CONFIG_ZYNQMP_PM_DOMAINS) += zynqmp_pm_domains.o
+obj-$(CONFIG_XLNX_EVENT_MANAGER) += xlnx_event_manager.o
diff --git a/drivers/soc/xilinx/xlnx_event_manager.c b/drivers/soc/xilinx/xlnx_event_manager.c
new file mode 100644
index 000000000000..b27f8853508e
--- /dev/null
+++ b/drivers/soc/xilinx/xlnx_event_manager.c
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx Event Management Driver
+ *
+ * Copyright (C) 2021 Xilinx, Inc.
+ *
+ * Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/firmware/xlnx-event-manager.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/hashtable.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+static DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number1);
+
+static int virq_sgi;
+static int event_manager_availability = -EACCES;
+
+/* SGI number used for Event management driver */
+#define XLNX_EVENT_SGI_NUM (15)
+
+/* Max number of driver can register for same event */
+#define MAX_DRIVER_PER_EVENT (10U)
+
+/* Max HashMap Order for PM API feature check (1<<7 = 128) */
+#define REGISTERED_DRIVER_MAX_ORDER (7)
+
+#define MAX_BITS (32U) /* Number of bits available for error mask */
+
+#define FIRMWARE_VERSION_MASK (0xFFFFU)
+#define REGISTER_NOTIFIER_FIRMWARE_VERSION (2U)
+
+static DEFINE_HASHTABLE(reg_driver_map, REGISTERED_DRIVER_MAX_ORDER);
+static int sgi_num = XLNX_EVENT_SGI_NUM;
+
+/**
+ * struct registered_event_data - Registered Event Data.
+ * @key: key is the combine id(Node-Id | Event-Id) of type u64
+ * where upper u32 for Node-Id and lower u32 for Event-Id,
+ * And this used as key to index into hashmap.
+ * @agent_data: Data passed back to handler function.
+ * @cb_type: Type of Api callback, like PM_NOTIFY_CB, etc.
+ * @eve_cb: Function pointer to store the callback function.
+ * @wake: If this flag set, firmware will wakeup processor if is
+ * in sleep or power down state.
+ * @hentry: hlist_node that hooks this entry into hashtable.
+ */
+struct registered_event_data {
+ u64 key;
+ enum pm_api_cb_id cb_type;
+ void *agent_data;
+
+ event_cb_func_t eve_cb;
+ bool wake;
+ struct hlist_node hentry;
+};
+
+static bool xlnx_is_error_event(const u32 node_id)
+{
+ if (node_id == EVENT_ERROR_PMC_ERR1 ||
+ node_id == EVENT_ERROR_PMC_ERR2 ||
+ node_id == EVENT_ERROR_PSM_ERR1 ||
+ node_id == EVENT_ERROR_PSM_ERR2)
+ return true;
+
+ return false;
+}
+
+static int xlnx_add_cb_for_notify_event(const u32 node_id, const u32 event, const bool wake,
+ event_cb_func_t cb_fun, void *data)
+{
+ u64 key = 0;
+ struct registered_event_data *eve_data;
+
+ key = ((u64)node_id << 32U) | (u64)event;
+ /* Check for existing entry in hash table for given key id */
+ hash_for_each_possible(reg_driver_map, eve_data, hentry, key) {
+ if (eve_data->key == key) {
+ pr_err("Found as already registered\n");
+ return -EINVAL;
+ }
+ }
+
+ /* Add new entry if not present */
+ eve_data = kmalloc(sizeof(*eve_data), GFP_KERNEL);
+ if (!eve_data)
+ return -ENOMEM;
+
+ eve_data->key = key;
+ eve_data->cb_type = PM_NOTIFY_CB;
+ eve_data->eve_cb = cb_fun;
+ eve_data->wake = wake;
+ eve_data->agent_data = data;
+
+ hash_add(reg_driver_map, &eve_data->hentry, key);
+
+ return 0;
+}
+
+static int xlnx_add_cb_for_suspend(event_cb_func_t cb_fun, void *data)
+{
+ struct registered_event_data *eve_data;
+
+ /* Check for existing entry in hash table for given cb_type */
+ hash_for_each_possible(reg_driver_map, eve_data, hentry, PM_INIT_SUSPEND_CB) {
+ if (eve_data->cb_type == PM_INIT_SUSPEND_CB) {
+ pr_err("Found as already registered\n");
+ return -EINVAL;
+ }
+ }
+
+ /* Add new entry if not present */
+ eve_data = kmalloc(sizeof(*eve_data), GFP_KERNEL);
+ if (!eve_data)
+ return -ENOMEM;
+
+ eve_data->key = 0;
+ eve_data->cb_type = PM_INIT_SUSPEND_CB;
+ eve_data->eve_cb = cb_fun;
+ eve_data->agent_data = data;
+
+ hash_add(reg_driver_map, &eve_data->hentry, PM_INIT_SUSPEND_CB);
+
+ return 0;
+}
+
+static int xlnx_remove_cb_for_suspend(event_cb_func_t cb_fun)
+{
+ bool is_callback_found = false;
+ struct registered_event_data *eve_data;
+
+ /* Check for existing entry in hash table for given cb_type */
+ hash_for_each_possible(reg_driver_map, eve_data, hentry, PM_INIT_SUSPEND_CB) {
+ if (eve_data->cb_type == PM_INIT_SUSPEND_CB &&
+ eve_data->eve_cb == cb_fun) {
+ is_callback_found = true;
+ /* remove an object from a hashtable */
+ hash_del(&eve_data->hentry);
+ kfree(eve_data);
+ }
+ }
+ if (!is_callback_found) {
+ pr_warn("Didn't find any registered callback for suspend event\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int xlnx_remove_cb_for_notify_event(const u32 node_id, const u32 event,
+ event_cb_func_t cb_fun)
+{
+ bool is_callback_found = false;
+ struct registered_event_data *eve_data;
+ u64 key = ((u64)node_id << 32U) | (u64)event;
+
+ /* Check for existing entry in hash table for given key id */
+ hash_for_each_possible(reg_driver_map, eve_data, hentry, key) {
+ if (eve_data->key == key &&
+ eve_data->eve_cb == cb_fun) {
+ is_callback_found = true;
+ /* remove an object from a hashtable */
+ hash_del(&eve_data->hentry);
+ kfree(eve_data);
+ }
+ }
+ if (!is_callback_found) {
+ pr_warn("Didn't find any registered callback for 0x%x 0x%x\n",
+ node_id, event);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * xlnx_register_event() - Register for the event.
+ * @cb_type: Type of callback from pm_api_cb_id,
+ * PM_NOTIFY_CB - for Error Events,
+ * PM_INIT_SUSPEND_CB - for suspend callback.
+ * @node_id: Node-Id related to event.
+ * @event: Event Mask for the Error Event.
+ * @wake: Flag specifying whether the subsystem should be woken upon
+ * event notification.
+ * @cb_fun: Function pointer to store the callback function.
+ * @data: Pointer for the driver instance.
+ *
+ * Return: Returns 0 on successful registration else error code.
+ */
+int xlnx_register_event(const enum pm_api_cb_id cb_type, const u32 node_id, const u32 event,
+ const bool wake, event_cb_func_t cb_fun, void *data)
+{
+ int ret = 0;
+ u32 eve;
+ int pos;
+
+ if (event_manager_availability)
+ return event_manager_availability;
+
+ if (cb_type != PM_NOTIFY_CB && cb_type != PM_INIT_SUSPEND_CB) {
+ pr_err("%s() Unsupported Callback 0x%x\n", __func__, cb_type);
+ return -EINVAL;
+ }
+
+ if (!cb_fun)
+ return -EFAULT;
+
+ if (cb_type == PM_INIT_SUSPEND_CB) {
+ ret = xlnx_add_cb_for_suspend(cb_fun, data);
+ } else {
+ if (!xlnx_is_error_event(node_id)) {
+ /* Add entry for Node-Id/Event in hash table */
+ ret = xlnx_add_cb_for_notify_event(node_id, event, wake, cb_fun, data);
+ } else {
+ /* Add into Hash table */
+ for (pos = 0; pos < MAX_BITS; pos++) {
+ eve = event & (1 << pos);
+ if (!eve)
+ continue;
+
+ /* Add entry for Node-Id/Eve in hash table */
+ ret = xlnx_add_cb_for_notify_event(node_id, eve, wake, cb_fun,
+ data);
+ /* Break the loop if got error */
+ if (ret)
+ break;
+ }
+ if (ret) {
+ /* Skip the Event for which got the error */
+ pos--;
+ /* Remove registered(during this call) event from hash table */
+ for ( ; pos >= 0; pos--) {
+ eve = event & (1 << pos);
+ if (!eve)
+ continue;
+ xlnx_remove_cb_for_notify_event(node_id, eve, cb_fun);
+ }
+ }
+ }
+
+ if (ret) {
+ pr_err("%s() failed for 0x%x and 0x%x: %d\r\n", __func__, node_id,
+ event, ret);
+ return ret;
+ }
+
+ /* Register for Node-Id/Event combination in firmware */
+ ret = zynqmp_pm_register_notifier(node_id, event, wake, true);
+ if (ret) {
+ pr_err("%s() failed for 0x%x and 0x%x: %d\r\n", __func__, node_id,
+ event, ret);
+ /* Remove already registered event from hash table */
+ if (xlnx_is_error_event(node_id)) {
+ for (pos = 0; pos < MAX_BITS; pos++) {
+ eve = event & (1 << pos);
+ if (!eve)
+ continue;
+ xlnx_remove_cb_for_notify_event(node_id, eve, cb_fun);
+ }
+ } else {
+ xlnx_remove_cb_for_notify_event(node_id, event, cb_fun);
+ }
+ return ret;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xlnx_register_event);
+
+/**
+ * xlnx_unregister_event() - Unregister for the event.
+ * @cb_type: Type of callback from pm_api_cb_id,
+ * PM_NOTIFY_CB - for Error Events,
+ * PM_INIT_SUSPEND_CB - for suspend callback.
+ * @node_id: Node-Id related to event.
+ * @event: Event Mask for the Error Event.
+ * @cb_fun: Function pointer of callback function.
+ *
+ * Return: Returns 0 on successful unregistration else error code.
+ */
+int xlnx_unregister_event(const enum pm_api_cb_id cb_type, const u32 node_id, const u32 event,
+ event_cb_func_t cb_fun)
+{
+ int ret;
+ u32 eve, pos;
+
+ if (event_manager_availability)
+ return event_manager_availability;
+
+ if (cb_type != PM_NOTIFY_CB && cb_type != PM_INIT_SUSPEND_CB) {
+ pr_err("%s() Unsupported Callback 0x%x\n", __func__, cb_type);
+ return -EINVAL;
+ }
+
+ if (!cb_fun)
+ return -EFAULT;
+
+ if (cb_type == PM_INIT_SUSPEND_CB) {
+ ret = xlnx_remove_cb_for_suspend(cb_fun);
+ } else {
+ /* Remove Node-Id/Event from hash table */
+ if (!xlnx_is_error_event(node_id)) {
+ xlnx_remove_cb_for_notify_event(node_id, event, cb_fun);
+ } else {
+ for (pos = 0; pos < MAX_BITS; pos++) {
+ eve = event & (1 << pos);
+ if (!eve)
+ continue;
+
+ xlnx_remove_cb_for_notify_event(node_id, eve, cb_fun);
+ }
+ }
+
+ /* Un-register for Node-Id/Event combination */
+ ret = zynqmp_pm_register_notifier(node_id, event, false, false);
+ if (ret) {
+ pr_err("%s() failed for 0x%x and 0x%x: %d\n",
+ __func__, node_id, event, ret);
+ return ret;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xlnx_unregister_event);
+
+static void xlnx_call_suspend_cb_handler(const u32 *payload)
+{
+ bool is_callback_found = false;
+ struct registered_event_data *eve_data;
+ u32 cb_type = payload[0];
+
+ /* Check for existing entry in hash table for given cb_type */
+ hash_for_each_possible(reg_driver_map, eve_data, hentry, cb_type) {
+ if (eve_data->cb_type == cb_type) {
+ eve_data->eve_cb(&payload[0], eve_data->agent_data);
+ is_callback_found = true;
+ }
+ }
+ if (!is_callback_found)
+ pr_warn("Didn't find any registered callback for suspend event\n");
+}
+
+static void xlnx_call_notify_cb_handler(const u32 *payload)
+{
+ bool is_callback_found = false;
+ struct registered_event_data *eve_data;
+ u64 key = ((u64)payload[1] << 32U) | (u64)payload[2];
+ int ret;
+
+ /* Check for existing entry in hash table for given key id */
+ hash_for_each_possible(reg_driver_map, eve_data, hentry, key) {
+ if (eve_data->key == key) {
+ eve_data->eve_cb(&payload[0], eve_data->agent_data);
+ is_callback_found = true;
+
+ /* re register with firmware to get future events */
+ ret = zynqmp_pm_register_notifier(payload[1], payload[2],
+ eve_data->wake, true);
+ if (ret) {
+ pr_err("%s() failed for 0x%x and 0x%x: %d\r\n", __func__,
+ payload[1], payload[2], ret);
+ /* Remove already registered event from hash table */
+ xlnx_remove_cb_for_notify_event(payload[1], payload[2],
+ eve_data->eve_cb);
+ }
+ }
+ }
+ if (!is_callback_found)
+ pr_warn("Didn't find any registered callback for 0x%x 0x%x\n",
+ payload[1], payload[2]);
+}
+
+static void xlnx_get_event_callback_data(u32 *buf)
+{
+ zynqmp_pm_invoke_fn(GET_CALLBACK_DATA, 0, 0, 0, 0, buf);
+}
+
+static irqreturn_t xlnx_event_handler(int irq, void *dev_id)
+{
+ u32 cb_type, node_id, event, pos;
+ u32 payload[CB_MAX_PAYLOAD_SIZE] = {0};
+ u32 event_data[CB_MAX_PAYLOAD_SIZE] = {0};
+
+ /* Get event data */
+ xlnx_get_event_callback_data(payload);
+
+ /* First element is callback type, others are callback arguments */
+ cb_type = payload[0];
+
+ if (cb_type == PM_NOTIFY_CB) {
+ node_id = payload[1];
+ event = payload[2];
+ if (!xlnx_is_error_event(node_id)) {
+ xlnx_call_notify_cb_handler(payload);
+ } else {
+ /*
+ * Each call back function expecting payload as an input arguments.
+ * We can get multiple error events as in one call back through error
+ * mask. So payload[2] may can contain multiple error events.
+ * In reg_driver_map database we store data in the combination of single
+ * node_id-error combination.
+ * So coping the payload message into event_data and update the
+ * event_data[2] with Error Mask for single error event and use
+ * event_data as input argument for registered call back function.
+ *
+ */
+ memcpy(event_data, payload, (4 * CB_MAX_PAYLOAD_SIZE));
+ /* Support Multiple Error Event */
+ for (pos = 0; pos < MAX_BITS; pos++) {
+ if ((0 == (event & (1 << pos))))
+ continue;
+ event_data[2] = (event & (1 << pos));
+ xlnx_call_notify_cb_handler(event_data);
+ }
+ }
+ } else if (cb_type == PM_INIT_SUSPEND_CB) {
+ xlnx_call_suspend_cb_handler(payload);
+ } else {
+ pr_err("%s() Unsupported Callback %d\n", __func__, cb_type);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int xlnx_event_cpuhp_start(unsigned int cpu)
+{
+ enable_percpu_irq(virq_sgi, IRQ_TYPE_NONE);
+
+ return 0;
+}
+
+static int xlnx_event_cpuhp_down(unsigned int cpu)
+{
+ disable_percpu_irq(virq_sgi);
+
+ return 0;
+}
+
+static void xlnx_disable_percpu_irq(void *data)
+{
+ disable_percpu_irq(virq_sgi);
+}
+
+static int xlnx_event_init_sgi(struct platform_device *pdev)
+{
+ int ret = 0;
+ int cpu = smp_processor_id();
+ /*
+ * IRQ related structures are used for the following:
+ * for each SGI interrupt ensure its mapped by GIC IRQ domain
+ * and that each corresponding linux IRQ for the HW IRQ has
+ * a handler for when receiving an interrupt from the remote
+ * processor.
+ */
+ struct irq_domain *domain;
+ struct irq_fwspec sgi_fwspec;
+ struct device_node *interrupt_parent = NULL;
+ struct device *parent = pdev->dev.parent;
+
+ /* Find GIC controller to map SGIs. */
+ interrupt_parent = of_irq_find_parent(parent->of_node);
+ if (!interrupt_parent) {
+ dev_err(&pdev->dev, "Failed to find property for Interrupt parent\n");
+ return -EINVAL;
+ }
+
+ /* Each SGI needs to be associated with GIC's IRQ domain. */
+ domain = irq_find_host(interrupt_parent);
+ of_node_put(interrupt_parent);
+
+ /* Each mapping needs GIC domain when finding IRQ mapping. */
+ sgi_fwspec.fwnode = domain->fwnode;
+
+ /*
+ * When irq domain looks at mapping each arg is as follows:
+ * 3 args for: interrupt type (SGI), interrupt # (set later), type
+ */
+ sgi_fwspec.param_count = 1;
+
+ /* Set SGI's hwirq */
+ sgi_fwspec.param[0] = sgi_num;
+ virq_sgi = irq_create_fwspec_mapping(&sgi_fwspec);
+
+ per_cpu(cpu_number1, cpu) = cpu;
+ ret = request_percpu_irq(virq_sgi, xlnx_event_handler, "xlnx_event_mgmt",
+ &cpu_number1);
+ WARN_ON(ret);
+ if (ret) {
+ irq_dispose_mapping(virq_sgi);
+ return ret;
+ }
+
+ irq_to_desc(virq_sgi);
+ irq_set_status_flags(virq_sgi, IRQ_PER_CPU);
+
+ return ret;
+}
+
+static void xlnx_event_cleanup_sgi(struct platform_device *pdev)
+{
+ int cpu = smp_processor_id();
+
+ per_cpu(cpu_number1, cpu) = cpu;
+
+ cpuhp_remove_state(CPUHP_AP_ONLINE_DYN);
+
+ on_each_cpu(xlnx_disable_percpu_irq, NULL, 1);
+
+ irq_clear_status_flags(virq_sgi, IRQ_PER_CPU);
+ free_percpu_irq(virq_sgi, &cpu_number1);
+ irq_dispose_mapping(virq_sgi);
+}
+
+static int xlnx_event_manager_probe(struct platform_device *pdev)
+{
+ int ret;
+
+ ret = zynqmp_pm_feature(PM_REGISTER_NOTIFIER);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Feature check failed with %d\n", ret);
+ return ret;
+ }
+
+ if ((ret & FIRMWARE_VERSION_MASK) <
+ REGISTER_NOTIFIER_FIRMWARE_VERSION) {
+ dev_err(&pdev->dev, "Register notifier version error. Expected Firmware: v%d - Found: v%d\n",
+ REGISTER_NOTIFIER_FIRMWARE_VERSION,
+ ret & FIRMWARE_VERSION_MASK);
+ return -EOPNOTSUPP;
+ }
+
+ /* Initialize the SGI */
+ ret = xlnx_event_init_sgi(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "SGI Init has been failed with %d\n", ret);
+ return ret;
+ }
+
+ /* Setup function for the CPU hot-plug cases */
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "soc/event:starting",
+ xlnx_event_cpuhp_start, xlnx_event_cpuhp_down);
+
+ ret = zynqmp_pm_invoke_fn(PM_IOCTL, 0, IOCTL_REGISTER_SGI, sgi_num,
+ 0, NULL);
+ if (ret) {
+ dev_err(&pdev->dev, "SGI %d Registration over TF-A failed with %d\n", sgi_num, ret);
+ xlnx_event_cleanup_sgi(pdev);
+ return ret;
+ }
+
+ event_manager_availability = 0;
+
+ dev_info(&pdev->dev, "SGI %d Registered over TF-A\n", sgi_num);
+ dev_info(&pdev->dev, "Xilinx Event Management driver probed\n");
+
+ return ret;
+}
+
+static int xlnx_event_manager_remove(struct platform_device *pdev)
+{
+ int i;
+ struct registered_event_data *eve_data;
+ struct hlist_node *tmp;
+ int ret;
+
+ hash_for_each_safe(reg_driver_map, i, tmp, eve_data, hentry) {
+ hash_del(&eve_data->hentry);
+ kfree(eve_data);
+ }
+
+ ret = zynqmp_pm_invoke_fn(PM_IOCTL, 0, IOCTL_REGISTER_SGI, 0, 1, NULL);
+ if (ret)
+ dev_err(&pdev->dev, "SGI unregistration over TF-A failed with %d\n", ret);
+
+ xlnx_event_cleanup_sgi(pdev);
+
+ event_manager_availability = -EACCES;
+
+ return ret;
+}
+
+static struct platform_driver xlnx_event_manager_driver = {
+ .probe = xlnx_event_manager_probe,
+ .remove = xlnx_event_manager_remove,
+ .driver = {
+ .name = "xlnx_event_manager",
+ },
+};
+module_param(sgi_num, uint, 0);
+module_platform_driver(xlnx_event_manager_driver);
diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c
index f8c301984d4f..859dd31b6eff 100644
--- a/drivers/soc/xilinx/zynqmp_power.c
+++ b/drivers/soc/xilinx/zynqmp_power.c
@@ -16,6 +16,7 @@
#include <linux/suspend.h>
#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/firmware/xlnx-event-manager.h>
#include <linux/mailbox/zynqmp-ipi-message.h>
/**
@@ -30,6 +31,7 @@ struct zynqmp_pm_work_struct {
static struct zynqmp_pm_work_struct *zynqmp_pm_init_suspend_work;
static struct mbox_chan *rx_chan;
+static bool event_registered;
enum pm_suspend_mode {
PM_SUSPEND_MODE_FIRST = 0,
@@ -46,17 +48,24 @@ static const char *const suspend_modes[] = {
static enum pm_suspend_mode suspend_mode = PM_SUSPEND_MODE_STD;
-enum pm_api_cb_id {
- PM_INIT_SUSPEND_CB = 30,
- PM_ACKNOWLEDGE_CB,
- PM_NOTIFY_CB,
-};
-
static void zynqmp_pm_get_callback_data(u32 *buf)
{
zynqmp_pm_invoke_fn(GET_CALLBACK_DATA, 0, 0, 0, 0, buf);
}
+static void suspend_event_callback(const u32 *payload, void *data)
+{
+ /* First element is callback API ID, others are callback arguments */
+ if (work_pending(&zynqmp_pm_init_suspend_work->callback_work))
+ return;
+
+ /* Copy callback arguments into work's structure */
+ memcpy(zynqmp_pm_init_suspend_work->args, &payload[1],
+ sizeof(zynqmp_pm_init_suspend_work->args));
+
+ queue_work(system_unbound_wq, &zynqmp_pm_init_suspend_work->callback_work);
+}
+
static irqreturn_t zynqmp_pm_isr(int irq, void *data)
{
u32 payload[CB_PAYLOAD_SIZE];
@@ -184,7 +193,32 @@ static int zynqmp_pm_probe(struct platform_device *pdev)
if (pm_api_version < ZYNQMP_PM_VERSION)
return -ENODEV;
- if (of_find_property(pdev->dev.of_node, "mboxes", NULL)) {
+ /*
+ * First try to use Xilinx Event Manager by registering suspend_event_callback
+ * for suspend/shutdown event.
+ * If xlnx_register_event() returns -EACCES (Xilinx Event Manager
+ * is not available to use) or -ENODEV(Xilinx Event Manager not compiled),
+ * then use ipi-mailbox or interrupt method.
+ */
+ ret = xlnx_register_event(PM_INIT_SUSPEND_CB, 0, 0, false,
+ suspend_event_callback, NULL);
+ if (!ret) {
+ zynqmp_pm_init_suspend_work = devm_kzalloc(&pdev->dev,
+ sizeof(struct zynqmp_pm_work_struct),
+ GFP_KERNEL);
+ if (!zynqmp_pm_init_suspend_work) {
+ xlnx_unregister_event(PM_INIT_SUSPEND_CB, 0, 0,
+ suspend_event_callback);
+ return -ENOMEM;
+ }
+ event_registered = true;
+
+ INIT_WORK(&zynqmp_pm_init_suspend_work->callback_work,
+ zynqmp_pm_init_suspend_work_fn);
+ } else if (ret != -EACCES && ret != -ENODEV) {
+ dev_err(&pdev->dev, "Failed to Register with Xilinx Event manager %d\n", ret);
+ return ret;
+ } else if (of_find_property(pdev->dev.of_node, "mboxes", NULL)) {
zynqmp_pm_init_suspend_work =
devm_kzalloc(&pdev->dev,
sizeof(struct zynqmp_pm_work_struct),
@@ -228,6 +262,10 @@ static int zynqmp_pm_probe(struct platform_device *pdev)
ret = sysfs_create_file(&pdev->dev.kobj, &dev_attr_suspend_mode.attr);
if (ret) {
+ if (event_registered) {
+ xlnx_unregister_event(PM_INIT_SUSPEND_CB, 0, 0, suspend_event_callback);
+ event_registered = false;
+ }
dev_err(&pdev->dev, "unable to create sysfs interface\n");
return ret;
}
@@ -238,6 +276,8 @@ static int zynqmp_pm_probe(struct platform_device *pdev)
static int zynqmp_pm_remove(struct platform_device *pdev)
{
sysfs_remove_file(&pdev->dev.kobj, &dev_attr_suspend_mode.attr);
+ if (event_registered)
+ xlnx_unregister_event(PM_INIT_SUSPEND_CB, 0, 0, suspend_event_callback);
if (!rx_chan)
mbox_free_channel(rx_chan);
diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
index 9d42891ac3d6..54813417ef8e 100644
--- a/drivers/soundwire/qcom.c
+++ b/drivers/soundwire/qcom.c
@@ -1156,11 +1156,7 @@ static int qcom_swrm_get_port_config(struct qcom_swrm_ctrl *ctrl)
ret = of_property_read_u8_array(np, "qcom,ports-block-pack-mode",
bp_mode, nports);
if (ret) {
- u32 version;
-
- ctrl->reg_read(ctrl, SWRM_COMP_HW_VERSION, &version);
-
- if (version <= 0x01030000)
+ if (ctrl->version <= 0x01030000)
memset(bp_mode, SWR_INVALID_PARAM, QCOM_SDW_MAX_PORTS);
else
return ret;
diff --git a/drivers/spmi/Kconfig b/drivers/spmi/Kconfig
index 2874b6c26028..737802046314 100644
--- a/drivers/spmi/Kconfig
+++ b/drivers/spmi/Kconfig
@@ -34,4 +34,15 @@ config SPMI_MSM_PMIC_ARB
This is required for communicating with Qualcomm PMICs and
other devices that have the SPMI interface.
+config SPMI_MTK_PMIF
+ tristate "Mediatek SPMI Controller (PMIC Arbiter)"
+ depends on ARCH_MEDIATEK || COMPILE_TEST
+ help
+ If you say yes to this option, support will be included for the
+ built-in SPMI PMIC Arbiter interface on Mediatek family
+ processors.
+
+ This is required for communicating with Mediatek PMICs and
+ other devices that have the SPMI interface.
+
endif
diff --git a/drivers/spmi/Makefile b/drivers/spmi/Makefile
index 6e092e6f290c..9d974424c8c1 100644
--- a/drivers/spmi/Makefile
+++ b/drivers/spmi/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_SPMI) += spmi.o
obj-$(CONFIG_SPMI_HISI3670) += hisi-spmi-controller.o
obj-$(CONFIG_SPMI_MSM_PMIC_ARB) += spmi-pmic-arb.o
+obj-$(CONFIG_SPMI_MTK_PMIF) += spmi-mtk-pmif.o
diff --git a/drivers/spmi/spmi-mtk-pmif.c b/drivers/spmi/spmi-mtk-pmif.c
new file mode 100644
index 000000000000..ad511f2c3324
--- /dev/null
+++ b/drivers/spmi/spmi-mtk-pmif.c
@@ -0,0 +1,542 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2021 MediaTek Inc.
+
+#include <linux/clk.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/spmi.h>
+
+#define SWINF_IDLE 0x00
+#define SWINF_WFVLDCLR 0x06
+
+#define GET_SWINF(x) (((x) >> 1) & 0x7)
+
+#define PMIF_CMD_REG_0 0
+#define PMIF_CMD_REG 1
+#define PMIF_CMD_EXT_REG 2
+#define PMIF_CMD_EXT_REG_LONG 3
+
+#define PMIF_DELAY_US 10
+#define PMIF_TIMEOUT_US (10 * 1000)
+
+#define PMIF_CHAN_OFFSET 0x5
+
+#define PMIF_MAX_CLKS 3
+
+#define SPMI_OP_ST_BUSY 1
+
+struct ch_reg {
+ u32 ch_sta;
+ u32 wdata;
+ u32 rdata;
+ u32 ch_send;
+ u32 ch_rdy;
+};
+
+struct pmif_data {
+ const u32 *regs;
+ const u32 *spmimst_regs;
+ u32 soc_chan;
+};
+
+struct pmif {
+ void __iomem *base;
+ void __iomem *spmimst_base;
+ struct ch_reg chan;
+ struct clk_bulk_data clks[PMIF_MAX_CLKS];
+ size_t nclks;
+ const struct pmif_data *data;
+};
+
+static const char * const pmif_clock_names[] = {
+ "pmif_sys_ck", "pmif_tmr_ck", "spmimst_clk_mux",
+};
+
+enum pmif_regs {
+ PMIF_INIT_DONE,
+ PMIF_INF_EN,
+ PMIF_ARB_EN,
+ PMIF_CMDISSUE_EN,
+ PMIF_TIMER_CTRL,
+ PMIF_SPI_MODE_CTRL,
+ PMIF_IRQ_EVENT_EN_0,
+ PMIF_IRQ_FLAG_0,
+ PMIF_IRQ_CLR_0,
+ PMIF_IRQ_EVENT_EN_1,
+ PMIF_IRQ_FLAG_1,
+ PMIF_IRQ_CLR_1,
+ PMIF_IRQ_EVENT_EN_2,
+ PMIF_IRQ_FLAG_2,
+ PMIF_IRQ_CLR_2,
+ PMIF_IRQ_EVENT_EN_3,
+ PMIF_IRQ_FLAG_3,
+ PMIF_IRQ_CLR_3,
+ PMIF_IRQ_EVENT_EN_4,
+ PMIF_IRQ_FLAG_4,
+ PMIF_IRQ_CLR_4,
+ PMIF_WDT_EVENT_EN_0,
+ PMIF_WDT_FLAG_0,
+ PMIF_WDT_EVENT_EN_1,
+ PMIF_WDT_FLAG_1,
+ PMIF_SWINF_0_STA,
+ PMIF_SWINF_0_WDATA_31_0,
+ PMIF_SWINF_0_RDATA_31_0,
+ PMIF_SWINF_0_ACC,
+ PMIF_SWINF_0_VLD_CLR,
+ PMIF_SWINF_1_STA,
+ PMIF_SWINF_1_WDATA_31_0,
+ PMIF_SWINF_1_RDATA_31_0,
+ PMIF_SWINF_1_ACC,
+ PMIF_SWINF_1_VLD_CLR,
+ PMIF_SWINF_2_STA,
+ PMIF_SWINF_2_WDATA_31_0,
+ PMIF_SWINF_2_RDATA_31_0,
+ PMIF_SWINF_2_ACC,
+ PMIF_SWINF_2_VLD_CLR,
+ PMIF_SWINF_3_STA,
+ PMIF_SWINF_3_WDATA_31_0,
+ PMIF_SWINF_3_RDATA_31_0,
+ PMIF_SWINF_3_ACC,
+ PMIF_SWINF_3_VLD_CLR,
+};
+
+static const u32 mt6873_regs[] = {
+ [PMIF_INIT_DONE] = 0x0000,
+ [PMIF_INF_EN] = 0x0024,
+ [PMIF_ARB_EN] = 0x0150,
+ [PMIF_CMDISSUE_EN] = 0x03B4,
+ [PMIF_TIMER_CTRL] = 0x03E0,
+ [PMIF_SPI_MODE_CTRL] = 0x0400,
+ [PMIF_IRQ_EVENT_EN_0] = 0x0418,
+ [PMIF_IRQ_FLAG_0] = 0x0420,
+ [PMIF_IRQ_CLR_0] = 0x0424,
+ [PMIF_IRQ_EVENT_EN_1] = 0x0428,
+ [PMIF_IRQ_FLAG_1] = 0x0430,
+ [PMIF_IRQ_CLR_1] = 0x0434,
+ [PMIF_IRQ_EVENT_EN_2] = 0x0438,
+ [PMIF_IRQ_FLAG_2] = 0x0440,
+ [PMIF_IRQ_CLR_2] = 0x0444,
+ [PMIF_IRQ_EVENT_EN_3] = 0x0448,
+ [PMIF_IRQ_FLAG_3] = 0x0450,
+ [PMIF_IRQ_CLR_3] = 0x0454,
+ [PMIF_IRQ_EVENT_EN_4] = 0x0458,
+ [PMIF_IRQ_FLAG_4] = 0x0460,
+ [PMIF_IRQ_CLR_4] = 0x0464,
+ [PMIF_WDT_EVENT_EN_0] = 0x046C,
+ [PMIF_WDT_FLAG_0] = 0x0470,
+ [PMIF_WDT_EVENT_EN_1] = 0x0474,
+ [PMIF_WDT_FLAG_1] = 0x0478,
+ [PMIF_SWINF_0_ACC] = 0x0C00,
+ [PMIF_SWINF_0_WDATA_31_0] = 0x0C04,
+ [PMIF_SWINF_0_RDATA_31_0] = 0x0C14,
+ [PMIF_SWINF_0_VLD_CLR] = 0x0C24,
+ [PMIF_SWINF_0_STA] = 0x0C28,
+ [PMIF_SWINF_1_ACC] = 0x0C40,
+ [PMIF_SWINF_1_WDATA_31_0] = 0x0C44,
+ [PMIF_SWINF_1_RDATA_31_0] = 0x0C54,
+ [PMIF_SWINF_1_VLD_CLR] = 0x0C64,
+ [PMIF_SWINF_1_STA] = 0x0C68,
+ [PMIF_SWINF_2_ACC] = 0x0C80,
+ [PMIF_SWINF_2_WDATA_31_0] = 0x0C84,
+ [PMIF_SWINF_2_RDATA_31_0] = 0x0C94,
+ [PMIF_SWINF_2_VLD_CLR] = 0x0CA4,
+ [PMIF_SWINF_2_STA] = 0x0CA8,
+ [PMIF_SWINF_3_ACC] = 0x0CC0,
+ [PMIF_SWINF_3_WDATA_31_0] = 0x0CC4,
+ [PMIF_SWINF_3_RDATA_31_0] = 0x0CD4,
+ [PMIF_SWINF_3_VLD_CLR] = 0x0CE4,
+ [PMIF_SWINF_3_STA] = 0x0CE8,
+};
+
+static const u32 mt8195_regs[] = {
+ [PMIF_INIT_DONE] = 0x0000,
+ [PMIF_INF_EN] = 0x0024,
+ [PMIF_ARB_EN] = 0x0150,
+ [PMIF_CMDISSUE_EN] = 0x03B8,
+ [PMIF_TIMER_CTRL] = 0x03E4,
+ [PMIF_SPI_MODE_CTRL] = 0x0408,
+ [PMIF_IRQ_EVENT_EN_0] = 0x0420,
+ [PMIF_IRQ_FLAG_0] = 0x0428,
+ [PMIF_IRQ_CLR_0] = 0x042C,
+ [PMIF_IRQ_EVENT_EN_1] = 0x0430,
+ [PMIF_IRQ_FLAG_1] = 0x0438,
+ [PMIF_IRQ_CLR_1] = 0x043C,
+ [PMIF_IRQ_EVENT_EN_2] = 0x0440,
+ [PMIF_IRQ_FLAG_2] = 0x0448,
+ [PMIF_IRQ_CLR_2] = 0x044C,
+ [PMIF_IRQ_EVENT_EN_3] = 0x0450,
+ [PMIF_IRQ_FLAG_3] = 0x0458,
+ [PMIF_IRQ_CLR_3] = 0x045C,
+ [PMIF_IRQ_EVENT_EN_4] = 0x0460,
+ [PMIF_IRQ_FLAG_4] = 0x0468,
+ [PMIF_IRQ_CLR_4] = 0x046C,
+ [PMIF_WDT_EVENT_EN_0] = 0x0474,
+ [PMIF_WDT_FLAG_0] = 0x0478,
+ [PMIF_WDT_EVENT_EN_1] = 0x047C,
+ [PMIF_WDT_FLAG_1] = 0x0480,
+ [PMIF_SWINF_0_ACC] = 0x0800,
+ [PMIF_SWINF_0_WDATA_31_0] = 0x0804,
+ [PMIF_SWINF_0_RDATA_31_0] = 0x0814,
+ [PMIF_SWINF_0_VLD_CLR] = 0x0824,
+ [PMIF_SWINF_0_STA] = 0x0828,
+ [PMIF_SWINF_1_ACC] = 0x0840,
+ [PMIF_SWINF_1_WDATA_31_0] = 0x0844,
+ [PMIF_SWINF_1_RDATA_31_0] = 0x0854,
+ [PMIF_SWINF_1_VLD_CLR] = 0x0864,
+ [PMIF_SWINF_1_STA] = 0x0868,
+ [PMIF_SWINF_2_ACC] = 0x0880,
+ [PMIF_SWINF_2_WDATA_31_0] = 0x0884,
+ [PMIF_SWINF_2_RDATA_31_0] = 0x0894,
+ [PMIF_SWINF_2_VLD_CLR] = 0x08A4,
+ [PMIF_SWINF_2_STA] = 0x08A8,
+ [PMIF_SWINF_3_ACC] = 0x08C0,
+ [PMIF_SWINF_3_WDATA_31_0] = 0x08C4,
+ [PMIF_SWINF_3_RDATA_31_0] = 0x08D4,
+ [PMIF_SWINF_3_VLD_CLR] = 0x08E4,
+ [PMIF_SWINF_3_STA] = 0x08E8,
+};
+
+enum spmi_regs {
+ SPMI_OP_ST_CTRL,
+ SPMI_GRP_ID_EN,
+ SPMI_OP_ST_STA,
+ SPMI_MST_SAMPL,
+ SPMI_MST_REQ_EN,
+ SPMI_REC_CTRL,
+ SPMI_REC0,
+ SPMI_REC1,
+ SPMI_REC2,
+ SPMI_REC3,
+ SPMI_REC4,
+ SPMI_MST_DBG,
+
+ /* MT8195 spmi regs */
+ SPMI_MST_RCS_CTRL,
+ SPMI_SLV_3_0_EINT,
+ SPMI_SLV_7_4_EINT,
+ SPMI_SLV_B_8_EINT,
+ SPMI_SLV_F_C_EINT,
+ SPMI_REC_CMD_DEC,
+ SPMI_DEC_DBG,
+};
+
+static const u32 mt6873_spmi_regs[] = {
+ [SPMI_OP_ST_CTRL] = 0x0000,
+ [SPMI_GRP_ID_EN] = 0x0004,
+ [SPMI_OP_ST_STA] = 0x0008,
+ [SPMI_MST_SAMPL] = 0x000c,
+ [SPMI_MST_REQ_EN] = 0x0010,
+ [SPMI_REC_CTRL] = 0x0040,
+ [SPMI_REC0] = 0x0044,
+ [SPMI_REC1] = 0x0048,
+ [SPMI_REC2] = 0x004c,
+ [SPMI_REC3] = 0x0050,
+ [SPMI_REC4] = 0x0054,
+ [SPMI_MST_DBG] = 0x00fc,
+};
+
+static const u32 mt8195_spmi_regs[] = {
+ [SPMI_OP_ST_CTRL] = 0x0000,
+ [SPMI_GRP_ID_EN] = 0x0004,
+ [SPMI_OP_ST_STA] = 0x0008,
+ [SPMI_MST_SAMPL] = 0x000C,
+ [SPMI_MST_REQ_EN] = 0x0010,
+ [SPMI_MST_RCS_CTRL] = 0x0014,
+ [SPMI_SLV_3_0_EINT] = 0x0020,
+ [SPMI_SLV_7_4_EINT] = 0x0024,
+ [SPMI_SLV_B_8_EINT] = 0x0028,
+ [SPMI_SLV_F_C_EINT] = 0x002C,
+ [SPMI_REC_CTRL] = 0x0040,
+ [SPMI_REC0] = 0x0044,
+ [SPMI_REC1] = 0x0048,
+ [SPMI_REC2] = 0x004C,
+ [SPMI_REC3] = 0x0050,
+ [SPMI_REC4] = 0x0054,
+ [SPMI_REC_CMD_DEC] = 0x005C,
+ [SPMI_DEC_DBG] = 0x00F8,
+ [SPMI_MST_DBG] = 0x00FC,
+};
+
+static u32 pmif_readl(struct pmif *arb, enum pmif_regs reg)
+{
+ return readl(arb->base + arb->data->regs[reg]);
+}
+
+static void pmif_writel(struct pmif *arb, u32 val, enum pmif_regs reg)
+{
+ writel(val, arb->base + arb->data->regs[reg]);
+}
+
+static void mtk_spmi_writel(struct pmif *arb, u32 val, enum spmi_regs reg)
+{
+ writel(val, arb->spmimst_base + arb->data->spmimst_regs[reg]);
+}
+
+static bool pmif_is_fsm_vldclr(struct pmif *arb)
+{
+ u32 reg_rdata;
+
+ reg_rdata = pmif_readl(arb, arb->chan.ch_sta);
+
+ return GET_SWINF(reg_rdata) == SWINF_WFVLDCLR;
+}
+
+static int pmif_arb_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid)
+{
+ struct pmif *arb = spmi_controller_get_drvdata(ctrl);
+ u32 rdata, cmd;
+ int ret;
+
+ /* Check the opcode */
+ if (opc < SPMI_CMD_RESET || opc > SPMI_CMD_WAKEUP)
+ return -EINVAL;
+
+ cmd = opc - SPMI_CMD_RESET;
+
+ mtk_spmi_writel(arb, (cmd << 0x4) | sid, SPMI_OP_ST_CTRL);
+ ret = readl_poll_timeout_atomic(arb->spmimst_base + arb->data->spmimst_regs[SPMI_OP_ST_STA],
+ rdata, (rdata & SPMI_OP_ST_BUSY) == SPMI_OP_ST_BUSY,
+ PMIF_DELAY_US, PMIF_TIMEOUT_US);
+ if (ret < 0)
+ dev_err(&ctrl->dev, "timeout, err = %d\n", ret);
+
+ return ret;
+}
+
+static int pmif_spmi_read_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
+ u16 addr, u8 *buf, size_t len)
+{
+ struct pmif *arb = spmi_controller_get_drvdata(ctrl);
+ struct ch_reg *inf_reg;
+ int ret;
+ u32 data, cmd;
+
+ /* Check for argument validation. */
+ if (sid & ~0xf) {
+ dev_err(&ctrl->dev, "exceed the max slv id\n");
+ return -EINVAL;
+ }
+
+ if (len > 4) {
+ dev_err(&ctrl->dev, "pmif supports 1..4 bytes per trans, but:%zu requested", len);
+
+ return -EINVAL;
+ }
+
+ if (opc >= 0x60 && opc <= 0x7f)
+ opc = PMIF_CMD_REG;
+ else if ((opc >= 0x20 && opc <= 0x2f) || (opc >= 0x38 && opc <= 0x3f))
+ opc = PMIF_CMD_EXT_REG_LONG;
+ else
+ return -EINVAL;
+
+ /* Wait for Software Interface FSM state to be IDLE. */
+ inf_reg = &arb->chan;
+ ret = readl_poll_timeout_atomic(arb->base + arb->data->regs[inf_reg->ch_sta],
+ data, GET_SWINF(data) == SWINF_IDLE,
+ PMIF_DELAY_US, PMIF_TIMEOUT_US);
+ if (ret < 0) {
+ /* set channel ready if the data has transferred */
+ if (pmif_is_fsm_vldclr(arb))
+ pmif_writel(arb, 1, inf_reg->ch_rdy);
+ dev_err(&ctrl->dev, "failed to wait for SWINF_IDLE\n");
+ return ret;
+ }
+
+ /* Send the command. */
+ cmd = (opc << 30) | (sid << 24) | ((len - 1) << 16) | addr;
+ pmif_writel(arb, cmd, inf_reg->ch_send);
+
+ /*
+ * Wait for Software Interface FSM state to be WFVLDCLR,
+ * read the data and clear the valid flag.
+ */
+ ret = readl_poll_timeout_atomic(arb->base + arb->data->regs[inf_reg->ch_sta],
+ data, GET_SWINF(data) == SWINF_WFVLDCLR,
+ PMIF_DELAY_US, PMIF_TIMEOUT_US);
+ if (ret < 0) {
+ dev_err(&ctrl->dev, "failed to wait for SWINF_WFVLDCLR\n");
+ return ret;
+ }
+
+ data = pmif_readl(arb, inf_reg->rdata);
+ memcpy(buf, &data, len);
+ pmif_writel(arb, 1, inf_reg->ch_rdy);
+
+ return 0;
+}
+
+static int pmif_spmi_write_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
+ u16 addr, const u8 *buf, size_t len)
+{
+ struct pmif *arb = spmi_controller_get_drvdata(ctrl);
+ struct ch_reg *inf_reg;
+ int ret;
+ u32 data, cmd;
+
+ if (len > 4) {
+ dev_err(&ctrl->dev, "pmif supports 1..4 bytes per trans, but:%zu requested", len);
+
+ return -EINVAL;
+ }
+
+ /* Check the opcode */
+ if (opc >= 0x40 && opc <= 0x5F)
+ opc = PMIF_CMD_REG;
+ else if ((opc <= 0xF) || (opc >= 0x30 && opc <= 0x37))
+ opc = PMIF_CMD_EXT_REG_LONG;
+ else if (opc >= 0x80)
+ opc = PMIF_CMD_REG_0;
+ else
+ return -EINVAL;
+
+ /* Wait for Software Interface FSM state to be IDLE. */
+ inf_reg = &arb->chan;
+ ret = readl_poll_timeout_atomic(arb->base + arb->data->regs[inf_reg->ch_sta],
+ data, GET_SWINF(data) == SWINF_IDLE,
+ PMIF_DELAY_US, PMIF_TIMEOUT_US);
+ if (ret < 0) {
+ /* set channel ready if the data has transferred */
+ if (pmif_is_fsm_vldclr(arb))
+ pmif_writel(arb, 1, inf_reg->ch_rdy);
+ dev_err(&ctrl->dev, "failed to wait for SWINF_IDLE\n");
+ return ret;
+ }
+
+ /* Set the write data. */
+ memcpy(&data, buf, len);
+ pmif_writel(arb, data, inf_reg->wdata);
+
+ /* Send the command. */
+ cmd = (opc << 30) | BIT(29) | (sid << 24) | ((len - 1) << 16) | addr;
+ pmif_writel(arb, cmd, inf_reg->ch_send);
+
+ return 0;
+}
+
+static const struct pmif_data mt6873_pmif_arb = {
+ .regs = mt6873_regs,
+ .spmimst_regs = mt6873_spmi_regs,
+ .soc_chan = 2,
+};
+
+static const struct pmif_data mt8195_pmif_arb = {
+ .regs = mt8195_regs,
+ .spmimst_regs = mt8195_spmi_regs,
+ .soc_chan = 2,
+};
+
+static int mtk_spmi_probe(struct platform_device *pdev)
+{
+ struct pmif *arb;
+ struct spmi_controller *ctrl;
+ int err, i;
+ u32 chan_offset;
+
+ ctrl = spmi_controller_alloc(&pdev->dev, sizeof(*arb));
+ if (!ctrl)
+ return -ENOMEM;
+
+ arb = spmi_controller_get_drvdata(ctrl);
+ arb->data = device_get_match_data(&pdev->dev);
+ if (!arb->data) {
+ err = -EINVAL;
+ dev_err(&pdev->dev, "Cannot get drv_data\n");
+ goto err_put_ctrl;
+ }
+
+ arb->base = devm_platform_ioremap_resource_byname(pdev, "pmif");
+ if (IS_ERR(arb->base)) {
+ err = PTR_ERR(arb->base);
+ goto err_put_ctrl;
+ }
+
+ arb->spmimst_base = devm_platform_ioremap_resource_byname(pdev, "spmimst");
+ if (IS_ERR(arb->spmimst_base)) {
+ err = PTR_ERR(arb->spmimst_base);
+ goto err_put_ctrl;
+ }
+
+ arb->nclks = ARRAY_SIZE(pmif_clock_names);
+ for (i = 0; i < arb->nclks; i++)
+ arb->clks[i].id = pmif_clock_names[i];
+
+ err = devm_clk_bulk_get(&pdev->dev, arb->nclks, arb->clks);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to get clocks: %d\n", err);
+ goto err_put_ctrl;
+ }
+
+ err = clk_bulk_prepare_enable(arb->nclks, arb->clks);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to enable clocks: %d\n", err);
+ goto err_put_ctrl;
+ }
+
+ ctrl->cmd = pmif_arb_cmd;
+ ctrl->read_cmd = pmif_spmi_read_cmd;
+ ctrl->write_cmd = pmif_spmi_write_cmd;
+
+ chan_offset = PMIF_CHAN_OFFSET * arb->data->soc_chan;
+ arb->chan.ch_sta = PMIF_SWINF_0_STA + chan_offset;
+ arb->chan.wdata = PMIF_SWINF_0_WDATA_31_0 + chan_offset;
+ arb->chan.rdata = PMIF_SWINF_0_RDATA_31_0 + chan_offset;
+ arb->chan.ch_send = PMIF_SWINF_0_ACC + chan_offset;
+ arb->chan.ch_rdy = PMIF_SWINF_0_VLD_CLR + chan_offset;
+
+ platform_set_drvdata(pdev, ctrl);
+
+ err = spmi_controller_add(ctrl);
+ if (err)
+ goto err_domain_remove;
+
+ return 0;
+
+err_domain_remove:
+ clk_bulk_disable_unprepare(arb->nclks, arb->clks);
+err_put_ctrl:
+ spmi_controller_put(ctrl);
+ return err;
+}
+
+static int mtk_spmi_remove(struct platform_device *pdev)
+{
+ struct spmi_controller *ctrl = platform_get_drvdata(pdev);
+ struct pmif *arb = spmi_controller_get_drvdata(ctrl);
+
+ clk_bulk_disable_unprepare(arb->nclks, arb->clks);
+ spmi_controller_remove(ctrl);
+ spmi_controller_put(ctrl);
+ return 0;
+}
+
+static const struct of_device_id mtk_spmi_match_table[] = {
+ {
+ .compatible = "mediatek,mt6873-spmi",
+ .data = &mt6873_pmif_arb,
+ }, {
+ .compatible = "mediatek,mt8195-spmi",
+ .data = &mt8195_pmif_arb,
+ }, {
+ /* sentinel */
+ },
+};
+MODULE_DEVICE_TABLE(of, mtk_spmi_match_table);
+
+static struct platform_driver mtk_spmi_driver = {
+ .driver = {
+ .name = "spmi-mtk",
+ .of_match_table = of_match_ptr(mtk_spmi_match_table),
+ },
+ .probe = mtk_spmi_probe,
+ .remove = mtk_spmi_remove,
+};
+module_platform_driver(mtk_spmi_driver);
+
+MODULE_AUTHOR("Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>");
+MODULE_DESCRIPTION("MediaTek SPMI Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c
index bbbd311eda03..2113be40b5a9 100644
--- a/drivers/spmi/spmi-pmic-arb.c
+++ b/drivers/spmi/spmi-pmic-arb.c
@@ -261,20 +261,21 @@ static int pmic_arb_wait_for_done(struct spmi_controller *ctrl,
if (status & PMIC_ARB_STATUS_DONE) {
if (status & PMIC_ARB_STATUS_DENIED) {
- dev_err(&ctrl->dev, "%s: transaction denied (0x%x)\n",
- __func__, status);
+ dev_err(&ctrl->dev, "%s: %#x %#x: transaction denied (%#x)\n",
+ __func__, sid, addr, status);
return -EPERM;
}
if (status & PMIC_ARB_STATUS_FAILURE) {
- dev_err(&ctrl->dev, "%s: transaction failed (0x%x)\n",
- __func__, status);
+ dev_err(&ctrl->dev, "%s: %#x %#x: transaction failed (%#x)\n",
+ __func__, sid, addr, status);
+ WARN_ON(1);
return -EIO;
}
if (status & PMIC_ARB_STATUS_DROPPED) {
- dev_err(&ctrl->dev, "%s: transaction dropped (0x%x)\n",
- __func__, status);
+ dev_err(&ctrl->dev, "%s: %#x %#x: transaction dropped (%#x)\n",
+ __func__, sid, addr, status);
return -EIO;
}
@@ -283,8 +284,8 @@ static int pmic_arb_wait_for_done(struct spmi_controller *ctrl,
udelay(1);
}
- dev_err(&ctrl->dev, "%s: timeout, status 0x%x\n",
- __func__, status);
+ dev_err(&ctrl->dev, "%s: %#x %#x: timeout, status %#x\n",
+ __func__, sid, addr, status);
return -ETIMEDOUT;
}
@@ -333,24 +334,20 @@ static int pmic_arb_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid)
return pmic_arb->ver_ops->non_data_cmd(ctrl, opc, sid);
}
-static int pmic_arb_read_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
- u16 addr, u8 *buf, size_t len)
+static int pmic_arb_fmt_read_cmd(struct spmi_pmic_arb *pmic_arb, u8 opc, u8 sid,
+ u16 addr, size_t len, u32 *cmd, u32 *offset)
{
- struct spmi_pmic_arb *pmic_arb = spmi_controller_get_drvdata(ctrl);
- unsigned long flags;
u8 bc = len - 1;
- u32 cmd;
int rc;
- u32 offset;
rc = pmic_arb->ver_ops->offset(pmic_arb, sid, addr,
PMIC_ARB_CHANNEL_OBS);
if (rc < 0)
return rc;
- offset = rc;
+ *offset = rc;
if (bc >= PMIC_ARB_MAX_TRANS_BYTES) {
- dev_err(&ctrl->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested",
+ dev_err(&pmic_arb->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested",
PMIC_ARB_MAX_TRANS_BYTES, len);
return -EINVAL;
}
@@ -365,14 +362,24 @@ static int pmic_arb_read_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
else
return -EINVAL;
- cmd = pmic_arb->ver_ops->fmt_cmd(opc, sid, addr, bc);
+ *cmd = pmic_arb->ver_ops->fmt_cmd(opc, sid, addr, bc);
+
+ return 0;
+}
+
+static int pmic_arb_read_cmd_unlocked(struct spmi_controller *ctrl, u32 cmd,
+ u32 offset, u8 sid, u16 addr, u8 *buf,
+ size_t len)
+{
+ struct spmi_pmic_arb *pmic_arb = spmi_controller_get_drvdata(ctrl);
+ u8 bc = len - 1;
+ int rc;
- raw_spin_lock_irqsave(&pmic_arb->lock, flags);
pmic_arb_set_rd_cmd(pmic_arb, offset + PMIC_ARB_CMD, cmd);
rc = pmic_arb_wait_for_done(ctrl, pmic_arb->rd_base, sid, addr,
PMIC_ARB_CHANNEL_OBS);
if (rc)
- goto done;
+ return rc;
pmic_arb_read_data(pmic_arb, buf, offset + PMIC_ARB_RDATA0,
min_t(u8, bc, 3));
@@ -380,30 +387,44 @@ static int pmic_arb_read_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
if (bc > 3)
pmic_arb_read_data(pmic_arb, buf + 4, offset + PMIC_ARB_RDATA1,
bc - 4);
+ return 0;
+}
-done:
+static int pmic_arb_read_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
+ u16 addr, u8 *buf, size_t len)
+{
+ struct spmi_pmic_arb *pmic_arb = spmi_controller_get_drvdata(ctrl);
+ unsigned long flags;
+ u32 cmd, offset;
+ int rc;
+
+ rc = pmic_arb_fmt_read_cmd(pmic_arb, opc, sid, addr, len, &cmd,
+ &offset);
+ if (rc)
+ return rc;
+
+ raw_spin_lock_irqsave(&pmic_arb->lock, flags);
+ rc = pmic_arb_read_cmd_unlocked(ctrl, cmd, offset, sid, addr, buf, len);
raw_spin_unlock_irqrestore(&pmic_arb->lock, flags);
+
return rc;
}
-static int pmic_arb_write_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
- u16 addr, const u8 *buf, size_t len)
+static int pmic_arb_fmt_write_cmd(struct spmi_pmic_arb *pmic_arb, u8 opc,
+ u8 sid, u16 addr, size_t len, u32 *cmd,
+ u32 *offset)
{
- struct spmi_pmic_arb *pmic_arb = spmi_controller_get_drvdata(ctrl);
- unsigned long flags;
u8 bc = len - 1;
- u32 cmd;
int rc;
- u32 offset;
rc = pmic_arb->ver_ops->offset(pmic_arb, sid, addr,
PMIC_ARB_CHANNEL_RW);
if (rc < 0)
return rc;
- offset = rc;
+ *offset = rc;
if (bc >= PMIC_ARB_MAX_TRANS_BYTES) {
- dev_err(&ctrl->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested",
+ dev_err(&pmic_arb->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested",
PMIC_ARB_MAX_TRANS_BYTES, len);
return -EINVAL;
}
@@ -420,10 +441,19 @@ static int pmic_arb_write_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
else
return -EINVAL;
- cmd = pmic_arb->ver_ops->fmt_cmd(opc, sid, addr, bc);
+ *cmd = pmic_arb->ver_ops->fmt_cmd(opc, sid, addr, bc);
+
+ return 0;
+}
+
+static int pmic_arb_write_cmd_unlocked(struct spmi_controller *ctrl, u32 cmd,
+ u32 offset, u8 sid, u16 addr,
+ const u8 *buf, size_t len)
+{
+ struct spmi_pmic_arb *pmic_arb = spmi_controller_get_drvdata(ctrl);
+ u8 bc = len - 1;
/* Write data to FIFOs */
- raw_spin_lock_irqsave(&pmic_arb->lock, flags);
pmic_arb_write_data(pmic_arb, buf, offset + PMIC_ARB_WDATA0,
min_t(u8, bc, 3));
if (bc > 3)
@@ -432,8 +462,62 @@ static int pmic_arb_write_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
/* Start the transaction */
pmic_arb_base_write(pmic_arb, offset + PMIC_ARB_CMD, cmd);
- rc = pmic_arb_wait_for_done(ctrl, pmic_arb->wr_base, sid, addr,
- PMIC_ARB_CHANNEL_RW);
+ return pmic_arb_wait_for_done(ctrl, pmic_arb->wr_base, sid, addr,
+ PMIC_ARB_CHANNEL_RW);
+}
+
+static int pmic_arb_write_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid,
+ u16 addr, const u8 *buf, size_t len)
+{
+ struct spmi_pmic_arb *pmic_arb = spmi_controller_get_drvdata(ctrl);
+ unsigned long flags;
+ u32 cmd, offset;
+ int rc;
+
+ rc = pmic_arb_fmt_write_cmd(pmic_arb, opc, sid, addr, len, &cmd,
+ &offset);
+ if (rc)
+ return rc;
+
+ raw_spin_lock_irqsave(&pmic_arb->lock, flags);
+ rc = pmic_arb_write_cmd_unlocked(ctrl, cmd, offset, sid, addr, buf,
+ len);
+ raw_spin_unlock_irqrestore(&pmic_arb->lock, flags);
+
+ return rc;
+}
+
+static int pmic_arb_masked_write(struct spmi_controller *ctrl, u8 sid, u16 addr,
+ const u8 *buf, const u8 *mask, size_t len)
+{
+ struct spmi_pmic_arb *pmic_arb = spmi_controller_get_drvdata(ctrl);
+ u32 read_cmd, read_offset, write_cmd, write_offset;
+ u8 temp[PMIC_ARB_MAX_TRANS_BYTES];
+ unsigned long flags;
+ int rc, i;
+
+ rc = pmic_arb_fmt_read_cmd(pmic_arb, SPMI_CMD_EXT_READL, sid, addr, len,
+ &read_cmd, &read_offset);
+ if (rc)
+ return rc;
+
+ rc = pmic_arb_fmt_write_cmd(pmic_arb, SPMI_CMD_EXT_WRITEL, sid, addr,
+ len, &write_cmd, &write_offset);
+ if (rc)
+ return rc;
+
+ raw_spin_lock_irqsave(&pmic_arb->lock, flags);
+ rc = pmic_arb_read_cmd_unlocked(ctrl, read_cmd, read_offset, sid, addr,
+ temp, len);
+ if (rc)
+ goto done;
+
+ for (i = 0; i < len; i++)
+ temp[i] = (temp[i] & ~mask[i]) | (buf[i] & mask[i]);
+
+ rc = pmic_arb_write_cmd_unlocked(ctrl, write_cmd, write_offset, sid,
+ addr, temp, len);
+done:
raw_spin_unlock_irqrestore(&pmic_arb->lock, flags);
return rc;
@@ -482,6 +566,23 @@ static void qpnpint_spmi_read(struct irq_data *d, u8 reg, void *buf, size_t len)
d->irq);
}
+static int qpnpint_spmi_masked_write(struct irq_data *d, u8 reg,
+ const void *buf, const void *mask,
+ size_t len)
+{
+ struct spmi_pmic_arb *pmic_arb = irq_data_get_irq_chip_data(d);
+ u8 sid = hwirq_to_sid(d->hwirq);
+ u8 per = hwirq_to_per(d->hwirq);
+ int rc;
+
+ rc = pmic_arb_masked_write(pmic_arb->spmic, sid, (per << 8) + reg, buf,
+ mask, len);
+ if (rc)
+ dev_err_ratelimited(&pmic_arb->spmic->dev, "failed irqchip transaction on %x rc=%d\n",
+ d->irq, rc);
+ return rc;
+}
+
static void cleanup_irq(struct spmi_pmic_arb *pmic_arb, u16 apid, int id)
{
u16 ppid = pmic_arb->apid_data[apid].ppid;
@@ -600,18 +701,18 @@ static void qpnpint_irq_unmask(struct irq_data *d)
static int qpnpint_irq_set_type(struct irq_data *d, unsigned int flow_type)
{
- struct spmi_pmic_arb_qpnpint_type type;
+ struct spmi_pmic_arb_qpnpint_type type = {0};
+ struct spmi_pmic_arb_qpnpint_type mask;
irq_flow_handler_t flow_handler;
- u8 irq = hwirq_to_irq(d->hwirq);
-
- qpnpint_spmi_read(d, QPNPINT_REG_SET_TYPE, &type, sizeof(type));
+ u8 irq_bit = BIT(hwirq_to_irq(d->hwirq));
+ int rc;
if (flow_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) {
- type.type |= BIT(irq);
+ type.type = irq_bit;
if (flow_type & IRQF_TRIGGER_RISING)
- type.polarity_high |= BIT(irq);
+ type.polarity_high = irq_bit;
if (flow_type & IRQF_TRIGGER_FALLING)
- type.polarity_low |= BIT(irq);
+ type.polarity_low = irq_bit;
flow_handler = handle_edge_irq;
} else {
@@ -619,19 +720,23 @@ static int qpnpint_irq_set_type(struct irq_data *d, unsigned int flow_type)
(flow_type & (IRQF_TRIGGER_LOW)))
return -EINVAL;
- type.type &= ~BIT(irq); /* level trig */
if (flow_type & IRQF_TRIGGER_HIGH)
- type.polarity_high |= BIT(irq);
+ type.polarity_high = irq_bit;
else
- type.polarity_low |= BIT(irq);
+ type.polarity_low = irq_bit;
flow_handler = handle_level_irq;
}
- qpnpint_spmi_write(d, QPNPINT_REG_SET_TYPE, &type, sizeof(type));
+ mask.type = irq_bit;
+ mask.polarity_high = irq_bit;
+ mask.polarity_low = irq_bit;
+
+ rc = qpnpint_spmi_masked_write(d, QPNPINT_REG_SET_TYPE, &type, &mask,
+ sizeof(type));
irq_set_handler_locked(d, flow_handler);
- return 0;
+ return rc;
}
static int qpnpint_irq_set_wake(struct irq_data *d, unsigned int on)
diff --git a/drivers/staging/rts5208/rtsx.c b/drivers/staging/rts5208/rtsx.c
index 91fcf85e150a..5a58dac76c88 100644
--- a/drivers/staging/rts5208/rtsx.c
+++ b/drivers/staging/rts5208/rtsx.c
@@ -450,13 +450,13 @@ skip_for_abort:
* after the down() -- that's necessary for the thread-shutdown
* case.
*
- * complete_and_exit() goes even further than this -- it is safe in
- * the case that the thread of the caller is going away (not just
- * the structure) -- this is necessary for the module-remove case.
- * This is important in preemption kernels, which transfer the flow
- * of execution immediately upon a complete().
+ * kthread_complete_and_exit() goes even further than this --
+ * it is safe in the case that the thread of the caller is going away
+ * (not just the structure) -- this is necessary for the module-remove
+ * case. This is important in preemption kernels, which transfer the
+ * flow of execution immediately upon a complete().
*/
- complete_and_exit(&dev->control_exit, 0);
+ kthread_complete_and_exit(&dev->control_exit, 0);
}
static int rtsx_polling_thread(void *__dev)
@@ -501,7 +501,7 @@ static int rtsx_polling_thread(void *__dev)
mutex_unlock(&dev->dev_mutex);
}
- complete_and_exit(&dev->polling_exit, 0);
+ kthread_complete_and_exit(&dev->polling_exit, 0);
}
/*
@@ -682,7 +682,7 @@ static int rtsx_scan_thread(void *__dev)
/* Should we unbind if no devices were detected? */
}
- complete_and_exit(&dev->scanning_done, 0);
+ kthread_complete_and_exit(&dev->scanning_done, 0);
}
static void rtsx_init_options(struct rtsx_chip *chip)
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 8502b7d8df89..72acb1f61849 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -596,6 +596,7 @@ static const struct acpi_device_id int3400_thermal_match[] = {
{"INT3400", 0},
{"INTC1040", 0},
{"INTC1041", 0},
+ {"INTC10A0", 0},
{}
};
diff --git a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
index c3c4c4d34542..07e25321dfe3 100644
--- a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
@@ -285,6 +285,7 @@ static const struct acpi_device_id int3403_device_ids[] = {
{"INT3403", 0},
{"INTC1043", 0},
{"INTC1046", 0},
+ {"INTC10A1", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, int3403_device_ids);
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
index 9b2a64ef55d0..49932a68abac 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
@@ -24,6 +24,7 @@
#define PCI_DEVICE_ID_INTEL_HSB_THERMAL 0x0A03
#define PCI_DEVICE_ID_INTEL_ICL_THERMAL 0x8a03
#define PCI_DEVICE_ID_INTEL_JSL_THERMAL 0x4E03
+#define PCI_DEVICE_ID_INTEL_RPL_THERMAL 0xA71D
#define PCI_DEVICE_ID_INTEL_SKL_THERMAL 0x1903
#define PCI_DEVICE_ID_INTEL_TGL_THERMAL 0x9A03
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
index b4bcd3fe9eb2..ca40b0967cdd 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -358,6 +358,7 @@ static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, proc_thermal_pci_suspend,
static const struct pci_device_id proc_thermal_pci_ids[] = {
{ PCI_DEVICE_DATA(INTEL, ADL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) },
+ { PCI_DEVICE_DATA(INTEL, RPL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) },
{ },
};
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index a38fd65e39ab..8933ef1f83c0 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1938,7 +1938,7 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty,
more = n - (size - tail);
if (eol == N_TTY_BUF_SIZE && more) {
/* scan wrapped without finding set bit */
- eol = find_next_bit(ldata->read_flags, more, 0);
+ eol = find_first_bit(ldata->read_flags, more);
found = eol != more;
} else
found = eol != size;
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index ea96e319c8a0..43afbb7c5ab9 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -83,13 +83,14 @@ static struct map_sysfs_entry size_attribute =
static struct map_sysfs_entry offset_attribute =
__ATTR(offset, S_IRUGO, map_offset_show, NULL);
-static struct attribute *attrs[] = {
+static struct attribute *map_attrs[] = {
&name_attribute.attr,
&addr_attribute.attr,
&size_attribute.attr,
&offset_attribute.attr,
NULL, /* need to NULL terminate the list of attributes */
};
+ATTRIBUTE_GROUPS(map);
static void map_release(struct kobject *kobj)
{
@@ -119,7 +120,7 @@ static const struct sysfs_ops map_sysfs_ops = {
static struct kobj_type map_attr_type = {
.release = map_release,
.sysfs_ops = &map_sysfs_ops,
- .default_attrs = attrs,
+ .default_groups = map_groups,
};
struct uio_portio {
@@ -178,6 +179,7 @@ static struct attribute *portio_attrs[] = {
&portio_porttype_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(portio);
static void portio_release(struct kobject *kobj)
{
@@ -207,7 +209,7 @@ static const struct sysfs_ops portio_sysfs_ops = {
static struct kobj_type portio_attr_type = {
.release = portio_release,
.sysfs_ops = &portio_sysfs_ops,
- .default_attrs = portio_attrs,
+ .default_groups = portio_groups,
};
static ssize_t name_show(struct device *dev,
diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
index 6b5cfa5b0673..1106f3376404 100644
--- a/drivers/uio/uio_dmem_genirq.c
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -188,7 +188,11 @@ static int uio_dmem_genirq_probe(struct platform_device *pdev)
return -ENOMEM;
}
- dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+ ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+ if (ret) {
+ dev_err(&pdev->dev, "DMA enable failed\n");
+ return ret;
+ }
priv->uioinfo = uioinfo;
spin_lock_init(&priv->lock);
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index da17be1ef64e..e3a49d837609 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -969,7 +969,7 @@ static int usbatm_do_heavy_init(void *arg)
instance->thread = NULL;
mutex_unlock(&instance->serialize);
- complete_and_exit(&instance->thread_exited, ret);
+ kthread_complete_and_exit(&instance->thread_exited, ret);
}
static int usbatm_heavy_init(struct usbatm_data *instance)
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 752439690fda..46dd11dcb3a8 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -2547,7 +2547,7 @@ static int fsg_main_thread(void *common_)
up_write(&common->filesem);
/* Let fsg_unbind() know the thread has exited */
- complete_and_exit(&common->thread_notifier, 0);
+ kthread_complete_and_exit(&common->thread_notifier, 0);
}
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index 64de9f1b874c..431d5a7d737e 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -1117,7 +1117,7 @@ static int rndis_proc_show(struct seq_file *m, void *v)
static ssize_t rndis_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- rndis_params *p = PDE_DATA(file_inode(file));
+ rndis_params *p = pde_data(file_inode(file));
u32 speed = 0;
int i, fl_speed = 0;
@@ -1161,7 +1161,7 @@ static ssize_t rndis_proc_write(struct file *file, const char __user *buffer,
static int rndis_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, rndis_proc_show, PDE_DATA(inode));
+ return single_open(file, rndis_proc_show, pde_data(inode));
}
static const struct proc_ops rndis_proc_ops = {
diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c
index 3f788794571a..f480d54f308c 100644
--- a/drivers/vdpa/alibaba/eni_vdpa.c
+++ b/drivers/vdpa/alibaba/eni_vdpa.c
@@ -58,7 +58,7 @@ static struct virtio_pci_legacy_device *vdpa_to_ldev(struct vdpa_device *vdpa)
return &eni_vdpa->ldev;
}
-static u64 eni_vdpa_get_features(struct vdpa_device *vdpa)
+static u64 eni_vdpa_get_device_features(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
u64 features = vp_legacy_get_features(ldev);
@@ -69,7 +69,7 @@ static u64 eni_vdpa_get_features(struct vdpa_device *vdpa)
return features;
}
-static int eni_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
+static int eni_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
@@ -84,6 +84,13 @@ static int eni_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
return 0;
}
+static u64 eni_vdpa_get_driver_features(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+
+ return vp_legacy_get_driver_features(ldev);
+}
+
static u8 eni_vdpa_get_status(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
@@ -401,8 +408,9 @@ static void eni_vdpa_set_config_cb(struct vdpa_device *vdpa,
}
static const struct vdpa_config_ops eni_vdpa_ops = {
- .get_features = eni_vdpa_get_features,
- .set_features = eni_vdpa_set_features,
+ .get_device_features = eni_vdpa_get_device_features,
+ .set_driver_features = eni_vdpa_set_driver_features,
+ .get_driver_features = eni_vdpa_get_driver_features,
.get_status = eni_vdpa_get_status,
.set_status = eni_vdpa_set_status,
.reset = eni_vdpa_reset,
@@ -450,11 +458,6 @@ static u16 eni_vdpa_get_num_queues(struct eni_vdpa *eni_vdpa)
return num;
}
-static void eni_vdpa_free_irq_vectors(void *data)
-{
- pci_free_irq_vectors(data);
-}
-
static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
@@ -488,13 +491,6 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
eni_vdpa->vdpa.dma_dev = &pdev->dev;
eni_vdpa->queues = eni_vdpa_get_num_queues(eni_vdpa);
- ret = devm_add_action_or_reset(dev, eni_vdpa_free_irq_vectors, pdev);
- if (ret) {
- ENI_ERR(pdev,
- "failed for adding devres for freeing irq vectors\n");
- goto err;
- }
-
eni_vdpa->vring = devm_kcalloc(&pdev->dev, eni_vdpa->queues,
sizeof(*eni_vdpa->vring),
GFP_KERNEL);
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 2808f1ba9f7b..7d41dfe48ade 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -143,8 +143,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr);
break;
case VIRTIO_PCI_CAP_DEVICE_CFG:
- hw->net_cfg = get_cap_addr(hw, &cap);
- IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg);
+ hw->dev_cfg = get_cap_addr(hw, &cap);
+ IFCVF_DBG(pdev, "hw->dev_cfg = %p\n", hw->dev_cfg);
break;
}
@@ -153,7 +153,7 @@ next:
}
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
- hw->isr == NULL || hw->net_cfg == NULL) {
+ hw->isr == NULL || hw->dev_cfg == NULL) {
IFCVF_ERR(pdev, "Incomplete PCI capabilities\n");
return -EIO;
}
@@ -174,7 +174,7 @@ next:
IFCVF_DBG(pdev,
"PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n",
hw->common_cfg, hw->notify_base, hw->isr,
- hw->net_cfg, hw->notify_off_multiplier);
+ hw->dev_cfg, hw->notify_off_multiplier);
return 0;
}
@@ -242,33 +242,54 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
return 0;
}
-void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
+u32 ifcvf_get_config_size(struct ifcvf_hw *hw)
+{
+ struct ifcvf_adapter *adapter;
+ u32 config_size;
+
+ adapter = vf_to_adapter(hw);
+ switch (hw->dev_type) {
+ case VIRTIO_ID_NET:
+ config_size = sizeof(struct virtio_net_config);
+ break;
+ case VIRTIO_ID_BLOCK:
+ config_size = sizeof(struct virtio_blk_config);
+ break;
+ default:
+ config_size = 0;
+ IFCVF_ERR(adapter->pdev, "VIRTIO ID %u not supported\n", hw->dev_type);
+ }
+
+ return config_size;
+}
+
+void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset,
void *dst, int length)
{
u8 old_gen, new_gen, *p;
int i;
- WARN_ON(offset + length > sizeof(struct virtio_net_config));
+ WARN_ON(offset + length > hw->config_size);
do {
old_gen = ifc_ioread8(&hw->common_cfg->config_generation);
p = dst;
for (i = 0; i < length; i++)
- *p++ = ifc_ioread8(hw->net_cfg + offset + i);
+ *p++ = ifc_ioread8(hw->dev_cfg + offset + i);
new_gen = ifc_ioread8(&hw->common_cfg->config_generation);
} while (old_gen != new_gen);
}
-void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
+void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset,
const void *src, int length)
{
const u8 *p;
int i;
p = src;
- WARN_ON(offset + length > sizeof(struct virtio_net_config));
+ WARN_ON(offset + length > hw->config_size);
for (i = 0; i < length; i++)
- ifc_iowrite8(*p++, hw->net_cfg + offset + i);
+ ifc_iowrite8(*p++, hw->dev_cfg + offset + i);
}
static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 09918af3ecf8..c486873f370a 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -71,12 +71,14 @@ struct ifcvf_hw {
u64 hw_features;
u32 dev_type;
struct virtio_pci_common_cfg __iomem *common_cfg;
- void __iomem *net_cfg;
+ void __iomem *dev_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES];
void __iomem * const *base;
char config_msix_name[256];
struct vdpa_callback config_cb;
unsigned int config_irq;
+ /* virtio-net or virtio-blk device config size */
+ u32 config_size;
};
struct ifcvf_adapter {
@@ -105,9 +107,9 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
int ifcvf_start_hw(struct ifcvf_hw *hw);
void ifcvf_stop_hw(struct ifcvf_hw *hw);
void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid);
-void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
+void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset,
void *dst, int length);
-void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
+void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset,
const void *src, int length);
u8 ifcvf_get_status(struct ifcvf_hw *hw);
void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
@@ -120,4 +122,5 @@ u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num);
struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
int ifcvf_probed_virtio_net(struct ifcvf_hw *hw);
+u32 ifcvf_get_config_size(struct ifcvf_hw *hw);
#endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 6dc75ca70b37..d1a6b5ab543c 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -169,7 +169,7 @@ static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev)
return &adapter->vf;
}
-static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
+static u64 ifcvf_vdpa_get_device_features(struct vdpa_device *vdpa_dev)
{
struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
@@ -187,7 +187,7 @@ static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
return features;
}
-static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features)
+static int ifcvf_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 features)
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
int ret;
@@ -201,6 +201,13 @@ static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features)
return 0;
}
+static u64 ifcvf_vdpa_get_driver_features(struct vdpa_device *vdpa_dev)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ return vf->req_features;
+}
+
static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev)
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
@@ -366,24 +373,9 @@ static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
static size_t ifcvf_vdpa_get_config_size(struct vdpa_device *vdpa_dev)
{
- struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
- struct pci_dev *pdev = adapter->pdev;
- size_t size;
-
- switch (vf->dev_type) {
- case VIRTIO_ID_NET:
- size = sizeof(struct virtio_net_config);
- break;
- case VIRTIO_ID_BLOCK:
- size = sizeof(struct virtio_blk_config);
- break;
- default:
- size = 0;
- IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
- }
- return size;
+ return vf->config_size;
}
static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
@@ -392,8 +384,7 @@ static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
- WARN_ON(offset + len > sizeof(struct virtio_net_config));
- ifcvf_read_net_config(vf, offset, buf, len);
+ ifcvf_read_dev_config(vf, offset, buf, len);
}
static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,
@@ -402,8 +393,7 @@ static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
- WARN_ON(offset + len > sizeof(struct virtio_net_config));
- ifcvf_write_net_config(vf, offset, buf, len);
+ ifcvf_write_dev_config(vf, offset, buf, len);
}
static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
@@ -443,8 +433,9 @@ static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_devic
* implemented set_map()/dma_map()/dma_unmap()
*/
static const struct vdpa_config_ops ifc_vdpa_ops = {
- .get_features = ifcvf_vdpa_get_features,
- .set_features = ifcvf_vdpa_set_features,
+ .get_device_features = ifcvf_vdpa_get_device_features,
+ .set_driver_features = ifcvf_vdpa_set_driver_features,
+ .get_driver_features = ifcvf_vdpa_get_driver_features,
.get_status = ifcvf_vdpa_get_status,
.set_status = ifcvf_vdpa_set_status,
.reset = ifcvf_vdpa_reset,
@@ -542,6 +533,7 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
vf->vring[i].irq = -EINVAL;
vf->hw_features = ifcvf_get_hw_features(vf);
+ vf->config_size = ifcvf_get_config_size(vf);
adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index cf59f7e17c6d..f648f1c54a0f 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -131,25 +131,24 @@ struct mlx5_vdpa_virtqueue {
struct mlx5_vq_restore_info ri;
};
-/* We will remove this limitation once mlx5_vdpa_alloc_resources()
- * provides for driver space allocation
- */
-#define MLX5_MAX_SUPPORTED_VQS 16
-
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
- if (unlikely(idx > mvdev->max_idx))
- return false;
+ if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
+ if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
+ return idx < 2;
+ else
+ return idx < 3;
+ }
- return true;
+ return idx <= mvdev->max_idx;
}
struct mlx5_vdpa_net {
struct mlx5_vdpa_dev mvdev;
struct mlx5_vdpa_net_resources res;
struct virtio_net_config config;
- struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
- struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
+ struct mlx5_vdpa_virtqueue *vqs;
+ struct vdpa_callback *event_cbs;
/* Serialize vq resources creation and destruction. This is required
* since memory map might change and we need to destroy and create
@@ -876,8 +875,6 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
- if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
- MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
if (err)
@@ -1218,7 +1215,7 @@ static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
int i;
- for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
+ for (i = 0; i < ndev->mvdev.max_vqs; i++)
suspend_vq(ndev, &ndev->vqs[i]);
}
@@ -1244,8 +1241,14 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
void *in;
int i, j;
int err;
+ int num;
- max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
+ if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
+ num = 1;
+ else
+ num = ndev->cur_num_vqs / 2;
+
+ max_rqt = min_t(int, roundup_pow_of_two(num),
1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
if (max_rqt < 1)
return -EOPNOTSUPP;
@@ -1261,17 +1264,10 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
- for (i = 0, j = 0; j < max_rqt; j++) {
- if (!ndev->vqs[j].initialized)
- continue;
-
- if (!vq_is_tx(ndev->vqs[j].index)) {
- list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
- i++;
- }
- }
- MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
+ for (i = 0, j = 0; i < max_rqt; i++, j += 2)
+ list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
kfree(in);
if (err)
@@ -1292,7 +1288,7 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
int i, j;
int err;
- max_rqt = min_t(int, ndev->cur_num_vqs / 2,
+ max_rqt = min_t(int, roundup_pow_of_two(ndev->cur_num_vqs / 2),
1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
if (max_rqt < 1)
return -EOPNOTSUPP;
@@ -1308,16 +1304,10 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
- for (i = 0, j = 0; j < num; j++) {
- if (!ndev->vqs[j].initialized)
- continue;
+ for (i = 0, j = 0; i < max_rqt; i++, j += 2)
+ list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
- if (!vq_is_tx(ndev->vqs[j].index)) {
- list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
- i++;
- }
- }
- MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
kfree(in);
if (err)
@@ -1554,9 +1544,11 @@ static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
return 0;
clean_added:
- for (--i; i >= cur_qps; --i)
+ for (--i; i >= 2 * cur_qps; --i)
teardown_vq(ndev, &ndev->vqs[i]);
+ ndev->cur_num_vqs = 2 * cur_qps;
+
return err;
}
@@ -1581,9 +1573,6 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
break;
}
- if (newqps & (newqps - 1))
- break;
-
if (!change_num_qps(mvdev, newqps))
status = VIRTIO_NET_OK;
@@ -1880,21 +1869,29 @@ static u64 mlx_to_vritio_features(u16 dev_features)
return result;
}
-static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
+static u64 get_supported_features(struct mlx5_core_dev *mdev)
{
- struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
- struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ u64 mlx_vdpa_features = 0;
u16 dev_features;
- dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
- ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
- if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
- ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
- ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
- ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
- ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
- ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
- ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
+ dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
+ mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
+ if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
+ mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
+ mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
+ mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
+ mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
+ mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
+ mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
+ mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
+
+ return mlx_vdpa_features;
+}
+
+static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
print_features(mvdev, ndev->mvdev.mlx_features, false);
return ndev->mvdev.mlx_features;
@@ -1972,7 +1969,7 @@ static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
}
}
-static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
+static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
@@ -1985,6 +1982,11 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
return err;
ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
+ if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
+ ndev->cur_num_vqs = 2 * mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
+ else
+ ndev->cur_num_vqs = 2;
+
update_cvq_info(mvdev);
return err;
}
@@ -2235,7 +2237,8 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
clear_vqs_ready(ndev);
mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status = 0;
- memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
+ ndev->cur_num_vqs = 0;
+ memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
ndev->mvdev.actual_features = 0;
++mvdev->generation;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
@@ -2308,6 +2311,8 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
}
mlx5_vdpa_free_resources(&ndev->mvdev);
mutex_destroy(&ndev->reslock);
+ kfree(ndev->event_cbs);
+ kfree(ndev->vqs);
}
static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
@@ -2339,6 +2344,13 @@ static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
return -EOPNOTSUPP;
}
+static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+ return mvdev->actual_features;
+}
+
static const struct vdpa_config_ops mlx5_vdpa_ops = {
.set_vq_address = mlx5_vdpa_set_vq_address,
.set_vq_num = mlx5_vdpa_set_vq_num,
@@ -2351,8 +2363,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.get_vq_notification = mlx5_get_vq_notification,
.get_vq_irq = mlx5_get_vq_irq,
.get_vq_align = mlx5_vdpa_get_vq_align,
- .get_features = mlx5_vdpa_get_features,
- .set_features = mlx5_vdpa_set_features,
+ .get_device_features = mlx5_vdpa_get_device_features,
+ .set_driver_features = mlx5_vdpa_set_driver_features,
+ .get_driver_features = mlx5_vdpa_get_driver_features,
.set_config_cb = mlx5_vdpa_set_config_cb,
.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
.get_device_id = mlx5_vdpa_get_device_id,
@@ -2545,18 +2558,39 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
return -EOPNOTSUPP;
}
- /* we save one virtqueue for control virtqueue should we require it */
max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
- max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
+ if (max_vqs < 2) {
+ dev_warn(mdev->device,
+ "%d virtqueues are supported. At least 2 are required\n",
+ max_vqs);
+ return -EAGAIN;
+ }
+
+ if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
+ if (add_config->net.max_vq_pairs > max_vqs / 2)
+ return -EINVAL;
+ max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
+ } else {
+ max_vqs = 2;
+ }
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
name, false);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
+ ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
ndev->mvdev.max_vqs = max_vqs;
mvdev = &ndev->mvdev;
mvdev->mdev = mdev;
+
+ ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
+ ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
+ if (!ndev->vqs || !ndev->event_cbs) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
init_mvqs(ndev);
mutex_init(&ndev->reslock);
config = &ndev->config;
@@ -2612,9 +2646,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
ndev->nb.notifier_call = event_handler;
mlx5_notifier_register(mdev, &ndev->nb);
- ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
mvdev->vdev.mdev = &mgtdev->mgtdev;
- err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
+ err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs) + 1);
if (err)
goto err_reg;
@@ -2634,6 +2667,7 @@ err_mpfs:
mlx5_mpfs_del_mac(pfmdev, config->mac);
err_mtu:
mutex_destroy(&ndev->reslock);
+err_alloc:
put_device(&mvdev->vdev.dev);
return err;
}
@@ -2676,7 +2710,11 @@ static int mlx5v_probe(struct auxiliary_device *adev,
mgtdev->mgtdev.ops = &mdev_ops;
mgtdev->mgtdev.device = mdev->device;
mgtdev->mgtdev.id_table = id_table;
- mgtdev->mgtdev.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
+ mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
+ BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
+ mgtdev->mgtdev.max_supported_vqs =
+ MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
+ mgtdev->mgtdev.supported_features = get_supported_features(mdev);
mgtdev->madev = madev;
err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 09bbe53c3ac4..9846c9de4bfa 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -21,6 +21,14 @@ static LIST_HEAD(mdev_head);
static DEFINE_MUTEX(vdpa_dev_mutex);
static DEFINE_IDA(vdpa_index_ida);
+void vdpa_set_status(struct vdpa_device *vdev, u8 status)
+{
+ mutex_lock(&vdev->cf_mutex);
+ vdev->config->set_status(vdev, status);
+ mutex_unlock(&vdev->cf_mutex);
+}
+EXPORT_SYMBOL(vdpa_set_status);
+
static struct genl_family vdpa_nl_family;
static int vdpa_dev_probe(struct device *d)
@@ -52,8 +60,81 @@ static void vdpa_dev_remove(struct device *d)
drv->remove(vdev);
}
+static int vdpa_dev_match(struct device *dev, struct device_driver *drv)
+{
+ struct vdpa_device *vdev = dev_to_vdpa(dev);
+
+ /* Check override first, and if set, only use the named driver */
+ if (vdev->driver_override)
+ return strcmp(vdev->driver_override, drv->name) == 0;
+
+ /* Currently devices must be supported by all vDPA bus drivers */
+ return 1;
+}
+
+static ssize_t driver_override_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vdpa_device *vdev = dev_to_vdpa(dev);
+ const char *driver_override, *old;
+ char *cp;
+
+ /* We need to keep extra room for a newline */
+ if (count >= (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ driver_override = kstrndup(buf, count, GFP_KERNEL);
+ if (!driver_override)
+ return -ENOMEM;
+
+ cp = strchr(driver_override, '\n');
+ if (cp)
+ *cp = '\0';
+
+ device_lock(dev);
+ old = vdev->driver_override;
+ if (strlen(driver_override)) {
+ vdev->driver_override = driver_override;
+ } else {
+ kfree(driver_override);
+ vdev->driver_override = NULL;
+ }
+ device_unlock(dev);
+
+ kfree(old);
+
+ return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct vdpa_device *vdev = dev_to_vdpa(dev);
+ ssize_t len;
+
+ device_lock(dev);
+ len = snprintf(buf, PAGE_SIZE, "%s\n", vdev->driver_override);
+ device_unlock(dev);
+
+ return len;
+}
+static DEVICE_ATTR_RW(driver_override);
+
+static struct attribute *vdpa_dev_attrs[] = {
+ &dev_attr_driver_override.attr,
+ NULL,
+};
+
+static const struct attribute_group vdpa_dev_group = {
+ .attrs = vdpa_dev_attrs,
+};
+__ATTRIBUTE_GROUPS(vdpa_dev);
+
static struct bus_type vdpa_bus = {
.name = "vdpa",
+ .dev_groups = vdpa_dev_groups,
+ .match = vdpa_dev_match,
.probe = vdpa_dev_probe,
.remove = vdpa_dev_remove,
};
@@ -68,6 +149,7 @@ static void vdpa_release_dev(struct device *d)
ida_simple_remove(&vdpa_index_ida, vdev->index);
mutex_destroy(&vdev->cf_mutex);
+ kfree(vdev->driver_override);
kfree(vdev);
}
@@ -300,6 +382,21 @@ void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev)
}
EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
+static void vdpa_get_config_unlocked(struct vdpa_device *vdev,
+ unsigned int offset,
+ void *buf, unsigned int len)
+{
+ const struct vdpa_config_ops *ops = vdev->config;
+
+ /*
+ * Config accesses aren't supposed to trigger before features are set.
+ * If it does happen we assume a legacy guest.
+ */
+ if (!vdev->features_valid)
+ vdpa_set_features(vdev, 0, true);
+ ops->get_config(vdev, offset, buf, len);
+}
+
/**
* vdpa_get_config - Get one or more device configuration fields.
* @vdev: vdpa device to operate on
@@ -310,16 +407,8 @@ EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len)
{
- const struct vdpa_config_ops *ops = vdev->config;
-
mutex_lock(&vdev->cf_mutex);
- /*
- * Config accesses aren't supposed to trigger before features are set.
- * If it does happen we assume a legacy guest.
- */
- if (!vdev->features_valid)
- vdpa_set_features(vdev, 0);
- ops->get_config(vdev, offset, buf, len);
+ vdpa_get_config_unlocked(vdev, offset, buf, len);
mutex_unlock(&vdev->cf_mutex);
}
EXPORT_SYMBOL_GPL(vdpa_get_config);
@@ -414,6 +503,16 @@ static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *m
err = -EMSGSIZE;
goto msg_err;
}
+ if (nla_put_u32(msg, VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,
+ mdev->max_supported_vqs)) {
+ err = -EMSGSIZE;
+ goto msg_err;
+ }
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_SUPPORTED_FEATURES,
+ mdev->supported_features, VDPA_ATTR_PAD)) {
+ err = -EMSGSIZE;
+ goto msg_err;
+ }
genlmsg_end(msg, hdr);
return 0;
@@ -480,8 +579,9 @@ out:
return msg->len;
}
-#define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
- (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
+#define VDPA_DEV_NET_ATTRS_MASK (BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
+ BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | \
+ BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP))
static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
{
@@ -500,12 +600,22 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) {
macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]);
memcpy(config.net.mac, macaddr, sizeof(config.net.mac));
- config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
+ config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
}
if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) {
config.net.mtu =
nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]);
- config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU);
+ config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
+ }
+ if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]) {
+ config.net.max_vq_pairs =
+ nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]);
+ if (!config.net.max_vq_pairs) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "At least one pair of VQs is required");
+ return -EINVAL;
+ }
+ config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
}
/* Skip checking capability if user didn't prefer to configure any
@@ -707,7 +817,7 @@ static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev,
{
u16 val_u16;
- if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0)
+ if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0)
return 0;
val_u16 = le16_to_cpu(config->max_virtqueue_pairs);
@@ -720,7 +830,7 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
u64 features;
u16 val_u16;
- vdpa_get_config(vdev, 0, &config, sizeof(config));
+ vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac),
config.mac))
@@ -734,7 +844,10 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
return -EMSGSIZE;
- features = vdev->config->get_features(vdev);
+ features = vdev->config->get_driver_features(vdev);
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features,
+ VDPA_ATTR_PAD))
+ return -EMSGSIZE;
return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config);
}
@@ -745,12 +858,23 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
{
u32 device_id;
void *hdr;
+ u8 status;
int err;
+ mutex_lock(&vdev->cf_mutex);
+ status = vdev->config->get_status(vdev);
+ if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
+ NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed");
+ err = -EAGAIN;
+ goto out;
+ }
+
hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
VDPA_CMD_DEV_CONFIG_GET);
- if (!hdr)
- return -EMSGSIZE;
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto out;
+ }
if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
err = -EMSGSIZE;
@@ -774,11 +898,14 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
if (err)
goto msg_err;
+ mutex_unlock(&vdev->cf_mutex);
genlmsg_end(msg, hdr);
return 0;
msg_err:
genlmsg_cancel(msg, hdr);
+out:
+ mutex_unlock(&vdev->cf_mutex);
return err;
}
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 41b0cd17fcba..ddbe142af09a 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -399,14 +399,14 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa)
return VDPASIM_QUEUE_ALIGN;
}
-static u64 vdpasim_get_features(struct vdpa_device *vdpa)
+static u64 vdpasim_get_device_features(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
return vdpasim->dev_attr.supported_features;
}
-static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features)
+static int vdpasim_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -419,6 +419,13 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features)
return 0;
}
+static u64 vdpasim_get_driver_features(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->features;
+}
+
static void vdpasim_set_config_cb(struct vdpa_device *vdpa,
struct vdpa_callback *cb)
{
@@ -613,8 +620,9 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.set_vq_state = vdpasim_set_vq_state,
.get_vq_state = vdpasim_get_vq_state,
.get_vq_align = vdpasim_get_vq_align,
- .get_features = vdpasim_get_features,
- .set_features = vdpasim_set_features,
+ .get_device_features = vdpasim_get_device_features,
+ .set_driver_features = vdpasim_set_driver_features,
+ .get_driver_features = vdpasim_get_driver_features,
.set_config_cb = vdpasim_set_config_cb,
.get_vq_num_max = vdpasim_get_vq_num_max,
.get_device_id = vdpasim_get_device_id,
@@ -642,8 +650,9 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.set_vq_state = vdpasim_set_vq_state,
.get_vq_state = vdpasim_get_vq_state,
.get_vq_align = vdpasim_get_vq_align,
- .get_features = vdpasim_get_features,
- .set_features = vdpasim_set_features,
+ .get_device_features = vdpasim_get_device_features,
+ .set_driver_features = vdpasim_set_driver_features,
+ .get_driver_features = vdpasim_get_driver_features,
.set_config_cb = vdpasim_set_config_cb,
.get_vq_num_max = vdpasim_get_vq_num_max,
.get_device_id = vdpasim_get_device_id,
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index 76dd24abc791..d5324f6fd8c7 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -191,6 +191,8 @@ static struct vdpa_mgmt_dev mgmt_dev = {
.ops = &vdpasim_net_mgmtdev_ops,
.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
1 << VDPA_ATTR_DEV_NET_CFG_MTU),
+ .max_supported_vqs = VDPASIM_NET_VQ_NUM,
+ .supported_features = VDPASIM_NET_FEATURES,
};
static int __init vdpasim_net_init(void)
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index eddcb64a910a..f85d1a08ed87 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -573,14 +573,14 @@ static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
return dev->vq_align;
}
-static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa)
+static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
return dev->device_features;
}
-static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
+static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
@@ -588,6 +588,13 @@ static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
return 0;
}
+static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ return dev->driver_features;
+}
+
static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
struct vdpa_callback *cb)
{
@@ -721,8 +728,9 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
.set_vq_state = vduse_vdpa_set_vq_state,
.get_vq_state = vduse_vdpa_get_vq_state,
.get_vq_align = vduse_vdpa_get_vq_align,
- .get_features = vduse_vdpa_get_features,
- .set_features = vduse_vdpa_set_features,
+ .get_device_features = vduse_vdpa_get_device_features,
+ .set_driver_features = vduse_vdpa_set_driver_features,
+ .get_driver_features = vduse_vdpa_get_driver_features,
.set_config_cb = vduse_vdpa_set_config_cb,
.get_vq_num_max = vduse_vdpa_get_vq_num_max,
.get_device_id = vduse_vdpa_get_device_id,
@@ -1357,7 +1365,6 @@ err_domain:
err_str:
vduse_dev_destroy(dev);
err:
- kvfree(config_buf);
return ret;
}
@@ -1408,6 +1415,8 @@ static long vduse_ioctl(struct file *file, unsigned int cmd,
}
config.name[VDUSE_NAME_MAX - 1] = '\0';
ret = vduse_create_dev(&config, buf, control->api_version);
+ if (ret)
+ kvfree(buf);
break;
}
case VDUSE_DESTROY_DEV: {
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index e3ff7875e123..a57e381e830b 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -53,14 +53,14 @@ static struct virtio_pci_modern_device *vdpa_to_mdev(struct vdpa_device *vdpa)
return &vp_vdpa->mdev;
}
-static u64 vp_vdpa_get_features(struct vdpa_device *vdpa)
+static u64 vp_vdpa_get_device_features(struct vdpa_device *vdpa)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
return vp_modern_get_features(mdev);
}
-static int vp_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
+static int vp_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
@@ -69,6 +69,13 @@ static int vp_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
return 0;
}
+static u64 vp_vdpa_get_driver_features(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return vp_modern_get_driver_features(mdev);
+}
+
static u8 vp_vdpa_get_status(struct vdpa_device *vdpa)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
@@ -415,8 +422,9 @@ vp_vdpa_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
}
static const struct vdpa_config_ops vp_vdpa_ops = {
- .get_features = vp_vdpa_get_features,
- .set_features = vp_vdpa_set_features,
+ .get_device_features = vp_vdpa_get_device_features,
+ .set_driver_features = vp_vdpa_set_driver_features,
+ .get_driver_features = vp_vdpa_get_driver_features,
.get_status = vp_vdpa_get_status,
.set_status = vp_vdpa_set_status,
.reset = vp_vdpa_reset,
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index 362f91ec8845..352c725ccf18 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -309,13 +309,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
if ((pos & 3) && size > 2) {
u16 val;
+ __le16 lval;
ret = pci_user_read_config_word(pdev, pos, &val);
if (ret)
return ret;
- val = cpu_to_le16(val);
- if (copy_to_user(buf + count - size, &val, 2))
+ lval = cpu_to_le16(val);
+ if (copy_to_user(buf + count - size, &lval, 2))
return -EFAULT;
pos += 2;
@@ -324,13 +325,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
while (size > 3) {
u32 val;
+ __le32 lval;
ret = pci_user_read_config_dword(pdev, pos, &val);
if (ret)
return ret;
- val = cpu_to_le32(val);
- if (copy_to_user(buf + count - size, &val, 4))
+ lval = cpu_to_le32(val);
+ if (copy_to_user(buf + count - size, &lval, 4))
return -EFAULT;
pos += 4;
@@ -339,13 +341,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
while (size >= 2) {
u16 val;
+ __le16 lval;
ret = pci_user_read_config_word(pdev, pos, &val);
if (ret)
return ret;
- val = cpu_to_le16(val);
- if (copy_to_user(buf + count - size, &val, 2))
+ lval = cpu_to_le16(val);
+ if (copy_to_user(buf + count - size, &lval, 2))
return -EFAULT;
pos += 2;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index f17490ab238f..9394aa9444c1 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -256,7 +256,7 @@ static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
static void vfio_dma_bitmap_free(struct vfio_dma *dma)
{
- kfree(dma->bitmap);
+ kvfree(dma->bitmap);
dma->bitmap = NULL;
}
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index a09dedc79f68..05740cba1cd8 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -166,6 +166,7 @@ static int vhost_test_release(struct inode *inode, struct file *f)
/* We do an extra flush before freeing memory,
* since jobs can re-queue themselves. */
vhost_test_flush(n);
+ kfree(n->dev.vqs);
kfree(n);
return 0;
}
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index e3c4f059b21a..851539807bc9 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -170,7 +170,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
* Userspace shouldn't remove status bits unless reset the
* status to 0.
*/
- if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
+ if (status != 0 && (status_old & ~status) != 0)
return -EINVAL;
if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
@@ -178,11 +178,11 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
vhost_vdpa_unsetup_vq_irq(v, i);
if (status == 0) {
- ret = ops->reset(vdpa);
+ ret = vdpa_reset(vdpa);
if (ret)
return ret;
} else
- ops->set_status(vdpa, status);
+ vdpa_set_status(vdpa, status);
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
for (i = 0; i < nvqs; i++)
@@ -195,7 +195,7 @@ static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
struct vhost_vdpa_config *c)
{
struct vdpa_device *vdpa = v->vdpa;
- long size = vdpa->config->get_config_size(vdpa);
+ size_t size = vdpa->config->get_config_size(vdpa);
if (c->len == 0 || c->off > size)
return -EINVAL;
@@ -262,7 +262,7 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
const struct vdpa_config_ops *ops = vdpa->config;
u64 features;
- features = ops->get_features(vdpa);
+ features = ops->get_device_features(vdpa);
if (copy_to_user(featurep, &features, sizeof(features)))
return -EFAULT;
@@ -286,7 +286,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
- if (vdpa_set_features(vdpa, features))
+ if (vdpa_set_features(vdpa, features, false))
return -EINVAL;
return 0;
diff --git a/drivers/video/fbdev/vga16fb.c b/drivers/video/fbdev/vga16fb.c
index e2757ff1c23d..96e312a3eac7 100644
--- a/drivers/video/fbdev/vga16fb.c
+++ b/drivers/video/fbdev/vga16fb.c
@@ -184,6 +184,25 @@ static inline void setindex(int index)
vga_io_w(VGA_GFX_I, index);
}
+/* Check if the video mode is supported by the driver */
+static inline int check_mode_supported(void)
+{
+ /* non-x86 architectures treat orig_video_isVGA as a boolean flag */
+#if defined(CONFIG_X86)
+ /* only EGA and VGA in 16 color graphic mode are supported */
+ if (screen_info.orig_video_isVGA != VIDEO_TYPE_EGAC &&
+ screen_info.orig_video_isVGA != VIDEO_TYPE_VGAC)
+ return -ENODEV;
+
+ if (screen_info.orig_video_mode != 0x0D && /* 320x200/4 (EGA) */
+ screen_info.orig_video_mode != 0x0E && /* 640x200/4 (EGA) */
+ screen_info.orig_video_mode != 0x10 && /* 640x350/4 (EGA) */
+ screen_info.orig_video_mode != 0x12) /* 640x480/4 (VGA) */
+ return -ENODEV;
+#endif
+ return 0;
+}
+
static void vga16fb_pan_var(struct fb_info *info,
struct fb_var_screeninfo *var)
{
@@ -1422,6 +1441,11 @@ static int __init vga16fb_init(void)
vga16fb_setup(option);
#endif
+
+ ret = check_mode_supported();
+ if (ret)
+ return ret;
+
ret = platform_driver_register(&vga16fb_driver);
if (!ret) {
diff --git a/drivers/virt/acrn/ioreq.c b/drivers/virt/acrn/ioreq.c
index 80b2e3f0e276..5ff1c53740c0 100644
--- a/drivers/virt/acrn/ioreq.c
+++ b/drivers/virt/acrn/ioreq.c
@@ -246,8 +246,7 @@ void acrn_ioreq_request_clear(struct acrn_vm *vm)
spin_lock_bh(&vm->ioreq_clients_lock);
client = vm->default_client;
if (client) {
- vcpu = find_next_bit(client->ioreqs_map,
- ACRN_IO_REQUEST_MAX, 0);
+ vcpu = find_first_bit(client->ioreqs_map, ACRN_IO_REQUEST_MAX);
while (vcpu < ACRN_IO_REQUEST_MAX) {
acrn_ioreq_complete_request(client, vcpu, NULL);
vcpu = find_next_bit(client->ioreqs_map,
diff --git a/drivers/virt/nitro_enclaves/Kconfig b/drivers/virt/nitro_enclaves/Kconfig
index f53740b941c0..2d3d98158121 100644
--- a/drivers/virt/nitro_enclaves/Kconfig
+++ b/drivers/virt/nitro_enclaves/Kconfig
@@ -14,3 +14,12 @@ config NITRO_ENCLAVES
To compile this driver as a module, choose M here.
The module will be called nitro_enclaves.
+
+config NITRO_ENCLAVES_MISC_DEV_TEST
+ bool "Tests for the misc device functionality of the Nitro Enclaves"
+ depends on NITRO_ENCLAVES && KUNIT=y
+ help
+ Enable KUnit tests for the misc device functionality of the Nitro
+ Enclaves. Select this option only if you will boot the kernel for
+ the purpose of running unit tests (e.g. under UML or qemu). If
+ unsure, say N.
diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev.c b/drivers/virt/nitro_enclaves/ne_misc_dev.c
index 6894ccb868a6..20c881b6a4b6 100644
--- a/drivers/virt/nitro_enclaves/ne_misc_dev.c
+++ b/drivers/virt/nitro_enclaves/ne_misc_dev.c
@@ -24,6 +24,7 @@
#include <linux/nitro_enclaves.h>
#include <linux/pci.h>
#include <linux/poll.h>
+#include <linux/range.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <uapi/linux/vm_sockets.h>
@@ -126,6 +127,16 @@ struct ne_cpu_pool {
static struct ne_cpu_pool ne_cpu_pool;
/**
+ * struct ne_phys_contig_mem_regions - Contiguous physical memory regions.
+ * @num: The number of regions that currently has.
+ * @regions: The array of physical memory regions.
+ */
+struct ne_phys_contig_mem_regions {
+ unsigned long num;
+ struct range *regions;
+};
+
+/**
* ne_check_enclaves_created() - Verify if at least one enclave has been created.
* @void: No parameters provided.
*
@@ -825,6 +836,72 @@ static int ne_sanity_check_user_mem_region_page(struct ne_enclave *ne_enclave,
}
/**
+ * ne_sanity_check_phys_mem_region() - Sanity check the start address and the size
+ * of a physical memory region.
+ * @phys_mem_region_paddr : Physical start address of the region to be sanity checked.
+ * @phys_mem_region_size : Length of the region to be sanity checked.
+ *
+ * Context: Process context. This function is called with the ne_enclave mutex held.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_sanity_check_phys_mem_region(u64 phys_mem_region_paddr,
+ u64 phys_mem_region_size)
+{
+ if (phys_mem_region_size & (NE_MIN_MEM_REGION_SIZE - 1)) {
+ dev_err_ratelimited(ne_misc_dev.this_device,
+ "Physical mem region size is not multiple of 2 MiB\n");
+
+ return -EINVAL;
+ }
+
+ if (!IS_ALIGNED(phys_mem_region_paddr, NE_MIN_MEM_REGION_SIZE)) {
+ dev_err_ratelimited(ne_misc_dev.this_device,
+ "Physical mem region address is not 2 MiB aligned\n");
+
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ne_merge_phys_contig_memory_regions() - Add a memory region and merge the adjacent
+ * regions if they are physically contiguous.
+ * @phys_contig_regions : Private data associated with the contiguous physical memory regions.
+ * @page_paddr : Physical start address of the region to be added.
+ * @page_size : Length of the region to be added.
+ *
+ * Context: Process context. This function is called with the ne_enclave mutex held.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int
+ne_merge_phys_contig_memory_regions(struct ne_phys_contig_mem_regions *phys_contig_regions,
+ u64 page_paddr, u64 page_size)
+{
+ unsigned long num = phys_contig_regions->num;
+ int rc = 0;
+
+ rc = ne_sanity_check_phys_mem_region(page_paddr, page_size);
+ if (rc < 0)
+ return rc;
+
+ /* Physically contiguous, just merge */
+ if (num && (phys_contig_regions->regions[num - 1].end + 1) == page_paddr) {
+ phys_contig_regions->regions[num - 1].end += page_size;
+ } else {
+ phys_contig_regions->regions[num].start = page_paddr;
+ phys_contig_regions->regions[num].end = page_paddr + page_size - 1;
+ phys_contig_regions->num++;
+ }
+
+ return 0;
+}
+
+/**
* ne_set_user_memory_region_ioctl() - Add user space memory region to the slot
* associated with the current enclave.
* @ne_enclave : Private data associated with the current enclave.
@@ -843,9 +920,8 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
unsigned long max_nr_pages = 0;
unsigned long memory_size = 0;
struct ne_mem_region *ne_mem_region = NULL;
- unsigned long nr_phys_contig_mem_regions = 0;
struct pci_dev *pdev = ne_devs.ne_pci_dev->pdev;
- struct page **phys_contig_mem_regions = NULL;
+ struct ne_phys_contig_mem_regions phys_contig_mem_regions = {};
int rc = -EINVAL;
rc = ne_sanity_check_user_mem_region(ne_enclave, mem_region);
@@ -866,9 +942,10 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
goto free_mem_region;
}
- phys_contig_mem_regions = kcalloc(max_nr_pages, sizeof(*phys_contig_mem_regions),
- GFP_KERNEL);
- if (!phys_contig_mem_regions) {
+ phys_contig_mem_regions.regions = kcalloc(max_nr_pages,
+ sizeof(*phys_contig_mem_regions.regions),
+ GFP_KERNEL);
+ if (!phys_contig_mem_regions.regions) {
rc = -ENOMEM;
goto free_mem_region;
@@ -902,26 +979,18 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
if (rc < 0)
goto put_pages;
- /*
- * TODO: Update once handled non-contiguous memory regions
- * received from user space or contiguous physical memory regions
- * larger than 2 MiB e.g. 8 MiB.
- */
- phys_contig_mem_regions[i] = ne_mem_region->pages[i];
+ rc = ne_merge_phys_contig_memory_regions(&phys_contig_mem_regions,
+ page_to_phys(ne_mem_region->pages[i]),
+ page_size(ne_mem_region->pages[i]));
+ if (rc < 0)
+ goto put_pages;
memory_size += page_size(ne_mem_region->pages[i]);
ne_mem_region->nr_pages++;
} while (memory_size < mem_region.memory_size);
- /*
- * TODO: Update once handled non-contiguous memory regions received
- * from user space or contiguous physical memory regions larger than
- * 2 MiB e.g. 8 MiB.
- */
- nr_phys_contig_mem_regions = ne_mem_region->nr_pages;
-
- if ((ne_enclave->nr_mem_regions + nr_phys_contig_mem_regions) >
+ if ((ne_enclave->nr_mem_regions + phys_contig_mem_regions.num) >
ne_enclave->max_mem_regions) {
dev_err_ratelimited(ne_misc_dev.this_device,
"Reached max memory regions %lld\n",
@@ -932,27 +1001,13 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
goto put_pages;
}
- for (i = 0; i < nr_phys_contig_mem_regions; i++) {
- u64 phys_region_addr = page_to_phys(phys_contig_mem_regions[i]);
- u64 phys_region_size = page_size(phys_contig_mem_regions[i]);
-
- if (phys_region_size & (NE_MIN_MEM_REGION_SIZE - 1)) {
- dev_err_ratelimited(ne_misc_dev.this_device,
- "Physical mem region size is not multiple of 2 MiB\n");
-
- rc = -EINVAL;
-
- goto put_pages;
- }
-
- if (!IS_ALIGNED(phys_region_addr, NE_MIN_MEM_REGION_SIZE)) {
- dev_err_ratelimited(ne_misc_dev.this_device,
- "Physical mem region address is not 2 MiB aligned\n");
-
- rc = -EINVAL;
+ for (i = 0; i < phys_contig_mem_regions.num; i++) {
+ u64 phys_region_addr = phys_contig_mem_regions.regions[i].start;
+ u64 phys_region_size = range_len(&phys_contig_mem_regions.regions[i]);
+ rc = ne_sanity_check_phys_mem_region(phys_region_addr, phys_region_size);
+ if (rc < 0)
goto put_pages;
- }
}
ne_mem_region->memory_size = mem_region.memory_size;
@@ -960,13 +1015,13 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
list_add(&ne_mem_region->mem_region_list_entry, &ne_enclave->mem_regions_list);
- for (i = 0; i < nr_phys_contig_mem_regions; i++) {
+ for (i = 0; i < phys_contig_mem_regions.num; i++) {
struct ne_pci_dev_cmd_reply cmd_reply = {};
struct slot_add_mem_req slot_add_mem_req = {};
slot_add_mem_req.slot_uid = ne_enclave->slot_uid;
- slot_add_mem_req.paddr = page_to_phys(phys_contig_mem_regions[i]);
- slot_add_mem_req.size = page_size(phys_contig_mem_regions[i]);
+ slot_add_mem_req.paddr = phys_contig_mem_regions.regions[i].start;
+ slot_add_mem_req.size = range_len(&phys_contig_mem_regions.regions[i]);
rc = ne_do_request(pdev, SLOT_ADD_MEM,
&slot_add_mem_req, sizeof(slot_add_mem_req),
@@ -975,7 +1030,7 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
dev_err_ratelimited(ne_misc_dev.this_device,
"Error in slot add mem [rc=%d]\n", rc);
- kfree(phys_contig_mem_regions);
+ kfree(phys_contig_mem_regions.regions);
/*
* Exit here without put pages as memory regions may
@@ -988,7 +1043,7 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
ne_enclave->nr_mem_regions++;
}
- kfree(phys_contig_mem_regions);
+ kfree(phys_contig_mem_regions.regions);
return 0;
@@ -996,7 +1051,7 @@ put_pages:
for (i = 0; i < ne_mem_region->nr_pages; i++)
put_page(ne_mem_region->pages[i]);
free_mem_region:
- kfree(phys_contig_mem_regions);
+ kfree(phys_contig_mem_regions.regions);
kfree(ne_mem_region->pages);
kfree(ne_mem_region);
@@ -1702,8 +1757,37 @@ static long ne_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return 0;
}
+#if defined(CONFIG_NITRO_ENCLAVES_MISC_DEV_TEST)
+#include "ne_misc_dev_test.c"
+
+static inline int ne_misc_dev_test_init(void)
+{
+ return __kunit_test_suites_init(ne_misc_dev_test_suites);
+}
+
+static inline void ne_misc_dev_test_exit(void)
+{
+ __kunit_test_suites_exit(ne_misc_dev_test_suites);
+}
+#else
+static inline int ne_misc_dev_test_init(void)
+{
+ return 0;
+}
+
+static inline void ne_misc_dev_test_exit(void)
+{
+}
+#endif
+
static int __init ne_init(void)
{
+ int rc = 0;
+
+ rc = ne_misc_dev_test_init();
+ if (rc < 0)
+ return rc;
+
mutex_init(&ne_cpu_pool.mutex);
return pci_register_driver(&ne_pci_driver);
@@ -1714,6 +1798,8 @@ static void __exit ne_exit(void)
pci_unregister_driver(&ne_pci_driver);
ne_teardown_cpu_pool();
+
+ ne_misc_dev_test_exit();
}
module_init(ne_init);
diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev_test.c b/drivers/virt/nitro_enclaves/ne_misc_dev_test.c
new file mode 100644
index 000000000000..265797bed0ea
--- /dev/null
+++ b/drivers/virt/nitro_enclaves/ne_misc_dev_test.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <kunit/test.h>
+
+#define MAX_PHYS_REGIONS 16
+#define INVALID_VALUE (~0ull)
+
+struct ne_phys_regions_test {
+ u64 paddr;
+ u64 size;
+ int expect_rc;
+ unsigned long expect_num;
+ u64 expect_last_paddr;
+ u64 expect_last_size;
+} phys_regions_test_cases[] = {
+ /*
+ * Add the region from 0x1000 to (0x1000 + 0x200000 - 1):
+ * Expected result:
+ * Failed, start address is not 2M-aligned
+ *
+ * Now the instance of struct ne_phys_contig_mem_regions is:
+ * num = 0
+ * regions = {}
+ */
+ {0x1000, 0x200000, -EINVAL, 0, INVALID_VALUE, INVALID_VALUE},
+
+ /*
+ * Add the region from 0x200000 to (0x200000 + 0x1000 - 1):
+ * Expected result:
+ * Failed, size is not 2M-aligned
+ *
+ * Now the instance of struct ne_phys_contig_mem_regions is:
+ * num = 0
+ * regions = {}
+ */
+ {0x200000, 0x1000, -EINVAL, 0, INVALID_VALUE, INVALID_VALUE},
+
+ /*
+ * Add the region from 0x200000 to (0x200000 + 0x200000 - 1):
+ * Expected result:
+ * Successful
+ *
+ * Now the instance of struct ne_phys_contig_mem_regions is:
+ * num = 1
+ * regions = {
+ * {start=0x200000, end=0x3fffff}, // len=0x200000
+ * }
+ */
+ {0x200000, 0x200000, 0, 1, 0x200000, 0x200000},
+
+ /*
+ * Add the region from 0x0 to (0x0 + 0x200000 - 1):
+ * Expected result:
+ * Successful
+ *
+ * Now the instance of struct ne_phys_contig_mem_regions is:
+ * num = 2
+ * regions = {
+ * {start=0x200000, end=0x3fffff}, // len=0x200000
+ * {start=0x0, end=0x1fffff}, // len=0x200000
+ * }
+ */
+ {0x0, 0x200000, 0, 2, 0x0, 0x200000},
+
+ /*
+ * Add the region from 0x600000 to (0x600000 + 0x400000 - 1):
+ * Expected result:
+ * Successful
+ *
+ * Now the instance of struct ne_phys_contig_mem_regions is:
+ * num = 3
+ * regions = {
+ * {start=0x200000, end=0x3fffff}, // len=0x200000
+ * {start=0x0, end=0x1fffff}, // len=0x200000
+ * {start=0x600000, end=0x9fffff}, // len=0x400000
+ * }
+ */
+ {0x600000, 0x400000, 0, 3, 0x600000, 0x400000},
+
+ /*
+ * Add the region from 0xa00000 to (0xa00000 + 0x400000 - 1):
+ * Expected result:
+ * Successful, merging case!
+ *
+ * Now the instance of struct ne_phys_contig_mem_regions is:
+ * num = 3
+ * regions = {
+ * {start=0x200000, end=0x3fffff}, // len=0x200000
+ * {start=0x0, end=0x1fffff}, // len=0x200000
+ * {start=0x600000, end=0xdfffff}, // len=0x800000
+ * }
+ */
+ {0xa00000, 0x400000, 0, 3, 0x600000, 0x800000},
+
+ /*
+ * Add the region from 0x1000 to (0x1000 + 0x200000 - 1):
+ * Expected result:
+ * Failed, start address is not 2M-aligned
+ *
+ * Now the instance of struct ne_phys_contig_mem_regions is:
+ * num = 3
+ * regions = {
+ * {start=0x200000, end=0x3fffff}, // len=0x200000
+ * {start=0x0, end=0x1fffff}, // len=0x200000
+ * {start=0x600000, end=0xdfffff}, // len=0x800000
+ * }
+ */
+ {0x1000, 0x200000, -EINVAL, 3, 0x600000, 0x800000},
+};
+
+static void ne_misc_dev_test_merge_phys_contig_memory_regions(struct kunit *test)
+{
+ struct ne_phys_contig_mem_regions phys_contig_mem_regions = {};
+ int rc = 0;
+ int i = 0;
+
+ phys_contig_mem_regions.regions = kunit_kcalloc(test, MAX_PHYS_REGIONS,
+ sizeof(*phys_contig_mem_regions.regions),
+ GFP_KERNEL);
+ KUNIT_ASSERT_TRUE(test, phys_contig_mem_regions.regions);
+
+ for (i = 0; i < ARRAY_SIZE(phys_regions_test_cases); i++) {
+ struct ne_phys_regions_test *test_case = &phys_regions_test_cases[i];
+ unsigned long num = 0;
+
+ rc = ne_merge_phys_contig_memory_regions(&phys_contig_mem_regions,
+ test_case->paddr, test_case->size);
+ KUNIT_EXPECT_EQ(test, rc, test_case->expect_rc);
+ KUNIT_EXPECT_EQ(test, phys_contig_mem_regions.num, test_case->expect_num);
+
+ if (test_case->expect_last_paddr == INVALID_VALUE)
+ continue;
+
+ num = phys_contig_mem_regions.num;
+ KUNIT_EXPECT_EQ(test, phys_contig_mem_regions.regions[num - 1].start,
+ test_case->expect_last_paddr);
+ KUNIT_EXPECT_EQ(test, range_len(&phys_contig_mem_regions.regions[num - 1]),
+ test_case->expect_last_size);
+ }
+
+ kunit_kfree(test, phys_contig_mem_regions.regions);
+}
+
+static struct kunit_case ne_misc_dev_test_cases[] = {
+ KUNIT_CASE(ne_misc_dev_test_merge_phys_contig_memory_regions),
+ {}
+};
+
+static struct kunit_suite ne_misc_dev_test_suite = {
+ .name = "ne_misc_dev_test",
+ .test_cases = ne_misc_dev_test_cases,
+};
+
+static struct kunit_suite *ne_misc_dev_test_suites[] = {
+ &ne_misc_dev_test_suite,
+ NULL
+};
diff --git a/drivers/virt/nitro_enclaves/ne_pci_dev.c b/drivers/virt/nitro_enclaves/ne_pci_dev.c
index 40b49ec8e30b..6b81e8f3a5dc 100644
--- a/drivers/virt/nitro_enclaves/ne_pci_dev.c
+++ b/drivers/virt/nitro_enclaves/ne_pci_dev.c
@@ -376,7 +376,6 @@ static void ne_teardown_msix(struct pci_dev *pdev)
free_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_pci_dev);
flush_work(&ne_pci_dev->notify_work);
- flush_workqueue(ne_pci_dev->event_wq);
destroy_workqueue(ne_pci_dev->event_wq);
free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 236081afe9a2..00ac9db792a4 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -204,6 +204,12 @@ int virtio_finalize_features(struct virtio_device *dev)
}
EXPORT_SYMBOL_GPL(virtio_finalize_features);
+void virtio_reset_device(struct virtio_device *dev)
+{
+ dev->config->reset(dev);
+}
+EXPORT_SYMBOL_GPL(virtio_reset_device);
+
static int virtio_dev_probe(struct device *_d)
{
int err, i;
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index c22ff0117b46..f4c34a2a6b8e 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -1056,7 +1056,7 @@ static void remove_common(struct virtio_balloon *vb)
return_free_pages_to_mm(vb, ULONG_MAX);
/* Now we reset the device so we can clean up the queues. */
- vb->vdev->config->reset(vb->vdev);
+ virtio_reset_device(vb->vdev);
vb->vdev->config->del_vqs(vb->vdev);
}
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index ce51ae165943..3aa46703872d 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -347,7 +347,7 @@ static void virtinput_remove(struct virtio_device *vdev)
spin_unlock_irqrestore(&vi->lock, flags);
input_unregister_device(vi->idev);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
while ((buf = virtqueue_detach_unused_buf(vi->sts)) != NULL)
kfree(buf);
vdev->config->del_vqs(vdev);
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 96e5a8782769..38becd8d578c 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -20,6 +20,7 @@
#include <linux/mutex.h>
#include <linux/bitmap.h>
#include <linux/lockdep.h>
+#include <linux/log2.h>
#include <acpi/acpi_numa.h>
@@ -592,7 +593,7 @@ static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm)
return -ENOMEM;
mutex_lock(&vm->hotplug_mutex);
- if (new_bitmap)
+ if (vm->sbm.sb_states)
memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE);
old_bitmap = vm->sbm.sb_states;
@@ -1120,15 +1121,18 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn,
*/
static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
{
- const unsigned long max_nr_pages = MAX_ORDER_NR_PAGES;
+ unsigned long order = MAX_ORDER - 1;
unsigned long i;
/*
- * We are always called at least with MAX_ORDER_NR_PAGES
- * granularity/alignment (e.g., the way subblocks work). All pages
- * inside such a block are alike.
+ * We might get called for ranges that don't cover properly aligned
+ * MAX_ORDER - 1 pages; however, we can only online properly aligned
+ * pages with an order of MAX_ORDER - 1 at maximum.
*/
- for (i = 0; i < nr_pages; i += max_nr_pages) {
+ while (!IS_ALIGNED(pfn | nr_pages, 1 << order))
+ order--;
+
+ for (i = 0; i < nr_pages; i += 1 << order) {
struct page *page = pfn_to_page(pfn + i);
/*
@@ -1138,14 +1142,12 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
* alike.
*/
if (PageDirty(page)) {
- virtio_mem_clear_fake_offline(pfn + i, max_nr_pages,
- false);
- generic_online_page(page, MAX_ORDER - 1);
+ virtio_mem_clear_fake_offline(pfn + i, 1 << order, false);
+ generic_online_page(page, order);
} else {
- virtio_mem_clear_fake_offline(pfn + i, max_nr_pages,
- true);
- free_contig_range(pfn + i, max_nr_pages);
- adjust_managed_page_count(page, max_nr_pages);
+ virtio_mem_clear_fake_offline(pfn + i, 1 << order, true);
+ free_contig_range(pfn + i, 1 << order);
+ adjust_managed_page_count(page, 1 << order);
}
}
}
@@ -1228,28 +1230,46 @@ static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
page_ref_inc(pfn_to_page(pfn + i));
}
-static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
+static void virtio_mem_online_page(struct virtio_mem *vm,
+ struct page *page, unsigned int order)
{
- const unsigned long addr = page_to_phys(page);
- unsigned long id, sb_id;
- struct virtio_mem *vm;
+ const unsigned long start = page_to_phys(page);
+ const unsigned long end = start + PFN_PHYS(1 << order);
+ unsigned long addr, next, id, sb_id, count;
bool do_online;
- rcu_read_lock();
- list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
- if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order)))
- continue;
+ /*
+ * We can get called with any order up to MAX_ORDER - 1. If our
+ * subblock size is smaller than that and we have a mixture of plugged
+ * and unplugged subblocks within such a page, we have to process in
+ * smaller granularity. In that case we'll adjust the order exactly once
+ * within the loop.
+ */
+ for (addr = start; addr < end; ) {
+ next = addr + PFN_PHYS(1 << order);
if (vm->in_sbm) {
- /*
- * We exploit here that subblocks have at least
- * MAX_ORDER_NR_PAGES size/alignment - so we cannot
- * cross subblocks within one call.
- */
id = virtio_mem_phys_to_mb_id(addr);
sb_id = virtio_mem_phys_to_sb_id(vm, addr);
- do_online = virtio_mem_sbm_test_sb_plugged(vm, id,
- sb_id, 1);
+ count = virtio_mem_phys_to_sb_id(vm, next - 1) - sb_id + 1;
+
+ if (virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, count)) {
+ /* Fully plugged. */
+ do_online = true;
+ } else if (count == 1 ||
+ virtio_mem_sbm_test_sb_unplugged(vm, id, sb_id, count)) {
+ /* Fully unplugged. */
+ do_online = false;
+ } else {
+ /*
+ * Mixture, process sub-blocks instead. This
+ * will be at least the size of a pageblock.
+ * We'll run into this case exactly once.
+ */
+ order = ilog2(vm->sbm.sb_size) - PAGE_SHIFT;
+ do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1);
+ continue;
+ }
} else {
/*
* If the whole block is marked fake offline, keep
@@ -1260,18 +1280,38 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
}
+ if (do_online)
+ generic_online_page(pfn_to_page(PFN_DOWN(addr)), order);
+ else
+ virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
+ false);
+ addr = next;
+ }
+}
+
+static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
+{
+ const unsigned long addr = page_to_phys(page);
+ struct virtio_mem *vm;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
+ /*
+ * Pages we're onlining will never cross memory blocks and,
+ * therefore, not virtio-mem devices.
+ */
+ if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order)))
+ continue;
+
/*
- * virtio_mem_set_fake_offline() might sleep, we don't need
- * the device anymore. See virtio_mem_remove() how races
+ * virtio_mem_set_fake_offline() might sleep. We can safely
+ * drop the RCU lock at this point because the device
+ * cannot go away. See virtio_mem_remove() how races
* between memory onlining and device removal are handled.
*/
rcu_read_unlock();
- if (do_online)
- generic_online_page(page, order);
- else
- virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
- false);
+ virtio_mem_online_page(vm, page, order);
return;
}
rcu_read_unlock();
@@ -2438,8 +2478,6 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm)
/*
* We want subblocks to span at least MAX_ORDER_NR_PAGES and
* pageblock_nr_pages pages. This:
- * - Simplifies our page onlining code (virtio_mem_online_page_cb)
- * and fake page onlining code (virtio_mem_fake_online).
* - Is required for now for alloc_contig_range() to work reliably -
* it doesn't properly handle smaller granularity on ZONE_NORMAL.
*/
@@ -2850,7 +2888,7 @@ static void virtio_mem_remove(struct virtio_device *vdev)
virtio_mem_deinit_hotplug(vm);
/* reset the device and cleanup the queues */
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
kfree(vm);
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index b3f8128b7983..34141b9abe27 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -138,7 +138,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
if (q_pfn >> 32) {
dev_err(&vp_dev->pci_dev->dev,
- "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n",
+ "platform bug: legacy virtio-pci must not be used with RAM above 0x%llxGB\n",
0x1ULL << (32 + PAGE_SHIFT - 30));
err = -E2BIG;
goto out_del_vq;
diff --git a/drivers/virtio/virtio_pci_legacy_dev.c b/drivers/virtio/virtio_pci_legacy_dev.c
index 9b97680dd02b..677d1f68bc9b 100644
--- a/drivers/virtio/virtio_pci_legacy_dev.c
+++ b/drivers/virtio/virtio_pci_legacy_dev.c
@@ -45,8 +45,10 @@ int vp_legacy_probe(struct virtio_pci_legacy_device *ldev)
return rc;
ldev->ioaddr = pci_iomap(pci_dev, 0, 0);
- if (!ldev->ioaddr)
+ if (!ldev->ioaddr) {
+ rc = -EIO;
goto err_iomap;
+ }
ldev->isr = ldev->ioaddr + VIRTIO_PCI_ISR;
diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c
index e11ed748e661..e8b3ff2b9fbc 100644
--- a/drivers/virtio/virtio_pci_modern_dev.c
+++ b/drivers/virtio/virtio_pci_modern_dev.c
@@ -345,7 +345,7 @@ err_map_common:
EXPORT_SYMBOL_GPL(vp_modern_probe);
/*
- * vp_modern_probe: remove and cleanup the modern virtio pci device
+ * vp_modern_remove: remove and cleanup the modern virtio pci device
* @mdev: the modern virtio-pci device
*/
void vp_modern_remove(struct virtio_pci_modern_device *mdev)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 028b05d44546..962f1477b1fa 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1197,8 +1197,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
if (virtqueue_use_indirect(_vq, total_sg)) {
err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
in_sgs, data, gfp);
- if (err != -ENOMEM)
+ if (err != -ENOMEM) {
+ END_USE(vq);
return err;
+ }
/* fall back on direct */
}
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index f85f860bc10b..7767a7f0119b 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -91,9 +91,8 @@ static u8 virtio_vdpa_get_status(struct virtio_device *vdev)
static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
- const struct vdpa_config_ops *ops = vdpa->config;
- return ops->set_status(vdpa, status);
+ return vdpa_set_status(vdpa, status);
}
static void virtio_vdpa_reset(struct virtio_device *vdev)
@@ -308,7 +307,7 @@ static u64 virtio_vdpa_get_features(struct virtio_device *vdev)
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
- return ops->get_features(vdpa);
+ return ops->get_device_features(vdpa);
}
static int virtio_vdpa_finalize_features(struct virtio_device *vdev)
@@ -318,7 +317,7 @@ static int virtio_vdpa_finalize_features(struct virtio_device *vdev)
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);
- return vdpa_set_features(vdpa, vdev->features);
+ return vdpa_set_features(vdpa, vdev->features, false);
}
static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
diff --git a/drivers/w1/slaves/w1_ds28e04.c b/drivers/w1/slaves/w1_ds28e04.c
index e4f336111edc..6cef6e2edb89 100644
--- a/drivers/w1/slaves/w1_ds28e04.c
+++ b/drivers/w1/slaves/w1_ds28e04.c
@@ -32,7 +32,7 @@ static int w1_strong_pullup = 1;
module_param_named(strong_pullup, w1_strong_pullup, int, 0);
/* enable/disable CRC checking on DS28E04-100 memory accesses */
-static char w1_enable_crccheck = 1;
+static bool w1_enable_crccheck = true;
#define W1_EEPROM_SIZE 512
#define W1_PAGE_COUNT 16
@@ -339,32 +339,18 @@ static BIN_ATTR_RW(pio, 1);
static ssize_t crccheck_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- if (put_user(w1_enable_crccheck + 0x30, buf))
- return -EFAULT;
-
- return sizeof(w1_enable_crccheck);
+ return sysfs_emit(buf, "%d\n", w1_enable_crccheck);
}
static ssize_t crccheck_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- char val;
-
- if (count != 1 || !buf)
- return -EINVAL;
+ int err = kstrtobool(buf, &w1_enable_crccheck);
- if (get_user(val, buf))
- return -EFAULT;
+ if (err)
+ return err;
- /* convert to decimal */
- val = val - 0x30;
- if (val != 0 && val != 1)
- return -EINVAL;
-
- /* set the new value */
- w1_enable_crccheck = val;
-
- return sizeof(w1_enable_crccheck);
+ return count;
}
static DEVICE_ATTR_RW(crccheck);
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index ca70c5f03206..565578002d79 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -1785,7 +1785,7 @@ static ssize_t alarms_store(struct device *device,
u8 new_config_register[3]; /* array of data to be written */
int temp, ret;
char *token = NULL;
- s8 tl, th, tt; /* 1 byte per value + temp ring order */
+ s8 tl, th; /* 1 byte per value + temp ring order */
char *p_args, *orig;
p_args = orig = kmalloc(size, GFP_KERNEL);
@@ -1836,9 +1836,8 @@ static ssize_t alarms_store(struct device *device,
th = int_to_short(temp);
/* Reorder if required th and tl */
- if (tl > th) {
- tt = tl; tl = th; th = tt;
- }
+ if (tl > th)
+ swap(tl, th);
/*
* Read the scratchpad to change only the required bits
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 1dc86eb1361a..c8fa79da23b3 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -207,6 +207,7 @@ config DA9055_WATCHDOG
config DA9063_WATCHDOG
tristate "Dialog DA9063 Watchdog"
depends on MFD_DA9063 || COMPILE_TEST
+ depends on I2C
select WATCHDOG_CORE
help
Support for the watchdog in the DA9063 PMIC.
@@ -680,10 +681,10 @@ config MAX77620_WATCHDOG
depends on MFD_MAX77620 || COMPILE_TEST
select WATCHDOG_CORE
help
- This is the driver for the Max77620 watchdog timer.
- Say 'Y' here to enable the watchdog timer support for
- MAX77620 chips. To compile this driver as a module,
- choose M here: the module will be called max77620_wdt.
+ This is the driver for the Max77620 watchdog timer.
+ Say 'Y' here to enable the watchdog timer support for
+ MAX77620 chips. To compile this driver as a module,
+ choose M here: the module will be called max77620_wdt.
config IMX2_WDT
tristate "IMX2+ Watchdog"
@@ -822,6 +823,7 @@ config MESON_WATCHDOG
config MEDIATEK_WATCHDOG
tristate "Mediatek SoCs watchdog support"
depends on ARCH_MEDIATEK || COMPILE_TEST
+ default ARCH_MEDIATEK
select WATCHDOG_CORE
select RESET_CONTROLLER
help
@@ -881,6 +883,14 @@ config RENESAS_RZAWDT
This driver adds watchdog support for the integrated watchdogs in the
Renesas RZ/A SoCs. These watchdogs can be used to reset a system.
+config RENESAS_RZG2LWDT
+ tristate "Renesas RZ/G2L WDT Watchdog"
+ depends on ARCH_RENESAS || COMPILE_TEST
+ select WATCHDOG_CORE
+ help
+ This driver adds watchdog support for the integrated watchdogs in the
+ Renesas RZ/G2L SoCs. These watchdogs can be used to reset a system.
+
config ASPEED_WATCHDOG
tristate "Aspeed BMC watchdog support"
depends on ARCH_ASPEED || COMPILE_TEST
@@ -940,6 +950,19 @@ config RTD119X_WATCHDOG
Say Y here to include support for the watchdog timer in
Realtek RTD1295 SoCs.
+config REALTEK_OTTO_WDT
+ tristate "Realtek Otto MIPS watchdog support"
+ depends on MACH_REALTEK_RTL || COMPILE_TEST
+ depends on COMMON_CLK
+ select WATCHDOG_CORE
+ default MACH_REALTEK_RTL
+ help
+ Say Y here to include support for the watchdog timer on Realtek
+ RTL838x, RTL839x, RTL930x SoCs. This watchdog has pretimeout
+ notifications and system reset on timeout.
+
+ When built as a module this will be called realtek_otto_wdt.
+
config SPRD_WATCHDOG
tristate "Spreadtrum watchdog support"
depends on ARCH_SPRD || COMPILE_TEST
@@ -976,6 +999,18 @@ config MSC313E_WATCHDOG
To compile this driver as a module, choose M here: the
module will be called msc313e_wdt.
+config APPLE_WATCHDOG
+ tristate "Apple SoC watchdog"
+ depends on ARCH_APPLE || COMPILE_TEST
+ select WATCHDOG_CORE
+ help
+ Say Y here to include support for the Watchdog found in Apple
+ SoCs such as the M1. Next to the common watchdog features this
+ driver is also required in order to reboot these SoCs.
+
+ To compile this driver as a module, choose M here: the
+ module will be called apple_wdt.
+
# X86 (i386 + ia64 + x86_64) Architecture
config ACQUIRE_WDT
@@ -1440,26 +1475,26 @@ config TQMX86_WDT
depends on X86
select WATCHDOG_CORE
help
- This is the driver for the hardware watchdog timer in the TQMX86 IO
- controller found on some of their ComExpress Modules.
+ This is the driver for the hardware watchdog timer in the TQMX86 IO
+ controller found on some of their ComExpress Modules.
- To compile this driver as a module, choose M here; the module
- will be called tqmx86_wdt.
+ To compile this driver as a module, choose M here; the module
+ will be called tqmx86_wdt.
- Most people will say N.
+ Most people will say N.
config VIA_WDT
tristate "VIA Watchdog Timer"
depends on X86 && PCI
select WATCHDOG_CORE
help
- This is the driver for the hardware watchdog timer on VIA
- southbridge chipset CX700, VX800/VX820 or VX855/VX875.
+ This is the driver for the hardware watchdog timer on VIA
+ southbridge chipset CX700, VX800/VX820 or VX855/VX875.
- To compile this driver as a module, choose M here; the module
- will be called via_wdt.
+ To compile this driver as a module, choose M here; the module
+ will be called via_wdt.
- Most people will say N.
+ Most people will say N.
config W83627HF_WDT
tristate "Watchdog timer for W83627HF/W83627DHG and compatibles"
@@ -1707,16 +1742,6 @@ config OCTEON_WDT
from the first interrupt, it is then only poked when the
device is written.
-config BCM63XX_WDT
- tristate "Broadcom BCM63xx hardware watchdog"
- depends on BCM63XX
- help
- Watchdog driver for the built in watchdog hardware in Broadcom
- BCM63xx SoC.
-
- To compile this driver as a loadable module, choose M here.
- The module will be called bcm63xx_wdt.
-
config BCM2835_WDT
tristate "Broadcom BCM2835 hardware watchdog"
depends on ARCH_BCM2835 || (OF && COMPILE_TEST)
@@ -1751,15 +1776,16 @@ config BCM_KONA_WDT_DEBUG
If in doubt, say 'N'.
config BCM7038_WDT
- tristate "BCM7038 Watchdog"
+ tristate "BCM63xx/BCM7038 Watchdog"
select WATCHDOG_CORE
depends on HAS_IOMEM
- depends on ARCH_BRCMSTB || BMIPS_GENERIC || COMPILE_TEST
+ depends on ARCH_BRCMSTB || BMIPS_GENERIC || BCM63XX || COMPILE_TEST
help
- Watchdog driver for the built-in hardware in Broadcom 7038 and
- later SoCs used in set-top boxes. BCM7038 was made public
- during the 2004 CES, and since then, many Broadcom chips use this
- watchdog block, including some cable modem chips.
+ Watchdog driver for the built-in hardware in Broadcom 7038 and
+ later SoCs used in set-top boxes. BCM7038 was made public
+ during the 2004 CES, and since then, many Broadcom chips use this
+ watchdog block, including some cable modem chips and DSL (63xx)
+ chips.
config IMGPDC_WDT
tristate "Imagination Technologies PDC Watchdog Timer"
@@ -2120,12 +2146,12 @@ config KEEMBAY_WATCHDOG
depends on ARCH_KEEMBAY || (ARM64 && COMPILE_TEST)
select WATCHDOG_CORE
help
- This option enable support for an In-secure watchdog timer driver for
- Intel Keem Bay SoC. This WDT has a 32 bit timer and decrements in every
- count unit. An interrupt will be triggered, when the count crosses
- the threshold configured in the register.
+ This option enable support for an In-secure watchdog timer driver for
+ Intel Keem Bay SoC. This WDT has a 32 bit timer and decrements in every
+ count unit. An interrupt will be triggered, when the count crosses
+ the threshold configured in the register.
- To compile this driver as a module, choose M here: the
- module will be called keembay_wdt.
+ To compile this driver as a module, choose M here: the
+ module will be called keembay_wdt.
endif # WATCHDOG
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 31b931846e32..f7da867e8782 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_LPC18XX_WATCHDOG) += lpc18xx_wdt.o
obj-$(CONFIG_BCM7038_WDT) += bcm7038_wdt.o
obj-$(CONFIG_RENESAS_WDT) += renesas_wdt.o
obj-$(CONFIG_RENESAS_RZAWDT) += rza_wdt.o
+obj-$(CONFIG_RENESAS_RZG2LWDT) += rzg2l_wdt.o
obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o
obj-$(CONFIG_STM32_WATCHDOG) += stm32_iwdg.o
obj-$(CONFIG_UNIPHIER_WATCHDOG) += uniphier_wdt.o
@@ -93,6 +94,7 @@ obj-$(CONFIG_PM8916_WATCHDOG) += pm8916_wdt.o
obj-$(CONFIG_ARM_SMC_WATCHDOG) += arm_smc_wdt.o
obj-$(CONFIG_VISCONTI_WATCHDOG) += visconti_wdt.o
obj-$(CONFIG_MSC313E_WATCHDOG) += msc313e_wdt.o
+obj-$(CONFIG_APPLE_WATCHDOG) += apple_wdt.o
# X86 (i386 + ia64 + x86_64) Architecture
obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o
@@ -154,7 +156,6 @@ obj-$(CONFIG_XILINX_WATCHDOG) += of_xilinx_wdt.o
# MIPS Architecture
obj-$(CONFIG_ATH79_WDT) += ath79_wdt.o
obj-$(CONFIG_BCM47XX_WDT) += bcm47xx_wdt.o
-obj-$(CONFIG_BCM63XX_WDT) += bcm63xx_wdt.o
obj-$(CONFIG_RC32434_WDT) += rc32434_wdt.o
obj-$(CONFIG_INDYDOG) += indydog.o
obj-$(CONFIG_JZ4740_WDT) += jz4740_wdt.o
@@ -171,6 +172,7 @@ obj-$(CONFIG_IMGPDC_WDT) += imgpdc_wdt.o
obj-$(CONFIG_MT7621_WDT) += mt7621_wdt.o
obj-$(CONFIG_PIC32_WDT) += pic32-wdt.o
obj-$(CONFIG_PIC32_DMT) += pic32-dmt.o
+obj-$(CONFIG_REALTEK_OTTO_WDT) += realtek_otto_wdt.o
# PARISC Architecture
diff --git a/drivers/watchdog/apple_wdt.c b/drivers/watchdog/apple_wdt.c
new file mode 100644
index 000000000000..16aca21f13d6
--- /dev/null
+++ b/drivers/watchdog/apple_wdt.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Apple SoC Watchdog driver
+ *
+ * Copyright (C) The Asahi Linux Contributors
+ */
+
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+/*
+ * Apple Watchdog MMIO registers
+ *
+ * This HW block has three separate watchdogs. WD0 resets the machine
+ * to recovery mode and is not very useful for us. WD1 and WD2 trigger a normal
+ * machine reset. WD0 additionally supports a configurable interrupt.
+ * This information can be used to implement pretimeout support at a later time.
+ *
+ * APPLE_WDT_WDx_CUR_TIME is a simple counter incremented for each tick of the
+ * reference clock. It can also be overwritten to any value.
+ * Whenever APPLE_WDT_CTRL_RESET_EN is set in APPLE_WDT_WDx_CTRL and
+ * APPLE_WDT_WDx_CUR_TIME >= APPLE_WDT_WDx_BITE_TIME the entire machine is
+ * reset.
+ * Whenever APPLE_WDT_CTRL_IRQ_EN is set and APPLE_WDTx_WD1_CUR_TIME >=
+ * APPLE_WDTx_WD1_BARK_TIME an interrupt is triggered and
+ * APPLE_WDT_CTRL_IRQ_STATUS is set. The interrupt can be cleared by writing
+ * 1 to APPLE_WDT_CTRL_IRQ_STATUS.
+ */
+#define APPLE_WDT_WD0_CUR_TIME 0x00
+#define APPLE_WDT_WD0_BITE_TIME 0x04
+#define APPLE_WDT_WD0_BARK_TIME 0x08
+#define APPLE_WDT_WD0_CTRL 0x0c
+
+#define APPLE_WDT_WD1_CUR_TIME 0x10
+#define APPLE_WDT_WD1_BITE_TIME 0x14
+#define APPLE_WDT_WD1_CTRL 0x1c
+
+#define APPLE_WDT_WD2_CUR_TIME 0x20
+#define APPLE_WDT_WD2_BITE_TIME 0x24
+#define APPLE_WDT_WD2_CTRL 0x2c
+
+#define APPLE_WDT_CTRL_IRQ_EN BIT(0)
+#define APPLE_WDT_CTRL_IRQ_STATUS BIT(1)
+#define APPLE_WDT_CTRL_RESET_EN BIT(2)
+
+#define APPLE_WDT_TIMEOUT_DEFAULT 30
+
+struct apple_wdt {
+ struct watchdog_device wdd;
+ void __iomem *regs;
+ unsigned long clk_rate;
+};
+
+static struct apple_wdt *to_apple_wdt(struct watchdog_device *wdd)
+{
+ return container_of(wdd, struct apple_wdt, wdd);
+}
+
+static int apple_wdt_start(struct watchdog_device *wdd)
+{
+ struct apple_wdt *wdt = to_apple_wdt(wdd);
+
+ writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CUR_TIME);
+ writel_relaxed(APPLE_WDT_CTRL_RESET_EN, wdt->regs + APPLE_WDT_WD1_CTRL);
+
+ return 0;
+}
+
+static int apple_wdt_stop(struct watchdog_device *wdd)
+{
+ struct apple_wdt *wdt = to_apple_wdt(wdd);
+
+ writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CTRL);
+
+ return 0;
+}
+
+static int apple_wdt_ping(struct watchdog_device *wdd)
+{
+ struct apple_wdt *wdt = to_apple_wdt(wdd);
+
+ writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CUR_TIME);
+
+ return 0;
+}
+
+static int apple_wdt_set_timeout(struct watchdog_device *wdd, unsigned int s)
+{
+ struct apple_wdt *wdt = to_apple_wdt(wdd);
+
+ writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CUR_TIME);
+ writel_relaxed(wdt->clk_rate * s, wdt->regs + APPLE_WDT_WD1_BITE_TIME);
+
+ wdd->timeout = s;
+
+ return 0;
+}
+
+static unsigned int apple_wdt_get_timeleft(struct watchdog_device *wdd)
+{
+ struct apple_wdt *wdt = to_apple_wdt(wdd);
+ u32 cur_time, reset_time;
+
+ cur_time = readl_relaxed(wdt->regs + APPLE_WDT_WD1_CUR_TIME);
+ reset_time = readl_relaxed(wdt->regs + APPLE_WDT_WD1_BITE_TIME);
+
+ return (reset_time - cur_time) / wdt->clk_rate;
+}
+
+static int apple_wdt_restart(struct watchdog_device *wdd, unsigned long mode,
+ void *cmd)
+{
+ struct apple_wdt *wdt = to_apple_wdt(wdd);
+
+ writel_relaxed(APPLE_WDT_CTRL_RESET_EN, wdt->regs + APPLE_WDT_WD1_CTRL);
+ writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_BITE_TIME);
+ writel_relaxed(0, wdt->regs + APPLE_WDT_WD1_CUR_TIME);
+
+ /*
+ * Flush writes and then wait for the SoC to reset. Even though the
+ * reset is queued almost immediately experiments have shown that it
+ * can take up to ~20-25ms until the SoC is actually reset. Just wait
+ * 50ms here to be safe.
+ */
+ (void)readl_relaxed(wdt->regs + APPLE_WDT_WD1_CUR_TIME);
+ mdelay(50);
+
+ return 0;
+}
+
+static void apple_wdt_clk_disable_unprepare(void *data)
+{
+ clk_disable_unprepare(data);
+}
+
+static struct watchdog_ops apple_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = apple_wdt_start,
+ .stop = apple_wdt_stop,
+ .ping = apple_wdt_ping,
+ .set_timeout = apple_wdt_set_timeout,
+ .get_timeleft = apple_wdt_get_timeleft,
+ .restart = apple_wdt_restart,
+};
+
+static struct watchdog_info apple_wdt_info = {
+ .identity = "Apple SoC Watchdog",
+ .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
+};
+
+static int apple_wdt_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct apple_wdt *wdt;
+ struct clk *clk;
+ u32 wdt_ctrl;
+ int ret;
+
+ wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
+ if (!wdt)
+ return -ENOMEM;
+
+ wdt->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(wdt->regs))
+ return PTR_ERR(wdt->regs);
+
+ clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ ret = clk_prepare_enable(clk);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(dev, apple_wdt_clk_disable_unprepare,
+ clk);
+ if (ret)
+ return ret;
+
+ wdt->clk_rate = clk_get_rate(clk);
+ if (!wdt->clk_rate)
+ return -EINVAL;
+
+ wdt->wdd.ops = &apple_wdt_ops;
+ wdt->wdd.info = &apple_wdt_info;
+ wdt->wdd.max_timeout = U32_MAX / wdt->clk_rate;
+ wdt->wdd.timeout = APPLE_WDT_TIMEOUT_DEFAULT;
+
+ wdt_ctrl = readl_relaxed(wdt->regs + APPLE_WDT_WD1_CTRL);
+ if (wdt_ctrl & APPLE_WDT_CTRL_RESET_EN)
+ set_bit(WDOG_HW_RUNNING, &wdt->wdd.status);
+
+ watchdog_init_timeout(&wdt->wdd, 0, dev);
+ apple_wdt_set_timeout(&wdt->wdd, wdt->wdd.timeout);
+ watchdog_stop_on_unregister(&wdt->wdd);
+ watchdog_set_restart_priority(&wdt->wdd, 128);
+
+ return devm_watchdog_register_device(dev, &wdt->wdd);
+}
+
+static const struct of_device_id apple_wdt_of_match[] = {
+ { .compatible = "apple,wdt" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, apple_wdt_of_match);
+
+static struct platform_driver apple_wdt_driver = {
+ .driver = {
+ .name = "apple-watchdog",
+ .of_match_table = apple_wdt_of_match,
+ },
+ .probe = apple_wdt_probe,
+};
+module_platform_driver(apple_wdt_driver);
+
+MODULE_DESCRIPTION("Apple SoC watchdog driver");
+MODULE_AUTHOR("Sven Peter <sven@svenpeter.dev>");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/watchdog/bcm63xx_wdt.c b/drivers/watchdog/bcm63xx_wdt.c
deleted file mode 100644
index 56cc262571a5..000000000000
--- a/drivers/watchdog/bcm63xx_wdt.c
+++ /dev/null
@@ -1,317 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Broadcom BCM63xx SoC watchdog driver
- *
- * Copyright (C) 2007, Miguel Gaio <miguel.gaio@efixo.com>
- * Copyright (C) 2008, Florian Fainelli <florian@openwrt.org>
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/bitops.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/uaccess.h>
-#include <linux/watchdog.h>
-#include <linux/timer.h>
-#include <linux/jiffies.h>
-#include <linux/interrupt.h>
-#include <linux/ptrace.h>
-#include <linux/resource.h>
-#include <linux/platform_device.h>
-
-#include <bcm63xx_cpu.h>
-#include <bcm63xx_io.h>
-#include <bcm63xx_regs.h>
-#include <bcm63xx_timer.h>
-
-#define PFX KBUILD_MODNAME
-
-#define WDT_HZ 50000000 /* Fclk */
-#define WDT_DEFAULT_TIME 30 /* seconds */
-#define WDT_MAX_TIME 256 /* seconds */
-
-static struct {
- void __iomem *regs;
- struct timer_list timer;
- unsigned long inuse;
- atomic_t ticks;
-} bcm63xx_wdt_device;
-
-static int expect_close;
-
-static int wdt_time = WDT_DEFAULT_TIME;
-static bool nowayout = WATCHDOG_NOWAYOUT;
-module_param(nowayout, bool, 0);
-MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
- __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-
-/* HW functions */
-static void bcm63xx_wdt_hw_start(void)
-{
- bcm_writel(0xfffffffe, bcm63xx_wdt_device.regs + WDT_DEFVAL_REG);
- bcm_writel(WDT_START_1, bcm63xx_wdt_device.regs + WDT_CTL_REG);
- bcm_writel(WDT_START_2, bcm63xx_wdt_device.regs + WDT_CTL_REG);
-}
-
-static void bcm63xx_wdt_hw_stop(void)
-{
- bcm_writel(WDT_STOP_1, bcm63xx_wdt_device.regs + WDT_CTL_REG);
- bcm_writel(WDT_STOP_2, bcm63xx_wdt_device.regs + WDT_CTL_REG);
-}
-
-static void bcm63xx_wdt_isr(void *data)
-{
- struct pt_regs *regs = get_irq_regs();
-
- die(PFX " fire", regs);
-}
-
-static void bcm63xx_timer_tick(struct timer_list *unused)
-{
- if (!atomic_dec_and_test(&bcm63xx_wdt_device.ticks)) {
- bcm63xx_wdt_hw_start();
- mod_timer(&bcm63xx_wdt_device.timer, jiffies + HZ);
- } else
- pr_crit("watchdog will restart system\n");
-}
-
-static void bcm63xx_wdt_pet(void)
-{
- atomic_set(&bcm63xx_wdt_device.ticks, wdt_time);
-}
-
-static void bcm63xx_wdt_start(void)
-{
- bcm63xx_wdt_pet();
- bcm63xx_timer_tick(0);
-}
-
-static void bcm63xx_wdt_pause(void)
-{
- del_timer_sync(&bcm63xx_wdt_device.timer);
- bcm63xx_wdt_hw_stop();
-}
-
-static int bcm63xx_wdt_settimeout(int new_time)
-{
- if ((new_time <= 0) || (new_time > WDT_MAX_TIME))
- return -EINVAL;
-
- wdt_time = new_time;
-
- return 0;
-}
-
-static int bcm63xx_wdt_open(struct inode *inode, struct file *file)
-{
- if (test_and_set_bit(0, &bcm63xx_wdt_device.inuse))
- return -EBUSY;
-
- bcm63xx_wdt_start();
- return stream_open(inode, file);
-}
-
-static int bcm63xx_wdt_release(struct inode *inode, struct file *file)
-{
- if (expect_close == 42)
- bcm63xx_wdt_pause();
- else {
- pr_crit("Unexpected close, not stopping watchdog!\n");
- bcm63xx_wdt_start();
- }
- clear_bit(0, &bcm63xx_wdt_device.inuse);
- expect_close = 0;
- return 0;
-}
-
-static ssize_t bcm63xx_wdt_write(struct file *file, const char *data,
- size_t len, loff_t *ppos)
-{
- if (len) {
- if (!nowayout) {
- size_t i;
-
- /* In case it was set long ago */
- expect_close = 0;
-
- for (i = 0; i != len; i++) {
- char c;
- if (get_user(c, data + i))
- return -EFAULT;
- if (c == 'V')
- expect_close = 42;
- }
- }
- bcm63xx_wdt_pet();
- }
- return len;
-}
-
-static struct watchdog_info bcm63xx_wdt_info = {
- .identity = PFX,
- .options = WDIOF_SETTIMEOUT |
- WDIOF_KEEPALIVEPING |
- WDIOF_MAGICCLOSE,
-};
-
-
-static long bcm63xx_wdt_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
- int __user *p = argp;
- int new_value, retval = -EINVAL;
-
- switch (cmd) {
- case WDIOC_GETSUPPORT:
- return copy_to_user(argp, &bcm63xx_wdt_info,
- sizeof(bcm63xx_wdt_info)) ? -EFAULT : 0;
-
- case WDIOC_GETSTATUS:
- case WDIOC_GETBOOTSTATUS:
- return put_user(0, p);
-
- case WDIOC_SETOPTIONS:
- if (get_user(new_value, p))
- return -EFAULT;
-
- if (new_value & WDIOS_DISABLECARD) {
- bcm63xx_wdt_pause();
- retval = 0;
- }
- if (new_value & WDIOS_ENABLECARD) {
- bcm63xx_wdt_start();
- retval = 0;
- }
-
- return retval;
-
- case WDIOC_KEEPALIVE:
- bcm63xx_wdt_pet();
- return 0;
-
- case WDIOC_SETTIMEOUT:
- if (get_user(new_value, p))
- return -EFAULT;
-
- if (bcm63xx_wdt_settimeout(new_value))
- return -EINVAL;
-
- bcm63xx_wdt_pet();
-
- fallthrough;
-
- case WDIOC_GETTIMEOUT:
- return put_user(wdt_time, p);
-
- default:
- return -ENOTTY;
-
- }
-}
-
-static const struct file_operations bcm63xx_wdt_fops = {
- .owner = THIS_MODULE,
- .llseek = no_llseek,
- .write = bcm63xx_wdt_write,
- .unlocked_ioctl = bcm63xx_wdt_ioctl,
- .compat_ioctl = compat_ptr_ioctl,
- .open = bcm63xx_wdt_open,
- .release = bcm63xx_wdt_release,
-};
-
-static struct miscdevice bcm63xx_wdt_miscdev = {
- .minor = WATCHDOG_MINOR,
- .name = "watchdog",
- .fops = &bcm63xx_wdt_fops,
-};
-
-
-static int bcm63xx_wdt_probe(struct platform_device *pdev)
-{
- int ret;
- struct resource *r;
-
- timer_setup(&bcm63xx_wdt_device.timer, bcm63xx_timer_tick, 0);
-
- r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!r) {
- dev_err(&pdev->dev, "failed to get resources\n");
- return -ENODEV;
- }
-
- bcm63xx_wdt_device.regs = devm_ioremap(&pdev->dev, r->start,
- resource_size(r));
- if (!bcm63xx_wdt_device.regs) {
- dev_err(&pdev->dev, "failed to remap I/O resources\n");
- return -ENXIO;
- }
-
- ret = bcm63xx_timer_register(TIMER_WDT_ID, bcm63xx_wdt_isr, NULL);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to register wdt timer isr\n");
- return ret;
- }
-
- if (bcm63xx_wdt_settimeout(wdt_time)) {
- bcm63xx_wdt_settimeout(WDT_DEFAULT_TIME);
- dev_info(&pdev->dev,
- ": wdt_time value must be 1 <= wdt_time <= 256, using %d\n",
- wdt_time);
- }
-
- ret = misc_register(&bcm63xx_wdt_miscdev);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to register watchdog device\n");
- goto unregister_timer;
- }
-
- dev_info(&pdev->dev, " started, timer margin: %d sec\n",
- WDT_DEFAULT_TIME);
-
- return 0;
-
-unregister_timer:
- bcm63xx_timer_unregister(TIMER_WDT_ID);
- return ret;
-}
-
-static int bcm63xx_wdt_remove(struct platform_device *pdev)
-{
- if (!nowayout)
- bcm63xx_wdt_pause();
-
- misc_deregister(&bcm63xx_wdt_miscdev);
- bcm63xx_timer_unregister(TIMER_WDT_ID);
- return 0;
-}
-
-static void bcm63xx_wdt_shutdown(struct platform_device *pdev)
-{
- bcm63xx_wdt_pause();
-}
-
-static struct platform_driver bcm63xx_wdt_driver = {
- .probe = bcm63xx_wdt_probe,
- .remove = bcm63xx_wdt_remove,
- .shutdown = bcm63xx_wdt_shutdown,
- .driver = {
- .name = "bcm63xx-wdt",
- }
-};
-
-module_platform_driver(bcm63xx_wdt_driver);
-
-MODULE_AUTHOR("Miguel Gaio <miguel.gaio@efixo.com>");
-MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
-MODULE_DESCRIPTION("Driver for the Broadcom BCM63xx SoC watchdog");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:bcm63xx-wdt");
diff --git a/drivers/watchdog/bcm7038_wdt.c b/drivers/watchdog/bcm7038_wdt.c
index acaaa0005d5b..8656a137e9a4 100644
--- a/drivers/watchdog/bcm7038_wdt.c
+++ b/drivers/watchdog/bcm7038_wdt.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/platform_data/bcm7038_wdt.h>
#include <linux/pm.h>
#include <linux/watchdog.h>
@@ -133,8 +134,10 @@ static void bcm7038_clk_disable_unprepare(void *data)
static int bcm7038_wdt_probe(struct platform_device *pdev)
{
+ struct bcm7038_wdt_platform_data *pdata = pdev->dev.platform_data;
struct device *dev = &pdev->dev;
struct bcm7038_watchdog *wdt;
+ const char *clk_name = NULL;
int err;
wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
@@ -147,7 +150,10 @@ static int bcm7038_wdt_probe(struct platform_device *pdev)
if (IS_ERR(wdt->base))
return PTR_ERR(wdt->base);
- wdt->clk = devm_clk_get(dev, NULL);
+ if (pdata && pdata->clk_name)
+ clk_name = pdata->clk_name;
+
+ wdt->clk = devm_clk_get(dev, clk_name);
/* If unable to get clock, use default frequency */
if (!IS_ERR(wdt->clk)) {
err = clk_prepare_enable(wdt->clk);
@@ -217,8 +223,15 @@ static const struct of_device_id bcm7038_wdt_match[] = {
};
MODULE_DEVICE_TABLE(of, bcm7038_wdt_match);
+static const struct platform_device_id bcm7038_wdt_devtype[] = {
+ { .name = "bcm63xx-wdt" },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(platform, bcm7038_wdt_devtype);
+
static struct platform_driver bcm7038_wdt_driver = {
.probe = bcm7038_wdt_probe,
+ .id_table = bcm7038_wdt_devtype,
.driver = {
.name = "bcm7038-wdt",
.of_match_table = bcm7038_wdt_match,
diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c
index d79ce64e26a9..9adad1862bbd 100644
--- a/drivers/watchdog/da9063_wdt.c
+++ b/drivers/watchdog/da9063_wdt.c
@@ -14,6 +14,7 @@
#include <linux/platform_device.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/i2c.h>
#include <linux/delay.h>
#include <linux/mfd/da9063/registers.h>
#include <linux/mfd/da9063/core.h>
@@ -169,14 +170,19 @@ static int da9063_wdt_restart(struct watchdog_device *wdd, unsigned long action,
void *data)
{
struct da9063 *da9063 = watchdog_get_drvdata(wdd);
+ struct i2c_client *client = to_i2c_client(da9063->dev);
int ret;
- ret = regmap_write(da9063->regmap, DA9063_REG_CONTROL_F,
- DA9063_SHUTDOWN);
- if (ret)
+ /* Don't use regmap because it is not atomic safe */
+ ret = i2c_smbus_write_byte_data(client, DA9063_REG_CONTROL_F,
+ DA9063_SHUTDOWN);
+ if (ret < 0)
dev_alert(da9063->dev, "Failed to shutdown (err = %d)\n",
ret);
+ /* wait for reset to assert... */
+ mdelay(500);
+
return ret;
}
diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index e6eaba6bae5b..584a56893b81 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -134,7 +134,7 @@ static unsigned int davinci_wdt_get_timeleft(struct watchdog_device *wdd)
timer_counter = ioread32(davinci_wdt->base + TIM12);
timer_counter |= ((u64)ioread32(davinci_wdt->base + TIM34) << 32);
- do_div(timer_counter, freq);
+ timer_counter = div64_ul(timer_counter, freq);
return wdd->timeout - timer_counter;
}
diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
index ee90c5f943f9..7f59c680de25 100644
--- a/drivers/watchdog/f71808e_wdt.c
+++ b/drivers/watchdog/f71808e_wdt.c
@@ -49,6 +49,7 @@
#define SIO_F81803_ID 0x1210 /* Chipset ID */
#define SIO_F81865_ID 0x0704 /* Chipset ID */
#define SIO_F81866_ID 0x1010 /* Chipset ID */
+#define SIO_F81966_ID 0x1502 /* F81804 chipset ID, same for f81966 */
#define F71808FG_REG_WDO_CONF 0xf0
#define F71808FG_REG_WDT_CONF 0xf5
@@ -105,7 +106,7 @@ MODULE_PARM_DESC(start_withtimeout, "Start watchdog timer on module load with"
" given initial timeout. Zero (default) disables this feature.");
enum chips { f71808fg, f71858fg, f71862fg, f71868, f71869, f71882fg, f71889fg,
- f81803, f81865, f81866};
+ f81803, f81865, f81866, f81966};
static const char * const fintek_wdt_names[] = {
"f71808fg",
@@ -118,6 +119,7 @@ static const char * const fintek_wdt_names[] = {
"f81803",
"f81865",
"f81866",
+ "f81966"
};
/* Super-I/O Function prototypes */
@@ -347,6 +349,7 @@ static int fintek_wdt_start(struct watchdog_device *wdd)
break;
case f81866:
+ case f81966:
/*
* GPIO1 Control Register when 27h BIT3:2 = 01 & BIT0 = 0.
* The PIN 70(GPIO15/WDTRST) is controlled by 2Ch:
@@ -373,7 +376,7 @@ static int fintek_wdt_start(struct watchdog_device *wdd)
superio_select(wd->sioaddr, SIO_F71808FG_LD_WDT);
superio_set_bit(wd->sioaddr, SIO_REG_ENABLE, 0);
- if (wd->type == f81865 || wd->type == f81866)
+ if (wd->type == f81865 || wd->type == f81866 || wd->type == f81966)
superio_set_bit(wd->sioaddr, F81865_REG_WDO_CONF,
F81865_FLAG_WDOUT_EN);
else
@@ -580,6 +583,9 @@ static int __init fintek_wdt_find(int sioaddr)
case SIO_F81866_ID:
type = f81866;
break;
+ case SIO_F81966_ID:
+ type = f81966;
+ break;
default:
pr_info("Unrecognized Fintek device: %04x\n",
(unsigned int)devid);
diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c
index 945f5e65db57..d3c9e2f6e63b 100644
--- a/drivers/watchdog/meson_gxbb_wdt.c
+++ b/drivers/watchdog/meson_gxbb_wdt.c
@@ -198,7 +198,6 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
meson_gxbb_wdt_set_timeout(&data->wdt_dev, data->wdt_dev.timeout);
- watchdog_stop_on_reboot(&data->wdt_dev);
return devm_watchdog_register_device(dev, &data->wdt_dev);
}
diff --git a/drivers/watchdog/msc313e_wdt.c b/drivers/watchdog/msc313e_wdt.c
index 0d497aa0fb7d..90171431fc59 100644
--- a/drivers/watchdog/msc313e_wdt.c
+++ b/drivers/watchdog/msc313e_wdt.c
@@ -120,6 +120,10 @@ static int msc313e_wdt_probe(struct platform_device *pdev)
priv->wdev.max_timeout = U32_MAX / clk_get_rate(priv->clk);
priv->wdev.timeout = MSC313E_WDT_DEFAULT_TIMEOUT;
+ /* If the period is non-zero the WDT is running */
+ if (readw(priv->base + REG_WDT_MAX_PRD_L) | (readw(priv->base + REG_WDT_MAX_PRD_H) << 16))
+ set_bit(WDOG_HW_RUNNING, &priv->wdev.status);
+
watchdog_set_drvdata(&priv->wdev, priv);
watchdog_init_timeout(&priv->wdev, timeout, dev);
diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
index 543cf38bd04e..4577a76dd464 100644
--- a/drivers/watchdog/mtk_wdt.c
+++ b/drivers/watchdog/mtk_wdt.c
@@ -339,7 +339,7 @@ static int mtk_wdt_probe(struct platform_device *pdev)
if (IS_ERR(mtk_wdt->wdt_base))
return PTR_ERR(mtk_wdt->wdt_base);
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq > 0) {
err = devm_request_irq(&pdev->dev, irq, mtk_wdt_isr, 0, "wdt_bark",
&mtk_wdt->wdt_dev);
diff --git a/drivers/watchdog/realtek_otto_wdt.c b/drivers/watchdog/realtek_otto_wdt.c
new file mode 100644
index 000000000000..60058a0c3ec4
--- /dev/null
+++ b/drivers/watchdog/realtek_otto_wdt.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Realtek Otto MIPS platform watchdog
+ *
+ * Watchdog timer that will reset the system after timeout, using the selected
+ * reset mode.
+ *
+ * Counter scaling and timeouts:
+ * - Base prescale of (2 << 25), providing tick duration T_0: 168ms @ 200MHz
+ * - PRESCALE: logarithmic prescaler adding a factor of {1, 2, 4, 8}
+ * - Phase 1: Times out after (PHASE1 + 1) × PRESCALE × T_0
+ * Generates an interrupt, WDT cannot be stopped after phase 1
+ * - Phase 2: starts after phase 1, times out after (PHASE2 + 1) × PRESCALE × T_0
+ * Resets the system according to RST_MODE
+ */
+
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/math.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/reboot.h>
+#include <linux/watchdog.h>
+
+#define OTTO_WDT_REG_CNTR 0x0
+#define OTTO_WDT_CNTR_PING BIT(31)
+
+#define OTTO_WDT_REG_INTR 0x4
+#define OTTO_WDT_INTR_PHASE_1 BIT(31)
+#define OTTO_WDT_INTR_PHASE_2 BIT(30)
+
+#define OTTO_WDT_REG_CTRL 0x8
+#define OTTO_WDT_CTRL_ENABLE BIT(31)
+#define OTTO_WDT_CTRL_PRESCALE GENMASK(30, 29)
+#define OTTO_WDT_CTRL_PHASE1 GENMASK(26, 22)
+#define OTTO_WDT_CTRL_PHASE2 GENMASK(19, 15)
+#define OTTO_WDT_CTRL_RST_MODE GENMASK(1, 0)
+#define OTTO_WDT_MODE_SOC 0
+#define OTTO_WDT_MODE_CPU 1
+#define OTTO_WDT_MODE_SOFTWARE 2
+#define OTTO_WDT_CTRL_DEFAULT OTTO_WDT_MODE_CPU
+
+#define OTTO_WDT_PRESCALE_MAX 3
+
+/*
+ * One higher than the max values contained in PHASE{1,2}, since a value of 0
+ * corresponds to one tick.
+ */
+#define OTTO_WDT_PHASE_TICKS_MAX 32
+
+/*
+ * The maximum reset delay is actually 2×32 ticks, but that would require large
+ * pretimeout values for timeouts longer than 32 ticks. Limit the maximum timeout
+ * to 32 + 1 to ensure small pretimeout values can be configured as expected.
+ */
+#define OTTO_WDT_TIMEOUT_TICKS_MAX (OTTO_WDT_PHASE_TICKS_MAX + 1)
+
+struct otto_wdt_ctrl {
+ struct watchdog_device wdev;
+ struct device *dev;
+ void __iomem *base;
+ unsigned int clk_rate_khz;
+ int irq_phase1;
+};
+
+static int otto_wdt_start(struct watchdog_device *wdev)
+{
+ struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev);
+ u32 v;
+
+ v = ioread32(ctrl->base + OTTO_WDT_REG_CTRL);
+ v |= OTTO_WDT_CTRL_ENABLE;
+ iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL);
+
+ return 0;
+}
+
+static int otto_wdt_stop(struct watchdog_device *wdev)
+{
+ struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev);
+ u32 v;
+
+ v = ioread32(ctrl->base + OTTO_WDT_REG_CTRL);
+ v &= ~OTTO_WDT_CTRL_ENABLE;
+ iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL);
+
+ return 0;
+}
+
+static int otto_wdt_ping(struct watchdog_device *wdev)
+{
+ struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev);
+
+ iowrite32(OTTO_WDT_CNTR_PING, ctrl->base + OTTO_WDT_REG_CNTR);
+
+ return 0;
+}
+
+static int otto_wdt_tick_ms(struct otto_wdt_ctrl *ctrl, int prescale)
+{
+ return DIV_ROUND_CLOSEST(1 << (25 + prescale), ctrl->clk_rate_khz);
+}
+
+/*
+ * The timer asserts the PHASE1/PHASE2 IRQs when the number of ticks exceeds
+ * the value stored in those fields. This means each phase will run for at least
+ * one tick, so small values need to be clamped to correctly reflect the timeout.
+ */
+static inline unsigned int div_round_ticks(unsigned int val, unsigned int tick_duration,
+ unsigned int min_ticks)
+{
+ return max(min_ticks, DIV_ROUND_UP(val, tick_duration));
+}
+
+static int otto_wdt_determine_timeouts(struct watchdog_device *wdev, unsigned int timeout,
+ unsigned int pretimeout)
+{
+ struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev);
+ unsigned int pretimeout_ms = pretimeout * 1000;
+ unsigned int timeout_ms = timeout * 1000;
+ unsigned int prescale_next = 0;
+ unsigned int phase1_ticks;
+ unsigned int phase2_ticks;
+ unsigned int total_ticks;
+ unsigned int prescale;
+ unsigned int tick_ms;
+ u32 v;
+
+ do {
+ prescale = prescale_next;
+ if (prescale > OTTO_WDT_PRESCALE_MAX)
+ return -EINVAL;
+
+ tick_ms = otto_wdt_tick_ms(ctrl, prescale);
+ total_ticks = div_round_ticks(timeout_ms, tick_ms, 2);
+ phase1_ticks = div_round_ticks(timeout_ms - pretimeout_ms, tick_ms, 1);
+ phase2_ticks = total_ticks - phase1_ticks;
+
+ prescale_next++;
+ } while (phase1_ticks > OTTO_WDT_PHASE_TICKS_MAX
+ || phase2_ticks > OTTO_WDT_PHASE_TICKS_MAX);
+
+ v = ioread32(ctrl->base + OTTO_WDT_REG_CTRL);
+
+ v &= ~(OTTO_WDT_CTRL_PRESCALE | OTTO_WDT_CTRL_PHASE1 | OTTO_WDT_CTRL_PHASE2);
+ v |= FIELD_PREP(OTTO_WDT_CTRL_PHASE1, phase1_ticks - 1);
+ v |= FIELD_PREP(OTTO_WDT_CTRL_PHASE2, phase2_ticks - 1);
+ v |= FIELD_PREP(OTTO_WDT_CTRL_PRESCALE, prescale);
+
+ iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL);
+
+ timeout_ms = total_ticks * tick_ms;
+ ctrl->wdev.timeout = timeout_ms / 1000;
+
+ pretimeout_ms = phase2_ticks * tick_ms;
+ ctrl->wdev.pretimeout = pretimeout_ms / 1000;
+
+ return 0;
+}
+
+static int otto_wdt_set_timeout(struct watchdog_device *wdev, unsigned int val)
+{
+ return otto_wdt_determine_timeouts(wdev, val, min(wdev->pretimeout, val - 1));
+}
+
+static int otto_wdt_set_pretimeout(struct watchdog_device *wdev, unsigned int val)
+{
+ return otto_wdt_determine_timeouts(wdev, wdev->timeout, val);
+}
+
+static int otto_wdt_restart(struct watchdog_device *wdev, unsigned long reboot_mode,
+ void *data)
+{
+ struct otto_wdt_ctrl *ctrl = watchdog_get_drvdata(wdev);
+ u32 reset_mode;
+ u32 v;
+
+ disable_irq(ctrl->irq_phase1);
+
+ switch (reboot_mode) {
+ case REBOOT_SOFT:
+ reset_mode = OTTO_WDT_MODE_SOFTWARE;
+ break;
+ case REBOOT_WARM:
+ reset_mode = OTTO_WDT_MODE_CPU;
+ break;
+ default:
+ reset_mode = OTTO_WDT_MODE_SOC;
+ break;
+ }
+
+ /* Configure for shortest timeout and wait for reset to occur */
+ v = FIELD_PREP(OTTO_WDT_CTRL_RST_MODE, reset_mode) | OTTO_WDT_CTRL_ENABLE;
+ iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL);
+
+ mdelay(3 * otto_wdt_tick_ms(ctrl, 0));
+
+ return 0;
+}
+
+static irqreturn_t otto_wdt_phase1_isr(int irq, void *dev_id)
+{
+ struct otto_wdt_ctrl *ctrl = dev_id;
+
+ iowrite32(OTTO_WDT_INTR_PHASE_1, ctrl->base + OTTO_WDT_REG_INTR);
+ dev_crit(ctrl->dev, "phase 1 timeout\n");
+ watchdog_notify_pretimeout(&ctrl->wdev);
+
+ return IRQ_HANDLED;
+}
+
+static const struct watchdog_ops otto_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = otto_wdt_start,
+ .stop = otto_wdt_stop,
+ .ping = otto_wdt_ping,
+ .set_timeout = otto_wdt_set_timeout,
+ .set_pretimeout = otto_wdt_set_pretimeout,
+ .restart = otto_wdt_restart,
+};
+
+static const struct watchdog_info otto_wdt_info = {
+ .identity = "Realtek Otto watchdog timer",
+ .options = WDIOF_KEEPALIVEPING |
+ WDIOF_MAGICCLOSE |
+ WDIOF_SETTIMEOUT |
+ WDIOF_PRETIMEOUT,
+};
+
+static void otto_wdt_clock_action(void *data)
+{
+ clk_disable_unprepare(data);
+}
+
+static int otto_wdt_probe_clk(struct otto_wdt_ctrl *ctrl)
+{
+ struct clk *clk = devm_clk_get(ctrl->dev, NULL);
+ int ret;
+
+ if (IS_ERR(clk))
+ return dev_err_probe(ctrl->dev, PTR_ERR(clk), "Failed to get clock\n");
+
+ ret = clk_prepare_enable(clk);
+ if (ret)
+ return dev_err_probe(ctrl->dev, ret, "Failed to enable clock\n");
+
+ ret = devm_add_action_or_reset(ctrl->dev, otto_wdt_clock_action, clk);
+ if (ret)
+ return ret;
+
+ ctrl->clk_rate_khz = clk_get_rate(clk) / 1000;
+ if (ctrl->clk_rate_khz == 0)
+ return dev_err_probe(ctrl->dev, -ENXIO, "Failed to get clock rate\n");
+
+ return 0;
+}
+
+static int otto_wdt_probe_reset_mode(struct otto_wdt_ctrl *ctrl)
+{
+ static const char *mode_property = "realtek,reset-mode";
+ const struct fwnode_handle *node = ctrl->dev->fwnode;
+ int mode_count;
+ u32 mode;
+ u32 v;
+
+ if (!node)
+ return -ENXIO;
+
+ mode_count = fwnode_property_string_array_count(node, mode_property);
+ if (mode_count < 0)
+ return mode_count;
+ else if (mode_count == 0)
+ return 0;
+ else if (mode_count != 1)
+ return -EINVAL;
+
+ if (fwnode_property_match_string(node, mode_property, "soc") == 0)
+ mode = OTTO_WDT_MODE_SOC;
+ else if (fwnode_property_match_string(node, mode_property, "cpu") == 0)
+ mode = OTTO_WDT_MODE_CPU;
+ else if (fwnode_property_match_string(node, mode_property, "software") == 0)
+ mode = OTTO_WDT_MODE_SOFTWARE;
+ else
+ return -EINVAL;
+
+ v = ioread32(ctrl->base + OTTO_WDT_REG_CTRL);
+ v &= ~OTTO_WDT_CTRL_RST_MODE;
+ v |= FIELD_PREP(OTTO_WDT_CTRL_RST_MODE, mode);
+ iowrite32(v, ctrl->base + OTTO_WDT_REG_CTRL);
+
+ return 0;
+}
+
+static int otto_wdt_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct otto_wdt_ctrl *ctrl;
+ unsigned int max_tick_ms;
+ int ret;
+
+ ctrl = devm_kzalloc(dev, sizeof(*ctrl), GFP_KERNEL);
+ if (!ctrl)
+ return -ENOMEM;
+
+ ctrl->dev = dev;
+ ctrl->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(ctrl->base))
+ return PTR_ERR(ctrl->base);
+
+ /* Clear any old interrupts and reset initial state */
+ iowrite32(OTTO_WDT_INTR_PHASE_1 | OTTO_WDT_INTR_PHASE_2,
+ ctrl->base + OTTO_WDT_REG_INTR);
+ iowrite32(OTTO_WDT_CTRL_DEFAULT, ctrl->base + OTTO_WDT_REG_CTRL);
+
+ ret = otto_wdt_probe_clk(ctrl);
+ if (ret)
+ return ret;
+
+ ctrl->irq_phase1 = platform_get_irq_byname(pdev, "phase1");
+ if (ctrl->irq_phase1 < 0)
+ return ctrl->irq_phase1;
+
+ ret = devm_request_irq(dev, ctrl->irq_phase1, otto_wdt_phase1_isr, 0,
+ "realtek-otto-wdt", ctrl);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to get IRQ for phase1\n");
+
+ ret = otto_wdt_probe_reset_mode(ctrl);
+ if (ret)
+ return dev_err_probe(dev, ret, "Invalid reset mode specified\n");
+
+ ctrl->wdev.parent = dev;
+ ctrl->wdev.info = &otto_wdt_info;
+ ctrl->wdev.ops = &otto_wdt_ops;
+
+ /*
+ * Since pretimeout cannot be disabled, min. timeout is twice the
+ * subsystem resolution. Max. timeout is ca. 43s at a bus clock of 200MHz.
+ */
+ ctrl->wdev.min_timeout = 2;
+ max_tick_ms = otto_wdt_tick_ms(ctrl, OTTO_WDT_PRESCALE_MAX);
+ ctrl->wdev.max_hw_heartbeat_ms = max_tick_ms * OTTO_WDT_TIMEOUT_TICKS_MAX;
+ ctrl->wdev.timeout = min(30U, ctrl->wdev.max_hw_heartbeat_ms / 1000);
+
+ watchdog_set_drvdata(&ctrl->wdev, ctrl);
+ watchdog_init_timeout(&ctrl->wdev, 0, dev);
+ watchdog_stop_on_reboot(&ctrl->wdev);
+ watchdog_set_restart_priority(&ctrl->wdev, 128);
+
+ ret = otto_wdt_determine_timeouts(&ctrl->wdev, ctrl->wdev.timeout, 1);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to set timeout\n");
+
+ return devm_watchdog_register_device(dev, &ctrl->wdev);
+}
+
+static const struct of_device_id otto_wdt_ids[] = {
+ { .compatible = "realtek,rtl8380-wdt" },
+ { .compatible = "realtek,rtl8390-wdt" },
+ { .compatible = "realtek,rtl9300-wdt" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, otto_wdt_ids);
+
+static struct platform_driver otto_wdt_driver = {
+ .probe = otto_wdt_probe,
+ .driver = {
+ .name = "realtek-otto-watchdog",
+ .of_match_table = otto_wdt_ids,
+ },
+};
+module_platform_driver(otto_wdt_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Sander Vanheule <sander@svanheule.net>");
+MODULE_DESCRIPTION("Realtek Otto watchdog timer driver");
diff --git a/drivers/watchdog/rzg2l_wdt.c b/drivers/watchdog/rzg2l_wdt.c
new file mode 100644
index 000000000000..6b426df34fd6
--- /dev/null
+++ b/drivers/watchdog/rzg2l_wdt.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/G2L WDT Watchdog Driver
+ *
+ * Copyright (C) 2021 Renesas Electronics Corporation
+ */
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/units.h>
+#include <linux/watchdog.h>
+
+#define WDTCNT 0x00
+#define WDTSET 0x04
+#define WDTTIM 0x08
+#define WDTINT 0x0C
+#define WDTCNT_WDTEN BIT(0)
+#define WDTINT_INTDISP BIT(0)
+
+#define WDT_DEFAULT_TIMEOUT 60U
+
+/* Setting period time register only 12 bit set in WDTSET[31:20] */
+#define WDTSET_COUNTER_MASK (0xFFF00000)
+#define WDTSET_COUNTER_VAL(f) ((f) << 20)
+
+#define F2CYCLE_NSEC(f) (1000000000 / (f))
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+struct rzg2l_wdt_priv {
+ void __iomem *base;
+ struct watchdog_device wdev;
+ struct reset_control *rstc;
+ unsigned long osc_clk_rate;
+ unsigned long delay;
+};
+
+static void rzg2l_wdt_wait_delay(struct rzg2l_wdt_priv *priv)
+{
+ /* delay timer when change the setting register */
+ ndelay(priv->delay);
+}
+
+static u32 rzg2l_wdt_get_cycle_usec(unsigned long cycle, u32 wdttime)
+{
+ u64 timer_cycle_us = 1024 * 1024 * (wdttime + 1) * MICRO;
+
+ return div64_ul(timer_cycle_us, cycle);
+}
+
+static void rzg2l_wdt_write(struct rzg2l_wdt_priv *priv, u32 val, unsigned int reg)
+{
+ if (reg == WDTSET)
+ val &= WDTSET_COUNTER_MASK;
+
+ writel_relaxed(val, priv->base + reg);
+ /* Registers other than the WDTINT is always synchronized with WDT_CLK */
+ if (reg != WDTINT)
+ rzg2l_wdt_wait_delay(priv);
+}
+
+static void rzg2l_wdt_init_timeout(struct watchdog_device *wdev)
+{
+ struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev);
+ u32 time_out;
+
+ /* Clear Lapsed Time Register and clear Interrupt */
+ rzg2l_wdt_write(priv, WDTINT_INTDISP, WDTINT);
+ /* 2 consecutive overflow cycle needed to trigger reset */
+ time_out = (wdev->timeout * (MICRO / 2)) /
+ rzg2l_wdt_get_cycle_usec(priv->osc_clk_rate, 0);
+ rzg2l_wdt_write(priv, WDTSET_COUNTER_VAL(time_out), WDTSET);
+}
+
+static int rzg2l_wdt_start(struct watchdog_device *wdev)
+{
+ struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev);
+
+ reset_control_deassert(priv->rstc);
+ pm_runtime_get_sync(wdev->parent);
+
+ /* Initialize time out */
+ rzg2l_wdt_init_timeout(wdev);
+
+ /* Initialize watchdog counter register */
+ rzg2l_wdt_write(priv, 0, WDTTIM);
+
+ /* Enable watchdog timer*/
+ rzg2l_wdt_write(priv, WDTCNT_WDTEN, WDTCNT);
+
+ return 0;
+}
+
+static int rzg2l_wdt_stop(struct watchdog_device *wdev)
+{
+ struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev);
+
+ pm_runtime_put(wdev->parent);
+ reset_control_assert(priv->rstc);
+
+ return 0;
+}
+
+static int rzg2l_wdt_restart(struct watchdog_device *wdev,
+ unsigned long action, void *data)
+{
+ struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev);
+
+ /* Reset the module before we modify any register */
+ reset_control_reset(priv->rstc);
+ pm_runtime_get_sync(wdev->parent);
+
+ /* smallest counter value to reboot soon */
+ rzg2l_wdt_write(priv, WDTSET_COUNTER_VAL(1), WDTSET);
+
+ /* Enable watchdog timer*/
+ rzg2l_wdt_write(priv, WDTCNT_WDTEN, WDTCNT);
+
+ return 0;
+}
+
+static const struct watchdog_info rzg2l_wdt_ident = {
+ .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
+ .identity = "Renesas RZ/G2L WDT Watchdog",
+};
+
+static int rzg2l_wdt_ping(struct watchdog_device *wdev)
+{
+ struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev);
+
+ rzg2l_wdt_write(priv, WDTINT_INTDISP, WDTINT);
+
+ return 0;
+}
+
+static const struct watchdog_ops rzg2l_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = rzg2l_wdt_start,
+ .stop = rzg2l_wdt_stop,
+ .ping = rzg2l_wdt_ping,
+ .restart = rzg2l_wdt_restart,
+};
+
+static void rzg2l_wdt_reset_assert_pm_disable_put(void *data)
+{
+ struct watchdog_device *wdev = data;
+ struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev);
+
+ pm_runtime_put(wdev->parent);
+ pm_runtime_disable(wdev->parent);
+ reset_control_assert(priv->rstc);
+}
+
+static int rzg2l_wdt_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct rzg2l_wdt_priv *priv;
+ unsigned long pclk_rate;
+ struct clk *wdt_clk;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ /* Get watchdog main clock */
+ wdt_clk = clk_get(&pdev->dev, "oscclk");
+ if (IS_ERR(wdt_clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(wdt_clk), "no oscclk");
+
+ priv->osc_clk_rate = clk_get_rate(wdt_clk);
+ clk_put(wdt_clk);
+ if (!priv->osc_clk_rate)
+ return dev_err_probe(&pdev->dev, -EINVAL, "oscclk rate is 0");
+
+ /* Get Peripheral clock */
+ wdt_clk = clk_get(&pdev->dev, "pclk");
+ if (IS_ERR(wdt_clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(wdt_clk), "no pclk");
+
+ pclk_rate = clk_get_rate(wdt_clk);
+ clk_put(wdt_clk);
+ if (!pclk_rate)
+ return dev_err_probe(&pdev->dev, -EINVAL, "pclk rate is 0");
+
+ priv->delay = F2CYCLE_NSEC(priv->osc_clk_rate) * 6 + F2CYCLE_NSEC(pclk_rate) * 9;
+
+ priv->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+ if (IS_ERR(priv->rstc))
+ return dev_err_probe(&pdev->dev, PTR_ERR(priv->rstc),
+ "failed to get cpg reset");
+
+ reset_control_deassert(priv->rstc);
+ pm_runtime_enable(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0) {
+ dev_err(dev, "pm_runtime_resume_and_get failed ret=%pe", ERR_PTR(ret));
+ goto out_pm_get;
+ }
+
+ priv->wdev.info = &rzg2l_wdt_ident;
+ priv->wdev.ops = &rzg2l_wdt_ops;
+ priv->wdev.parent = dev;
+ priv->wdev.min_timeout = 1;
+ priv->wdev.max_timeout = rzg2l_wdt_get_cycle_usec(priv->osc_clk_rate, 0xfff) /
+ USEC_PER_SEC;
+ priv->wdev.timeout = WDT_DEFAULT_TIMEOUT;
+
+ watchdog_set_drvdata(&priv->wdev, priv);
+ ret = devm_add_action_or_reset(&pdev->dev,
+ rzg2l_wdt_reset_assert_pm_disable_put,
+ &priv->wdev);
+ if (ret < 0)
+ return ret;
+
+ watchdog_set_nowayout(&priv->wdev, nowayout);
+ watchdog_stop_on_unregister(&priv->wdev);
+
+ ret = watchdog_init_timeout(&priv->wdev, 0, dev);
+ if (ret)
+ dev_warn(dev, "Specified timeout invalid, using default");
+
+ return devm_watchdog_register_device(&pdev->dev, &priv->wdev);
+
+out_pm_get:
+ pm_runtime_disable(dev);
+ reset_control_assert(priv->rstc);
+
+ return ret;
+}
+
+static const struct of_device_id rzg2l_wdt_ids[] = {
+ { .compatible = "renesas,rzg2l-wdt", },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rzg2l_wdt_ids);
+
+static struct platform_driver rzg2l_wdt_driver = {
+ .driver = {
+ .name = "rzg2l_wdt",
+ .of_match_table = rzg2l_wdt_ids,
+ },
+ .probe = rzg2l_wdt_probe,
+};
+module_platform_driver(rzg2l_wdt_driver);
+
+MODULE_DESCRIPTION("Renesas RZ/G2L WDT Watchdog Driver");
+MODULE_AUTHOR("Biju Das <biju.das.jz@bp.renesas.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 2395f353e52d..6db22f2e3a4f 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -56,13 +56,58 @@
#define EXYNOS5_RST_STAT_REG_OFFSET 0x0404
#define EXYNOS5_WDT_DISABLE_REG_OFFSET 0x0408
#define EXYNOS5_WDT_MASK_RESET_REG_OFFSET 0x040c
-#define QUIRK_HAS_PMU_CONFIG (1 << 0)
-#define QUIRK_HAS_RST_STAT (1 << 1)
-#define QUIRK_HAS_WTCLRINT_REG (1 << 2)
+#define EXYNOS850_CLUSTER0_NONCPU_OUT 0x1220
+#define EXYNOS850_CLUSTER0_NONCPU_INT_EN 0x1244
+#define EXYNOS850_CLUSTER1_NONCPU_OUT 0x1620
+#define EXYNOS850_CLUSTER1_NONCPU_INT_EN 0x1644
+
+#define EXYNOS850_CLUSTER0_WDTRESET_BIT 24
+#define EXYNOS850_CLUSTER1_WDTRESET_BIT 23
+
+/**
+ * DOC: Quirk flags for different Samsung watchdog IP-cores
+ *
+ * This driver supports multiple Samsung SoCs, each of which might have
+ * different set of registers and features supported. As watchdog block
+ * sometimes requires modifying PMU registers for proper functioning, register
+ * differences in both watchdog and PMU IP-cores should be accounted for. Quirk
+ * flags described below serve the purpose of telling the driver about mentioned
+ * SoC traits, and can be specified in driver data for each particular supported
+ * device.
+ *
+ * %QUIRK_HAS_WTCLRINT_REG: Watchdog block has WTCLRINT register. It's used to
+ * clear the interrupt once the interrupt service routine is complete. It's
+ * write-only, writing any values to this register clears the interrupt, but
+ * reading is not permitted.
+ *
+ * %QUIRK_HAS_PMU_MASK_RESET: PMU block has the register for disabling/enabling
+ * WDT reset request. On old SoCs it's usually called MASK_WDT_RESET_REQUEST,
+ * new SoCs have CLUSTERx_NONCPU_INT_EN register, which 'mask_bit' value is
+ * inverted compared to the former one.
+ *
+ * %QUIRK_HAS_PMU_RST_STAT: PMU block has RST_STAT (reset status) register,
+ * which contains bits indicating the reason for most recent CPU reset. If
+ * present, driver will use this register to check if previous reboot was due to
+ * watchdog timer reset.
+ *
+ * %QUIRK_HAS_PMU_AUTO_DISABLE: PMU block has AUTOMATIC_WDT_RESET_DISABLE
+ * register. If 'mask_bit' bit is set, PMU will disable WDT reset when
+ * corresponding processor is in reset state.
+ *
+ * %QUIRK_HAS_PMU_CNT_EN: PMU block has some register (e.g. CLUSTERx_NONCPU_OUT)
+ * with "watchdog counter enable" bit. That bit should be set to make watchdog
+ * counter running.
+ */
+#define QUIRK_HAS_WTCLRINT_REG (1 << 0)
+#define QUIRK_HAS_PMU_MASK_RESET (1 << 1)
+#define QUIRK_HAS_PMU_RST_STAT (1 << 2)
+#define QUIRK_HAS_PMU_AUTO_DISABLE (1 << 3)
+#define QUIRK_HAS_PMU_CNT_EN (1 << 4)
/* These quirks require that we have a PMU register map */
-#define QUIRKS_HAVE_PMUREG (QUIRK_HAS_PMU_CONFIG | \
- QUIRK_HAS_RST_STAT)
+#define QUIRKS_HAVE_PMUREG \
+ (QUIRK_HAS_PMU_MASK_RESET | QUIRK_HAS_PMU_RST_STAT | \
+ QUIRK_HAS_PMU_AUTO_DISABLE | QUIRK_HAS_PMU_CNT_EN)
static bool nowayout = WATCHDOG_NOWAYOUT;
static int tmr_margin;
@@ -90,26 +135,33 @@ MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, 0 to
* timer reset functionality.
* @mask_reset_reg: Offset in pmureg for the register that masks the watchdog
* timer reset functionality.
+ * @mask_reset_inv: If set, mask_reset_reg value will have inverted meaning.
* @mask_bit: Bit number for the watchdog timer in the disable register and the
* mask reset register.
* @rst_stat_reg: Offset in pmureg for the register that has the reset status.
* @rst_stat_bit: Bit number in the rst_stat register indicating a watchdog
* reset.
+ * @cnt_en_reg: Offset in pmureg for the register that enables WDT counter.
+ * @cnt_en_bit: Bit number for "watchdog counter enable" in cnt_en register.
* @quirks: A bitfield of quirks.
*/
struct s3c2410_wdt_variant {
int disable_reg;
int mask_reset_reg;
+ bool mask_reset_inv;
int mask_bit;
int rst_stat_reg;
int rst_stat_bit;
+ int cnt_en_reg;
+ int cnt_en_bit;
u32 quirks;
};
struct s3c2410_wdt {
struct device *dev;
- struct clk *clock;
+ struct clk *bus_clk; /* for register interface (PCLK) */
+ struct clk *src_clk; /* for WDT counter */
void __iomem *reg_base;
unsigned int count;
spinlock_t lock;
@@ -136,8 +188,8 @@ static const struct s3c2410_wdt_variant drv_data_exynos5250 = {
.mask_bit = 20,
.rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
.rst_stat_bit = 20,
- .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT \
- | QUIRK_HAS_WTCLRINT_REG,
+ .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \
+ QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_AUTO_DISABLE,
};
static const struct s3c2410_wdt_variant drv_data_exynos5420 = {
@@ -146,8 +198,8 @@ static const struct s3c2410_wdt_variant drv_data_exynos5420 = {
.mask_bit = 0,
.rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
.rst_stat_bit = 9,
- .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT \
- | QUIRK_HAS_WTCLRINT_REG,
+ .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \
+ QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_AUTO_DISABLE,
};
static const struct s3c2410_wdt_variant drv_data_exynos7 = {
@@ -156,8 +208,32 @@ static const struct s3c2410_wdt_variant drv_data_exynos7 = {
.mask_bit = 23,
.rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
.rst_stat_bit = 23, /* A57 WDTRESET */
- .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT \
- | QUIRK_HAS_WTCLRINT_REG,
+ .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \
+ QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_AUTO_DISABLE,
+};
+
+static const struct s3c2410_wdt_variant drv_data_exynos850_cl0 = {
+ .mask_reset_reg = EXYNOS850_CLUSTER0_NONCPU_INT_EN,
+ .mask_bit = 2,
+ .mask_reset_inv = true,
+ .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
+ .rst_stat_bit = EXYNOS850_CLUSTER0_WDTRESET_BIT,
+ .cnt_en_reg = EXYNOS850_CLUSTER0_NONCPU_OUT,
+ .cnt_en_bit = 7,
+ .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \
+ QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN,
+};
+
+static const struct s3c2410_wdt_variant drv_data_exynos850_cl1 = {
+ .mask_reset_reg = EXYNOS850_CLUSTER1_NONCPU_INT_EN,
+ .mask_bit = 2,
+ .mask_reset_inv = true,
+ .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
+ .rst_stat_bit = EXYNOS850_CLUSTER1_WDTRESET_BIT,
+ .cnt_en_reg = EXYNOS850_CLUSTER1_NONCPU_OUT,
+ .cnt_en_bit = 7,
+ .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | \
+ QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN,
};
static const struct of_device_id s3c2410_wdt_match[] = {
@@ -171,6 +247,8 @@ static const struct of_device_id s3c2410_wdt_match[] = {
.data = &drv_data_exynos5420 },
{ .compatible = "samsung,exynos7-wdt",
.data = &drv_data_exynos7 },
+ { .compatible = "samsung,exynos850-wdt",
+ .data = &drv_data_exynos850_cl0 },
{},
};
MODULE_DEVICE_TABLE(of, s3c2410_wdt_match);
@@ -187,9 +265,14 @@ MODULE_DEVICE_TABLE(platform, s3c2410_wdt_ids);
/* functions */
-static inline unsigned int s3c2410wdt_max_timeout(struct clk *clock)
+static inline unsigned long s3c2410wdt_get_freq(struct s3c2410_wdt *wdt)
{
- unsigned long freq = clk_get_rate(clock);
+ return clk_get_rate(wdt->src_clk ? wdt->src_clk : wdt->bus_clk);
+}
+
+static inline unsigned int s3c2410wdt_max_timeout(struct s3c2410_wdt *wdt)
+{
+ const unsigned long freq = s3c2410wdt_get_freq(wdt);
return S3C2410_WTCNT_MAXCNT / (freq / (S3C2410_WTCON_PRESCALE_MAX + 1)
/ S3C2410_WTCON_MAXDIV);
@@ -200,35 +283,74 @@ static inline struct s3c2410_wdt *freq_to_wdt(struct notifier_block *nb)
return container_of(nb, struct s3c2410_wdt, freq_transition);
}
-static int s3c2410wdt_mask_and_disable_reset(struct s3c2410_wdt *wdt, bool mask)
+static int s3c2410wdt_disable_wdt_reset(struct s3c2410_wdt *wdt, bool mask)
{
+ const u32 mask_val = BIT(wdt->drv_data->mask_bit);
+ const u32 val = mask ? mask_val : 0;
int ret;
- u32 mask_val = 1 << wdt->drv_data->mask_bit;
- u32 val = 0;
- /* No need to do anything if no PMU CONFIG needed */
- if (!(wdt->drv_data->quirks & QUIRK_HAS_PMU_CONFIG))
- return 0;
+ ret = regmap_update_bits(wdt->pmureg, wdt->drv_data->disable_reg,
+ mask_val, val);
+ if (ret < 0)
+ dev_err(wdt->dev, "failed to update reg(%d)\n", ret);
- if (mask)
- val = mask_val;
+ return ret;
+}
+
+static int s3c2410wdt_mask_wdt_reset(struct s3c2410_wdt *wdt, bool mask)
+{
+ const u32 mask_val = BIT(wdt->drv_data->mask_bit);
+ const bool val_inv = wdt->drv_data->mask_reset_inv;
+ const u32 val = (mask ^ val_inv) ? mask_val : 0;
+ int ret;
- ret = regmap_update_bits(wdt->pmureg,
- wdt->drv_data->disable_reg,
- mask_val, val);
+ ret = regmap_update_bits(wdt->pmureg, wdt->drv_data->mask_reset_reg,
+ mask_val, val);
if (ret < 0)
- goto error;
+ dev_err(wdt->dev, "failed to update reg(%d)\n", ret);
+
+ return ret;
+}
- ret = regmap_update_bits(wdt->pmureg,
- wdt->drv_data->mask_reset_reg,
- mask_val, val);
- error:
+static int s3c2410wdt_enable_counter(struct s3c2410_wdt *wdt, bool en)
+{
+ const u32 mask_val = BIT(wdt->drv_data->cnt_en_bit);
+ const u32 val = en ? mask_val : 0;
+ int ret;
+
+ ret = regmap_update_bits(wdt->pmureg, wdt->drv_data->cnt_en_reg,
+ mask_val, val);
if (ret < 0)
dev_err(wdt->dev, "failed to update reg(%d)\n", ret);
return ret;
}
+static int s3c2410wdt_enable(struct s3c2410_wdt *wdt, bool en)
+{
+ int ret;
+
+ if (wdt->drv_data->quirks & QUIRK_HAS_PMU_AUTO_DISABLE) {
+ ret = s3c2410wdt_disable_wdt_reset(wdt, !en);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (wdt->drv_data->quirks & QUIRK_HAS_PMU_MASK_RESET) {
+ ret = s3c2410wdt_mask_wdt_reset(wdt, !en);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (wdt->drv_data->quirks & QUIRK_HAS_PMU_CNT_EN) {
+ ret = s3c2410wdt_enable_counter(wdt, en);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
{
struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
@@ -300,7 +422,7 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd,
unsigned int timeout)
{
struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
- unsigned long freq = clk_get_rate(wdt->clock);
+ unsigned long freq = s3c2410wdt_get_freq(wdt);
unsigned int count;
unsigned int divisor = 1;
unsigned long wtcon;
@@ -482,7 +604,7 @@ static inline unsigned int s3c2410wdt_get_bootstatus(struct s3c2410_wdt *wdt)
unsigned int rst_stat;
int ret;
- if (!(wdt->drv_data->quirks & QUIRK_HAS_RST_STAT))
+ if (!(wdt->drv_data->quirks & QUIRK_HAS_PMU_RST_STAT))
return 0;
ret = regmap_read(wdt->pmureg, wdt->drv_data->rst_stat_reg, &rst_stat);
@@ -498,14 +620,40 @@ static inline const struct s3c2410_wdt_variant *
s3c2410_get_wdt_drv_data(struct platform_device *pdev)
{
const struct s3c2410_wdt_variant *variant;
+ struct device *dev = &pdev->dev;
- variant = of_device_get_match_data(&pdev->dev);
+ variant = of_device_get_match_data(dev);
if (!variant) {
/* Device matched by platform_device_id */
variant = (struct s3c2410_wdt_variant *)
platform_get_device_id(pdev)->driver_data;
}
+#ifdef CONFIG_OF
+ /* Choose Exynos850 driver data w.r.t. cluster index */
+ if (variant == &drv_data_exynos850_cl0) {
+ u32 index;
+ int err;
+
+ err = of_property_read_u32(dev->of_node,
+ "samsung,cluster-index", &index);
+ if (err) {
+ dev_err(dev, "failed to get cluster index\n");
+ return NULL;
+ }
+
+ switch (index) {
+ case 0:
+ return &drv_data_exynos850_cl0;
+ case 1:
+ return &drv_data_exynos850_cl1;
+ default:
+ dev_err(dev, "wrong cluster index: %u\n", index);
+ return NULL;
+ }
+ }
+#endif
+
return variant;
}
@@ -513,9 +661,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct s3c2410_wdt *wdt;
- struct resource *wdt_irq;
unsigned int wtcon;
- int started = 0;
+ int wdt_irq;
int ret;
wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
@@ -527,6 +674,9 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
wdt->wdt_device = s3c2410_wdd;
wdt->drv_data = s3c2410_get_wdt_drv_data(pdev);
+ if (!wdt->drv_data)
+ return -EINVAL;
+
if (wdt->drv_data->quirks & QUIRKS_HAVE_PMUREG) {
wdt->pmureg = syscon_regmap_lookup_by_phandle(dev->of_node,
"samsung,syscon-phandle");
@@ -536,40 +686,52 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
}
}
- wdt_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (wdt_irq == NULL) {
- dev_err(dev, "no irq resource specified\n");
- ret = -ENOENT;
- goto err;
- }
+ wdt_irq = platform_get_irq(pdev, 0);
+ if (wdt_irq < 0)
+ return wdt_irq;
/* get the memory region for the watchdog timer */
wdt->reg_base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(wdt->reg_base)) {
- ret = PTR_ERR(wdt->reg_base);
- goto err;
- }
+ if (IS_ERR(wdt->reg_base))
+ return PTR_ERR(wdt->reg_base);
- wdt->clock = devm_clk_get(dev, "watchdog");
- if (IS_ERR(wdt->clock)) {
- dev_err(dev, "failed to find watchdog clock source\n");
- ret = PTR_ERR(wdt->clock);
- goto err;
+ wdt->bus_clk = devm_clk_get(dev, "watchdog");
+ if (IS_ERR(wdt->bus_clk)) {
+ dev_err(dev, "failed to find bus clock\n");
+ return PTR_ERR(wdt->bus_clk);
}
- ret = clk_prepare_enable(wdt->clock);
+ ret = clk_prepare_enable(wdt->bus_clk);
if (ret < 0) {
- dev_err(dev, "failed to enable clock\n");
+ dev_err(dev, "failed to enable bus clock\n");
return ret;
}
+ /*
+ * "watchdog_src" clock is optional; if it's not present -- just skip it
+ * and use "watchdog" clock as both bus and source clock.
+ */
+ wdt->src_clk = devm_clk_get_optional(dev, "watchdog_src");
+ if (IS_ERR(wdt->src_clk)) {
+ dev_err_probe(dev, PTR_ERR(wdt->src_clk),
+ "failed to get source clock\n");
+ ret = PTR_ERR(wdt->src_clk);
+ goto err_bus_clk;
+ }
+
+ ret = clk_prepare_enable(wdt->src_clk);
+ if (ret) {
+ dev_err(dev, "failed to enable source clock\n");
+ goto err_bus_clk;
+ }
+
wdt->wdt_device.min_timeout = 1;
- wdt->wdt_device.max_timeout = s3c2410wdt_max_timeout(wdt->clock);
+ wdt->wdt_device.max_timeout = s3c2410wdt_max_timeout(wdt);
ret = s3c2410wdt_cpufreq_register(wdt);
if (ret < 0) {
dev_err(dev, "failed to register cpufreq\n");
- goto err_clk;
+ goto err_src_clk;
}
watchdog_set_drvdata(&wdt->wdt_device, wdt);
@@ -581,19 +743,19 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device,
wdt->wdt_device.timeout);
if (ret) {
- started = s3c2410wdt_set_heartbeat(&wdt->wdt_device,
- S3C2410_WATCHDOG_DEFAULT_TIME);
-
- if (started == 0)
- dev_info(dev,
- "tmr_margin value out of range, default %d used\n",
+ ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device,
+ S3C2410_WATCHDOG_DEFAULT_TIME);
+ if (ret == 0) {
+ dev_warn(dev, "tmr_margin value out of range, default %d used\n",
S3C2410_WATCHDOG_DEFAULT_TIME);
- else
- dev_info(dev, "default timer value is out of range, cannot start\n");
+ } else {
+ dev_err(dev, "failed to use default timeout\n");
+ goto err_cpufreq;
+ }
}
- ret = devm_request_irq(dev, wdt_irq->start, s3c2410wdt_irq, 0,
- pdev->name, pdev);
+ ret = devm_request_irq(dev, wdt_irq, s3c2410wdt_irq, 0,
+ pdev->name, pdev);
if (ret != 0) {
dev_err(dev, "failed to install irq (%d)\n", ret);
goto err_cpufreq;
@@ -605,25 +767,29 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
wdt->wdt_device.bootstatus = s3c2410wdt_get_bootstatus(wdt);
wdt->wdt_device.parent = dev;
+ /*
+ * If "tmr_atboot" param is non-zero, start the watchdog right now. Also
+ * set WDOG_HW_RUNNING bit, so that watchdog core can kick the watchdog.
+ *
+ * If we're not enabling the watchdog, then ensure it is disabled if it
+ * has been left running from the bootloader or other source.
+ */
+ if (tmr_atboot) {
+ dev_info(dev, "starting watchdog timer\n");
+ s3c2410wdt_start(&wdt->wdt_device);
+ set_bit(WDOG_HW_RUNNING, &wdt->wdt_device.status);
+ } else {
+ s3c2410wdt_stop(&wdt->wdt_device);
+ }
+
ret = watchdog_register_device(&wdt->wdt_device);
if (ret)
goto err_cpufreq;
- ret = s3c2410wdt_mask_and_disable_reset(wdt, false);
+ ret = s3c2410wdt_enable(wdt, true);
if (ret < 0)
goto err_unregister;
- if (tmr_atboot && started == 0) {
- dev_info(dev, "starting watchdog timer\n");
- s3c2410wdt_start(&wdt->wdt_device);
- } else if (!tmr_atboot) {
- /* if we're not enabling the watchdog, then ensure it is
- * disabled if it has been left running from the bootloader
- * or other source */
-
- s3c2410wdt_stop(&wdt->wdt_device);
- }
-
platform_set_drvdata(pdev, wdt);
/* print out a statement of readiness */
@@ -643,10 +809,12 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
err_cpufreq:
s3c2410wdt_cpufreq_deregister(wdt);
- err_clk:
- clk_disable_unprepare(wdt->clock);
+ err_src_clk:
+ clk_disable_unprepare(wdt->src_clk);
+
+ err_bus_clk:
+ clk_disable_unprepare(wdt->bus_clk);
- err:
return ret;
}
@@ -655,7 +823,7 @@ static int s3c2410wdt_remove(struct platform_device *dev)
int ret;
struct s3c2410_wdt *wdt = platform_get_drvdata(dev);
- ret = s3c2410wdt_mask_and_disable_reset(wdt, true);
+ ret = s3c2410wdt_enable(wdt, false);
if (ret < 0)
return ret;
@@ -663,7 +831,8 @@ static int s3c2410wdt_remove(struct platform_device *dev)
s3c2410wdt_cpufreq_deregister(wdt);
- clk_disable_unprepare(wdt->clock);
+ clk_disable_unprepare(wdt->src_clk);
+ clk_disable_unprepare(wdt->bus_clk);
return 0;
}
@@ -672,8 +841,7 @@ static void s3c2410wdt_shutdown(struct platform_device *dev)
{
struct s3c2410_wdt *wdt = platform_get_drvdata(dev);
- s3c2410wdt_mask_and_disable_reset(wdt, true);
-
+ s3c2410wdt_enable(wdt, false);
s3c2410wdt_stop(&wdt->wdt_device);
}
@@ -688,7 +856,7 @@ static int s3c2410wdt_suspend(struct device *dev)
wdt->wtcon_save = readl(wdt->reg_base + S3C2410_WTCON);
wdt->wtdat_save = readl(wdt->reg_base + S3C2410_WTDAT);
- ret = s3c2410wdt_mask_and_disable_reset(wdt, true);
+ ret = s3c2410wdt_enable(wdt, false);
if (ret < 0)
return ret;
@@ -708,7 +876,7 @@ static int s3c2410wdt_resume(struct device *dev)
writel(wdt->wtdat_save, wdt->reg_base + S3C2410_WTCNT);/* Reset count */
writel(wdt->wtcon_save, wdt->reg_base + S3C2410_WTCON);
- ret = s3c2410wdt_mask_and_disable_reset(wdt, false);
+ ret = s3c2410wdt_enable(wdt, true);
if (ret < 0)
return ret;
diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c
index 1c9ae08225d8..f916bf60b312 100644
--- a/drivers/zorro/proc.c
+++ b/drivers/zorro/proc.c
@@ -30,7 +30,7 @@ proc_bus_zorro_lseek(struct file *file, loff_t off, int whence)
static ssize_t
proc_bus_zorro_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
{
- struct zorro_dev *z = PDE_DATA(file_inode(file));
+ struct zorro_dev *z = pde_data(file_inode(file));
struct ConfigDev cd;
loff_t pos = *ppos;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index c72e9f8f5f32..9a10e68c5f30 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -43,6 +43,11 @@ static void v9fs_req_issue_op(struct netfs_read_subrequest *subreq)
iov_iter_xarray(&to, READ, &rreq->mapping->i_pages, pos, len);
total = p9_client_read(fid, pos, &to, &err);
+
+ /* if we just extended the file size, any portion not in
+ * cache won't be on server and is zeroes */
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+
netfs_subreq_terminated(subreq, err ?: total, false);
}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index be72ad9edb3e..2573c08f335c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -115,7 +115,6 @@ out_error:
static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
{
- int res = 0;
struct inode *inode = file_inode(filp);
p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
@@ -125,7 +124,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
invalidate_mapping_pages(&inode->i_data, 0, -1);
}
- return res;
+ return 0;
}
static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
@@ -140,8 +139,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
fid = filp->private_data;
BUG_ON(fid == NULL);
- if ((fl->fl_flags & FL_POSIX) != FL_POSIX)
- BUG();
+ BUG_ON((fl->fl_flags & FL_POSIX) != FL_POSIX);
res = locks_lock_file_wait(filp, fl);
if (res < 0)
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index cae301d09cd3..d17502a738a9 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -552,7 +552,10 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
{
int retval, use_dentry = 0;
struct p9_fid *fid = NULL;
- struct p9_iattr_dotl p9attr;
+ struct p9_iattr_dotl p9attr = {
+ .uid = INVALID_UID,
+ .gid = INVALID_GID,
+ };
struct inode *inode = d_inode(dentry);
p9_debug(P9_DEBUG_VFS, "\n");
@@ -562,14 +565,22 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
return retval;
p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid);
- p9attr.mode = iattr->ia_mode;
- p9attr.uid = iattr->ia_uid;
- p9attr.gid = iattr->ia_gid;
- p9attr.size = iattr->ia_size;
- p9attr.atime_sec = iattr->ia_atime.tv_sec;
- p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
- p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
- p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+ if (iattr->ia_valid & ATTR_MODE)
+ p9attr.mode = iattr->ia_mode;
+ if (iattr->ia_valid & ATTR_UID)
+ p9attr.uid = iattr->ia_uid;
+ if (iattr->ia_valid & ATTR_GID)
+ p9attr.gid = iattr->ia_gid;
+ if (iattr->ia_valid & ATTR_SIZE)
+ p9attr.size = iattr->ia_size;
+ if (iattr->ia_valid & ATTR_ATIME_SET) {
+ p9attr.atime_sec = iattr->ia_atime.tv_sec;
+ p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
+ }
+ if (iattr->ia_valid & ATTR_MTIME_SET) {
+ p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
+ p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+ }
if (iattr->ia_valid & ATTR_FILE) {
fid = iattr->ia_file->private_data;
diff --git a/fs/Makefile b/fs/Makefile
index 84c5e4cdfee5..dab324aea08f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -6,6 +6,8 @@
# Rewritten to use lists instead of if-statements.
#
+obj-$(CONFIG_SYSCTL) += sysctls.o
+
obj-y := open.o read_write.o file_table.o super.o \
char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
ioctl.o readdir.o select.o dcache.o inode.o \
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index adbb3a1edcbf..5156821bfe6a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -355,7 +355,6 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct super_block *sb = inode->i_sb;
struct object_info obj;
- int ret;
obj.indaddr = ADFS_I(inode)->indaddr;
obj.name_len = 0;
@@ -365,6 +364,5 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
obj.attr = ADFS_I(inode)->attr;
obj.size = inode->i_size;
- ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
- return ret;
+ return adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
}
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 065a28bfa3f1..e1b863449296 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -227,7 +227,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
__acquires(cell->proc_lock)
{
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_cell *cell = pde_data(file_inode(m->file));
rcu_read_lock();
return seq_hlist_start_head_rcu(&cell->proc_volumes, *_pos);
@@ -236,7 +236,7 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v,
loff_t *_pos)
{
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_cell *cell = pde_data(file_inode(m->file));
return seq_hlist_next_rcu(v, &cell->proc_volumes, _pos);
}
@@ -322,7 +322,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
{
struct afs_vl_seq_net_private *priv = m->private;
struct afs_vlserver_list *vllist;
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_cell *cell = pde_data(file_inode(m->file));
loff_t pos = *_pos;
rcu_read_lock();
diff --git a/fs/aio.c b/fs/aio.c
index f6f1cbffef9e..4ceba13a7db0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -220,9 +220,35 @@ struct aio_kiocb {
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
-unsigned long aio_nr; /* current system wide number of aio requests */
-unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
+static unsigned long aio_nr; /* current system wide number of aio requests */
+static unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
/*----end sysctl variables---*/
+#ifdef CONFIG_SYSCTL
+static struct ctl_table aio_sysctls[] = {
+ {
+ .procname = "aio-nr",
+ .data = &aio_nr,
+ .maxlen = sizeof(aio_nr),
+ .mode = 0444,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "aio-max-nr",
+ .data = &aio_max_nr,
+ .maxlen = sizeof(aio_max_nr),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {}
+};
+
+static void __init aio_sysctl_init(void)
+{
+ register_sysctl_init("fs", aio_sysctls);
+}
+#else
+#define aio_sysctl_init() do { } while (0)
+#endif
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
@@ -275,6 +301,7 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ aio_sysctl_init();
return 0;
}
__initcall(aio_setup);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8c7f26f1fbb..605017eb9349 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1116,11 +1116,11 @@ out_free_interp:
* independently randomized mmap region (0 load_bias
* without MAP_FIXED nor MAP_FIXED_NOREPLACE).
*/
- if (interpreter) {
+ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+ if (alignment > ELF_MIN_ALIGN) {
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
- alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
if (alignment)
load_bias &= ~(alignment - 1);
elf_flags |= MAP_FIXED_NOREPLACE;
@@ -1585,7 +1585,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
rcu_read_unlock();
- strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+ get_task_comm(psinfo->pr_fname, p);
return 0;
}
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e1eae7ea823a..ddea6acbddde 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -822,7 +822,11 @@ static int __init init_misc_binfmt(void)
int err = register_filesystem(&bm_fs_type);
if (!err)
insert_binfmt(&misc_format);
- return err;
+ if (!register_sysctl_mount_point("fs/binfmt_misc")) {
+ pr_warn("Failed to create fs/binfmt_misc sysctl mount point");
+ return -ENOMEM;
+ }
+ return 0;
}
static void __exit exit_misc_binfmt(void)
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 520a0f6a7d9e..183e5c4aed34 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -18,8 +18,7 @@ config BTRFS_FS
select RAID6_PQ
select XOR_BLOCKS
select SRCU
- depends on !PPC_256K_PAGES # powerpc
- depends on !PAGE_SIZE_256KB # hexagon
+ depends on PAGE_SIZE_LESS_THAN_256KB
help
Btrfs is a general purpose copy-on-write filesystem with extents,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d6d48ecf823c..409bad3928db 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -12,7 +12,6 @@
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
-#include <linux/cleancache.h>
#include <linux/fsverity.h>
#include "misc.h"
#include "extent_io.h"
@@ -3578,15 +3577,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
goto out;
}
- if (!PageUptodate(page)) {
- if (cleancache_get_page(page) == 0) {
- BUG_ON(blocksize != PAGE_SIZE);
- unlock_extent(tree, start, end);
- unlock_page(page);
- goto out;
- }
- }
-
if (page->index == last_byte >> PAGE_SHIFT) {
size_t zero_offset = offset_in_page(last_byte);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0ec09fe01be6..4d947ba32da9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -23,7 +23,6 @@
#include <linux/miscdevice.h>
#include <linux/magic.h>
#include <linux/slab.h>
-#include <linux/cleancache.h>
#include <linux/ratelimit.h>
#include <linux/crc32c.h>
#include <linux/btrfs.h>
@@ -1374,7 +1373,6 @@ static int btrfs_fill_super(struct super_block *sb,
goto fail_close;
}
- cleancache_init_fs(sb);
sb->s_flags |= SB_ACTIVE;
return 0;
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index ce4d4785003c..7077f72e6f47 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -49,11 +49,19 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
}
- /* check parameters */
+ /* Check features of the backing filesystem:
+ * - Directories must support looking up and directory creation
+ * - We create tmpfiles to handle invalidation
+ * - We use xattrs to store metadata
+ * - We need to be able to query the amount of space available
+ * - We want to be able to sync the filesystem when stopping the cache
+ * - We use DIO to/from pages, so the blocksize mustn't be too big.
+ */
ret = -EOPNOTSUPP;
if (d_is_negative(root) ||
!d_backing_inode(root)->i_op->lookup ||
!d_backing_inode(root)->i_op->mkdir ||
+ !d_backing_inode(root)->i_op->tmpfile ||
!(d_backing_inode(root)->i_opflags & IOP_XATTR) ||
!root->d_sb->s_op->statfs ||
!root->d_sb->s_op->sync_fs ||
@@ -84,9 +92,7 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
cache->bsize = stats.f_bsize;
- cache->bshift = 0;
- if (stats.f_bsize < PAGE_SIZE)
- cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize);
+ cache->bshift = ilog2(stats.f_bsize);
_debug("blksize %u (shift %u)",
cache->bsize, cache->bshift);
@@ -106,7 +112,6 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
(unsigned long long) cache->fcull,
(unsigned long long) cache->fstop);
- stats.f_blocks >>= cache->bshift;
do_div(stats.f_blocks, 100);
cache->bstop = stats.f_blocks * cache->bstop_percent;
cache->bcull = stats.f_blocks * cache->bcull_percent;
@@ -209,7 +214,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
return ret;
}
- b_avail = stats.f_bavail >> cache->bshift;
+ b_avail = stats.f_bavail;
b_writing = atomic_long_read(&cache->b_writing);
if (b_avail > b_writing)
b_avail -= b_writing;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 40a792421fc1..7ac04ee2c0a0 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -703,6 +703,17 @@ static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
return -EBUSY;
}
+ /* Make sure we have copies of the tag string */
+ if (!cache->tag) {
+ /*
+ * The tag string is released by the fops->release()
+ * function, so we don't release it on error here
+ */
+ cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
+ if (!cache->tag)
+ return -ENOMEM;
+ }
+
return cachefiles_add_cache(cache);
}
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 8dd54d9375b6..c793d33b0224 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -86,7 +86,7 @@ struct cachefiles_cache {
unsigned bcull_percent; /* when to start culling (% blocks) */
unsigned bstop_percent; /* when to stop allocating (% blocks) */
unsigned bsize; /* cache's block size */
- unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */
+ unsigned bshift; /* ilog2(bsize) */
uint64_t frun; /* when to stop culling */
uint64_t fcull; /* when to start culling */
uint64_t fstop; /* when to stop allocating */
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 60b1eac2ce78..04eb52736990 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -264,7 +264,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
ki->term_func = term_func;
ki->term_func_priv = term_func_priv;
ki->was_async = true;
- ki->b_writing = (len + (1 << cache->bshift)) >> cache->bshift;
+ ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
if (ki->term_func)
ki->iocb.ki_complete = cachefiles_write_complete;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 9bd692870617..f256c8aff7bb 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -25,7 +25,9 @@ static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object,
trace_cachefiles_mark_active(object, inode);
can_use = true;
} else {
- pr_notice("cachefiles: Inode already in use: %pd\n", dentry);
+ trace_cachefiles_mark_failed(object, inode);
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ dentry, inode->i_ino);
}
return can_use;
@@ -101,6 +103,7 @@ retry:
subdir = lookup_one_len(dirname, dir, strlen(dirname));
else
subdir = ERR_PTR(ret);
+ trace_cachefiles_lookup(NULL, dir, subdir);
if (IS_ERR(subdir)) {
trace_cachefiles_vfs_error(NULL, d_backing_inode(dir),
PTR_ERR(subdir),
@@ -135,6 +138,7 @@ retry:
cachefiles_trace_mkdir_error);
goto mkdir_error;
}
+ trace_cachefiles_mkdir(dir, subdir);
if (unlikely(d_unhashed(subdir))) {
cachefiles_put_directory(subdir);
@@ -233,7 +237,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
};
int ret;
- trace_cachefiles_unlink(object, dentry, why);
+ trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why);
ret = security_path_unlink(&path, dentry);
if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error");
@@ -386,7 +390,7 @@ try_again:
.new_dir = d_inode(cache->graveyard),
.new_dentry = grave,
};
- trace_cachefiles_rename(object, rep, grave, why);
+ trace_cachefiles_rename(object, d_inode(rep)->i_ino, why);
ret = cachefiles_inject_read_error();
if (ret == 0)
ret = vfs_rename(&rd);
@@ -617,7 +621,7 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)
object->d_name_len);
else
dentry = ERR_PTR(ret);
- trace_cachefiles_lookup(object, dentry);
+ trace_cachefiles_lookup(object, fan, dentry);
if (IS_ERR(dentry)) {
if (dentry == ERR_PTR(-ENOENT))
goto new_file;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b3d9459c9bbd..c98e5238a1b6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -297,10 +297,6 @@ out:
dout("%s: result %d\n", __func__, err);
}
-static void ceph_init_rreq(struct netfs_read_request *rreq, struct file *file)
-{
-}
-
static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
{
struct inode *inode = mapping->host;
@@ -312,7 +308,6 @@ static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
}
static const struct netfs_read_request_ops ceph_netfs_read_ops = {
- .init_rreq = ceph_init_rreq,
.is_cache_enabled = ceph_is_cache_enabled,
.begin_cache_operation = ceph_begin_cache_operation,
.issue_op = ceph_netfs_issue_op,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7d2c33cdbac6..7d305b974824 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3376,8 +3376,7 @@ static void handle_cap_grant(struct inode *inode,
if ((newcaps & CEPH_CAP_LINK_SHARED) &&
(extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) {
set_nlink(inode, le32_to_cpu(grant->nlink));
- if (inode->i_nlink == 0 &&
- (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+ if (inode->i_nlink == 0)
deleted_inode = true;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9d9304e712d9..5b9104b8e453 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -204,6 +204,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
int fmode, bool isdir)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mount_options *opt =
+ ceph_inode_to_client(&ci->vfs_inode)->mount_options;
struct ceph_file_info *fi;
dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
@@ -225,6 +227,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
if (!fi)
return -ENOMEM;
+ if (opt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+ fi->flags |= CEPH_F_SYNC;
+
file->private_data = fi;
}
@@ -1541,7 +1546,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct ceph_inode_info *ci = ceph_inode(inode);
bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
ssize_t ret;
- int want, got = 0;
+ int want = 0, got = 0;
int retry_op = 0, read = 0;
again:
@@ -1556,13 +1561,14 @@ again:
else
ceph_start_io_read(inode);
+ if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
+ want |= CEPH_CAP_FILE_CACHE;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
- want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
- else
- want = CEPH_CAP_FILE_CACHE;
+ want |= CEPH_CAP_FILE_LAZYIO;
+
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
if (ret < 0) {
- if (iocb->ki_flags & IOCB_DIRECT)
+ if (direct_lock)
ceph_end_io_direct(inode);
else
ceph_end_io_read(inode);
@@ -1696,7 +1702,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
- int err, want, got;
+ int err, want = 0, got;
bool direct_lock = false;
u32 map_flags;
u64 pool_flags;
@@ -1771,10 +1777,10 @@ retry_snap:
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, count, i_size_read(inode));
+ if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
+ want |= CEPH_CAP_FILE_BUFFER;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
- want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
- else
- want = CEPH_CAP_FILE_BUFFER;
+ want |= CEPH_CAP_FILE_LAZYIO;
got = 0;
err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
if (err < 0)
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index c57699d8408d..0fcba68f9a99 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -160,8 +160,6 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
msg->hdr.version = cpu_to_le16(1);
msg->hdr.compat_version = cpu_to_le16(1);
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
- dout("client%llu send metrics to mds%d\n",
- ceph_client_gid(mdsc->fsc->client), s->s_mds);
ceph_con_send(&s->s_con, msg);
return true;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 620c691af40e..a338a3ec0dc4 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -30,6 +30,9 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode)
/* if root is the real CephFS root, we don't have quota realms */
if (root && ceph_ino(root) == CEPH_INO_ROOT)
return false;
+ /* MDS stray dirs have no quota realms */
+ if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
+ return false;
/* otherwise, we can't know for sure */
return true;
}
@@ -494,10 +497,24 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
if (ci->i_max_bytes) {
total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
+ /* For quota size less than 4MB, use 4KB block size */
+ if (!total) {
+ total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT;
+ used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT;
+ buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
+ }
/* It is possible for a quota to be exceeded.
* Report 'zero' in that case
*/
free = total > used ? total - used : 0;
+ /* For quota size less than 4KB, report the
+ * total=used=4KB,free=0 when quota is full
+ * and total=free=4KB, used=0 otherwise */
+ if (!total) {
+ total = 1;
+ free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0;
+ buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
+ }
}
spin_unlock(&ci->i_ceph_lock);
if (total) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bea89bdb534a..bf79f369aec6 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -27,6 +27,8 @@
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
+#include <uapi/linux/magic.h>
+
static DEFINE_SPINLOCK(ceph_fsc_lock);
static LIST_HEAD(ceph_fsc_list);
@@ -146,6 +148,7 @@ enum {
Opt_mds_namespace,
Opt_recover_session,
Opt_source,
+ Opt_mon_addr,
/* string args above */
Opt_dirstat,
Opt_rbytes,
@@ -159,6 +162,7 @@ enum {
Opt_quotadf,
Opt_copyfrom,
Opt_wsync,
+ Opt_pagecache,
};
enum ceph_recover_session_mode {
@@ -197,8 +201,10 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
fsparam_u32 ("rsize", Opt_rsize),
fsparam_string ("snapdirname", Opt_snapdirname),
fsparam_string ("source", Opt_source),
+ fsparam_string ("mon_addr", Opt_mon_addr),
fsparam_u32 ("wsize", Opt_wsize),
fsparam_flag_no ("wsync", Opt_wsync),
+ fsparam_flag_no ("pagecache", Opt_pagecache),
{}
};
@@ -228,9 +234,92 @@ static void canonicalize_path(char *path)
}
/*
- * Parse the source parameter. Distinguish the server list from the path.
+ * Check if the mds namespace in ceph_mount_options matches
+ * the passed in namespace string. First time match (when
+ * ->mds_namespace is NULL) is treated specially, since
+ * ->mds_namespace needs to be initialized by the caller.
+ */
+static int namespace_equals(struct ceph_mount_options *fsopt,
+ const char *namespace, size_t len)
+{
+ return !(fsopt->mds_namespace &&
+ (strlen(fsopt->mds_namespace) != len ||
+ strncmp(fsopt->mds_namespace, namespace, len)));
+}
+
+static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
+ struct fs_context *fc)
+{
+ int r;
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+
+ if (*dev_name_end != ':')
+ return invalfc(fc, "separator ':' missing in source");
+
+ r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
+ pctx->copts, fc->log.log, ',');
+ if (r)
+ return r;
+
+ fsopt->new_dev_syntax = false;
+ return 0;
+}
+
+static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
+ struct fs_context *fc)
+{
+ size_t len;
+ struct ceph_fsid fsid;
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+ char *fsid_start, *fs_name_start;
+
+ if (*dev_name_end != '=') {
+ dout("separator '=' missing in source");
+ return -EINVAL;
+ }
+
+ fsid_start = strchr(dev_name, '@');
+ if (!fsid_start)
+ return invalfc(fc, "missing cluster fsid");
+ ++fsid_start; /* start of cluster fsid */
+
+ fs_name_start = strchr(fsid_start, '.');
+ if (!fs_name_start)
+ return invalfc(fc, "missing file system name");
+
+ if (ceph_parse_fsid(fsid_start, &fsid))
+ return invalfc(fc, "Invalid FSID");
+
+ ++fs_name_start; /* start of file system name */
+ len = dev_name_end - fs_name_start;
+
+ if (!namespace_equals(fsopt, fs_name_start, len))
+ return invalfc(fc, "Mismatching mds_namespace");
+ kfree(fsopt->mds_namespace);
+ fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
+ if (!fsopt->mds_namespace)
+ return -ENOMEM;
+ dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
+
+ fsopt->new_dev_syntax = true;
+ return 0;
+}
+
+/*
+ * Parse the source parameter for new device format. Distinguish the device
+ * spec from the path. Try parsing new device format and fallback to old
+ * format if needed.
+ *
+ * New device syntax will looks like:
+ * <device_spec>=/<path>
+ * where
+ * <device_spec> is name@fsid.fsname
+ * <path> is optional, but if present must begin with '/'
+ * (monitor addresses are passed via mount option)
*
- * The source will look like:
+ * Old device syntax is:
* <server_spec>[,<server_spec>...]:[<path>]
* where
* <server_spec> is <ip>[:<port>]
@@ -263,24 +352,44 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
dev_name_end = dev_name + strlen(dev_name);
}
- dev_name_end--; /* back up to ':' separator */
- if (dev_name_end < dev_name || *dev_name_end != ':')
- return invalfc(fc, "No path or : separator in source");
+ dev_name_end--; /* back up to separator */
+ if (dev_name_end < dev_name)
+ return invalfc(fc, "Path missing in source");
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
if (fsopt->server_path)
dout("server path '%s'\n", fsopt->server_path);
- ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name,
- pctx->copts, fc->log.log);
- if (ret)
- return ret;
+ dout("trying new device syntax");
+ ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
+ if (ret) {
+ if (ret != -EINVAL)
+ return ret;
+ dout("trying old device syntax");
+ ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
+ if (ret)
+ return ret;
+ }
fc->source = param->string;
param->string = NULL;
return 0;
}
+static int ceph_parse_mon_addr(struct fs_parameter *param,
+ struct fs_context *fc)
+{
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+
+ kfree(fsopt->mon_addr);
+ fsopt->mon_addr = param->string;
+ param->string = NULL;
+
+ return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr),
+ pctx->copts, fc->log.log, '/');
+}
+
static int ceph_parse_mount_param(struct fs_context *fc,
struct fs_parameter *param)
{
@@ -306,6 +415,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
param->string = NULL;
break;
case Opt_mds_namespace:
+ if (!namespace_equals(fsopt, param->string, strlen(param->string)))
+ return invalfc(fc, "Mismatching mds_namespace");
kfree(fsopt->mds_namespace);
fsopt->mds_namespace = param->string;
param->string = NULL;
@@ -323,6 +434,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
if (fc->source)
return invalfc(fc, "Multiple sources specified");
return ceph_parse_source(param, fc);
+ case Opt_mon_addr:
+ return ceph_parse_mon_addr(param, fc);
case Opt_wsize:
if (result.uint_32 < PAGE_SIZE ||
result.uint_32 > CEPH_MAX_WRITE_SIZE)
@@ -455,6 +568,12 @@ static int ceph_parse_mount_param(struct fs_context *fc,
else
fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
break;
+ case Opt_pagecache:
+ if (result.negated)
+ fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
+ else
+ fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
+ break;
default:
BUG();
}
@@ -474,6 +593,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
kfree(args->mds_namespace);
kfree(args->server_path);
kfree(args->fscache_uniq);
+ kfree(args->mon_addr);
kfree(args);
}
@@ -517,6 +637,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
if (ret)
return ret;
+ ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
+ if (ret)
+ return ret;
+
return ceph_compare_options(new_opt, fsc->client);
}
@@ -572,15 +696,22 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
seq_puts(m, ",copyfrom");
- if (fsopt->mds_namespace)
+ /* dump mds_namespace when old device syntax is in use */
+ if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
+ if (fsopt->mon_addr)
+ seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
+
if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
seq_show_option(m, "recover_session", "clean");
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
seq_puts(m, ",wsync");
+ if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+ seq_puts(m, ",nopagecache");
+
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%u", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -1052,6 +1183,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
static int ceph_get_tree(struct fs_context *fc)
{
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
struct super_block *sb;
struct ceph_fs_client *fsc;
struct dentry *res;
@@ -1063,6 +1195,8 @@ static int ceph_get_tree(struct fs_context *fc)
if (!fc->source)
return invalfc(fc, "No source");
+ if (fsopt->new_dev_syntax && !fsopt->mon_addr)
+ return invalfc(fc, "No monitor address");
/* create client (which we may/may not use) */
fsc = create_fs_client(pctx->opts, pctx->copts);
@@ -1148,6 +1282,13 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
else
ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
+ if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
+ kfree(fsc->mount_options->mon_addr);
+ fsc->mount_options->mon_addr = fsopt->mon_addr;
+ fsopt->mon_addr = NULL;
+ pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
+ }
+
sync_filesystem(fc->root->d_sb);
return 0;
}
@@ -1325,6 +1466,14 @@ bool disable_send_metrics = false;
module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
+/* for both v1 and v2 syntax */
+static bool mount_support = true;
+static const struct kernel_param_ops param_ops_mount_syntax = {
+ .get = param_get_bool,
+};
+module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
+module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
+
module_init(init_ceph);
module_exit(exit_ceph);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d0142cc5c41b..67f145e1ae7a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -24,13 +24,11 @@
#include <linux/fscache.h>
#endif
-/* f_type in struct statfs */
-#define CEPH_SUPER_MAGIC 0x00c36400
-
/* large granularity for statfs utilization stats to facilitate
* large volume sizes on 32-bit machines. */
#define CEPH_BLOCK_SHIFT 22 /* 4 MB */
#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
+#define CEPH_4K_BLOCK_SHIFT 12 /* 4 KB */
#define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blocklisted */
#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
@@ -44,6 +42,7 @@
#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */
#define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */
#define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */
+#define CEPH_MOUNT_OPT_NOPAGECACHE (1<<16) /* bypass pagecache altogether */
#define CEPH_MOUNT_OPT_DEFAULT \
(CEPH_MOUNT_OPT_DCACHE | \
@@ -88,6 +87,8 @@ struct ceph_mount_options {
unsigned int max_readdir; /* max readdir result (entries) */
unsigned int max_readdir_bytes; /* max readdir result (bytes) */
+ bool new_dev_syntax;
+
/*
* everything above this point can be memcmp'd; everything below
* is handled in compare_mount_options()
@@ -97,6 +98,7 @@ struct ceph_mount_options {
char *mds_namespace; /* default NULL */
char *server_path; /* default NULL (means "/") */
char *fscache_uniq; /* default NULL */
+ char *mon_addr;
};
struct ceph_fs_client {
@@ -534,19 +536,23 @@ static inline int ceph_ino_compare(struct inode *inode, void *data)
*
* These come from src/mds/mdstypes.h in the ceph sources.
*/
-#define CEPH_MAX_MDS 0x100
-#define CEPH_NUM_STRAY 10
+#define CEPH_MAX_MDS 0x100
+#define CEPH_NUM_STRAY 10
#define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS)
+#define CEPH_MDS_INO_LOG_OFFSET (2 * CEPH_MAX_MDS)
#define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
{
- if (vino.ino < CEPH_INO_SYSTEM_BASE &&
- vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) {
- WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino);
- return true;
- }
- return false;
+ if (vino.ino >= CEPH_INO_SYSTEM_BASE ||
+ vino.ino < CEPH_MDS_INO_MDSDIR_OFFSET)
+ return false;
+
+ /* Don't warn on mdsdirs */
+ WARN_RATELIMIT(vino.ino >= CEPH_MDS_INO_LOG_OFFSET,
+ "Attempt to access reserved inode number 0x%llx",
+ vino.ino);
+ return true;
}
static inline struct inode *ceph_find_inode(struct super_block *sb,
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 346ae8716deb..3b7e3b9e4fd2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -188,7 +188,7 @@ config CIFS_SMB_DIRECT
config CIFS_FSCACHE
bool "Provide CIFS client caching support"
- depends on CIFS=m && FSCACHE_OLD_API || CIFS=y && FSCACHE_OLD_API=y
+ depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
help
Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
to be cached locally on disk through the general filesystem cache
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 87fcacdf3de7..cc8fdcb35b71 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -25,7 +25,7 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += cifs_dfs_ref.o dfs_cache.o
cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o
-cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
+cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o
cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
deleted file mode 100644
index 8be57aaedab6..000000000000
--- a/fs/cifs/cache.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * CIFS filesystem cache index structure definitions
- *
- * Copyright (c) 2010 Novell, Inc.
- * Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
- *
- */
-#include "fscache.h"
-#include "cifs_debug.h"
-
-/*
- * CIFS filesystem definition for FS-Cache
- */
-struct fscache_netfs cifs_fscache_netfs = {
- .name = "cifs",
- .version = 0,
-};
-
-/*
- * Register CIFS for caching with FS-Cache
- */
-int cifs_fscache_register(void)
-{
- return fscache_register_netfs(&cifs_fscache_netfs);
-}
-
-/*
- * Unregister CIFS for caching
- */
-void cifs_fscache_unregister(void)
-{
- fscache_unregister_netfs(&cifs_fscache_netfs);
-}
-
-/*
- * Server object for FS-Cache
- */
-const struct fscache_cookie_def cifs_fscache_server_index_def = {
- .name = "CIFS.server",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
-};
-
-static enum
-fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
- const void *data,
- uint16_t datalen,
- loff_t object_size)
-{
- struct cifs_fscache_super_auxdata auxdata;
- const struct cifs_tcon *tcon = cookie_netfs_data;
-
- if (datalen != sizeof(auxdata))
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
- auxdata.vol_create_time = tcon->vol_create_time;
- auxdata.vol_serial_number = tcon->vol_serial_number;
-
- if (memcmp(data, &auxdata, datalen) != 0)
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- return FSCACHE_CHECKAUX_OKAY;
-}
-
-/*
- * Superblock object for FS-Cache
- */
-const struct fscache_cookie_def cifs_fscache_super_index_def = {
- .name = "CIFS.super",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
- .check_aux = cifs_fscache_super_check_aux,
-};
-
-static enum
-fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
- const void *data,
- uint16_t datalen,
- loff_t object_size)
-{
- struct cifs_fscache_inode_auxdata auxdata;
- struct cifsInodeInfo *cifsi = cookie_netfs_data;
-
- if (datalen != sizeof(auxdata))
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
-
- if (memcmp(data, &auxdata, datalen) != 0)
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- return FSCACHE_CHECKAUX_OKAY;
-}
-
-const struct fscache_cookie_def cifs_fscache_inode_object_def = {
- .name = "CIFS.uniqueid",
- .type = FSCACHE_COOKIE_TYPE_DATAFILE,
- .check_aux = cifs_fscache_inode_check_aux,
-};
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index d282caf9f037..ea00e1a91250 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -416,11 +416,17 @@ skip_rdma:
from_kuid(&init_user_ns, ses->cred_uid));
spin_lock(&ses->chan_lock);
+ if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0))
+ seq_puts(m, "\tPrimary channel: DISCONNECTED ");
+
if (ses->chan_count > 1) {
seq_printf(m, "\n\n\tExtra Channels: %zu ",
ses->chan_count-1);
- for (j = 1; j < ses->chan_count; j++)
+ for (j = 1; j < ses->chan_count; j++) {
cifs_dump_channel(m, j, &ses->chans[j]);
+ if (CIFS_CHAN_NEEDS_RECONNECT(ses, j))
+ seq_puts(m, "\tDISCONNECTED ");
+ }
}
spin_unlock(&ses->chan_lock);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 353bd0dd7026..342717bf1dc2 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -84,9 +84,9 @@ struct key_type cifs_spnego_key_type = {
/* get a key struct with a SPNEGO security blob, suitable for session setup */
struct key *
-cifs_get_spnego_key(struct cifs_ses *sesInfo)
+cifs_get_spnego_key(struct cifs_ses *sesInfo,
+ struct TCP_Server_Info *server)
{
- struct TCP_Server_Info *server = cifs_ses_server(sesInfo);
struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
char *description, *dp;
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index e6a0451877d4..7f102ffeb675 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -29,7 +29,8 @@ struct cifs_spnego_msg {
#ifdef __KERNEL__
extern struct key_type cifs_spnego_key_type;
-extern struct key *cifs_get_spnego_key(struct cifs_ses *sesInfo);
+extern struct key *cifs_get_spnego_key(struct cifs_ses *sesInfo,
+ struct TCP_Server_Info *server);
#endif /* KERNEL */
#endif /* _CIFS_SPNEGO_H */
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index 23a1ed2fb769..463ebe34892b 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -396,11 +396,11 @@ static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const ch
switch (state) {
case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE:
cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name);
- cifs_ses_mark_for_reconnect(swnreg->tcon->ses);
+ cifs_reconnect(swnreg->tcon->ses->server, true);
break;
case CIFS_SWN_RESOURCE_STATE_AVAILABLE:
cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name);
- cifs_ses_mark_for_reconnect(swnreg->tcon->ses);
+ cifs_reconnect(swnreg->tcon->ses->server, true);
break;
case CIFS_SWN_RESOURCE_STATE_UNKNOWN:
cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name);
@@ -498,10 +498,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a
goto unlock;
}
- spin_lock(&GlobalMid_Lock);
- if (tcon->ses->server->tcpStatus != CifsExiting)
- tcon->ses->server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
+ cifs_reconnect(tcon->ses->server, false);
unlock:
mutex_unlock(&tcon->ses->server->srv_mutex);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d118282071b3..0912d8bbbac1 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -141,9 +141,13 @@ int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
if ((cifs_pdu == NULL) || (server == NULL))
return -EINVAL;
+ spin_lock(&cifs_tcp_ses_lock);
if (!(cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) ||
- server->tcpStatus == CifsNeedNegotiate)
+ server->tcpStatus == CifsNeedNegotiate) {
+ spin_unlock(&cifs_tcp_ses_lock);
return rc;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
if (!server->session_estab) {
memcpy(cifs_pdu->Signature.SecuritySignature, "BSRSPYL", 8);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index dca42aa87d30..199edac0cb59 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -26,6 +26,7 @@
#include <linux/random.h>
#include <linux/uuid.h>
#include <linux/xattr.h>
+#include <uapi/linux/magic.h>
#include <net/ipv6.h>
#include "cifsfs.h"
#include "cifspdu.h"
@@ -202,7 +203,7 @@ cifs_read_super(struct super_block *sb)
sb->s_time_max = ts.tv_sec;
}
- sb->s_magic = CIFS_MAGIC_NUMBER;
+ sb->s_magic = CIFS_SUPER_MAGIC;
sb->s_op = &cifs_super_ops;
sb->s_xattr = cifs_xattr_handlers;
rc = super_setup_bdi(sb);
@@ -396,6 +397,9 @@ static void
cifs_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
+ if (inode->i_state & I_PINNING_FSCACHE_WB)
+ cifs_fscache_unuse_inode_cookie(inode, true);
+ cifs_fscache_release_inode_cookie(inode);
clear_inode(inode);
}
@@ -720,6 +724,12 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
}
#endif
+static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ fscache_unpin_writeback(wbc, cifs_inode_cookie(inode));
+ return 0;
+}
+
static int cifs_drop_inode(struct inode *inode)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -732,6 +742,7 @@ static int cifs_drop_inode(struct inode *inode)
static const struct super_operations cifs_super_ops = {
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
+ .write_inode = cifs_write_inode,
.free_inode = cifs_free_inode,
.drop_inode = cifs_drop_inode,
.evict_inode = cifs_evict_inode,
@@ -773,7 +784,7 @@ cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb)
sep = CIFS_DIR_SEP(cifs_sb);
dentry = dget(sb->s_root);
- p = s = full_path;
+ s = full_path;
do {
struct inode *dir = d_inode(dentry);
@@ -1624,13 +1635,9 @@ init_cifs(void)
goto out_destroy_cifsoplockd_wq;
}
- rc = cifs_fscache_register();
- if (rc)
- goto out_destroy_deferredclose_wq;
-
rc = cifs_init_inodecache();
if (rc)
- goto out_unreg_fscache;
+ goto out_destroy_deferredclose_wq;
rc = cifs_init_mids();
if (rc)
@@ -1692,8 +1699,6 @@ out_destroy_mids:
cifs_destroy_mids();
out_destroy_inodecache:
cifs_destroy_inodecache();
-out_unreg_fscache:
- cifs_fscache_unregister();
out_destroy_deferredclose_wq:
destroy_workqueue(deferredclose_wq);
out_destroy_cifsoplockd_wq:
@@ -1729,7 +1734,6 @@ exit_cifs(void)
cifs_destroy_request_bufs();
cifs_destroy_mids();
cifs_destroy_inodecache();
- cifs_fscache_unregister();
destroy_workqueue(deferredclose_wq);
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9e5d9e192ef0..15a5c5db038b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -152,5 +152,6 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.34"
+#define SMB3_PRODUCT_BUILD 35
+#define CIFS_VERSION "2.35"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index be74606724c7..48b343d03430 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -24,8 +24,6 @@
#include "../smbfs_common/smb2pdu.h"
#include "smb2pdu.h"
-#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
-
#define SMB_PATH_MAX 260
#define CIFS_PORT 445
#define RFC1001_PORT 139
@@ -113,7 +111,14 @@ enum statusEnum {
CifsGood,
CifsExiting,
CifsNeedReconnect,
- CifsNeedNegotiate
+ CifsNeedNegotiate,
+ CifsInNegotiate,
+ CifsNeedSessSetup,
+ CifsInSessSetup,
+ CifsNeedTcon,
+ CifsInTcon,
+ CifsNeedFilesInvalidate,
+ CifsInFilesInvalidate
};
enum securityEnum {
@@ -263,13 +268,16 @@ struct smb_version_operations {
/* check if we need to negotiate */
bool (*need_neg)(struct TCP_Server_Info *);
/* negotiate to the server */
- int (*negotiate)(const unsigned int, struct cifs_ses *);
+ int (*negotiate)(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
/* set negotiated write size */
unsigned int (*negotiate_wsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx);
/* set negotiated read size */
unsigned int (*negotiate_rsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx);
/* setup smb sessionn */
int (*sess_setup)(const unsigned int, struct cifs_ses *,
+ struct TCP_Server_Info *server,
const struct nls_table *);
/* close smb session */
int (*logoff)(const unsigned int, struct cifs_ses *);
@@ -414,7 +422,8 @@ struct smb_version_operations {
void (*set_lease_key)(struct inode *, struct cifs_fid *);
/* generate new lease key */
void (*new_lease_key)(struct cifs_fid *);
- int (*generate_signingkey)(struct cifs_ses *);
+ int (*generate_signingkey)(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *,
bool allocate_crypto);
int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
@@ -582,7 +591,7 @@ struct TCP_Server_Info {
char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
struct smb_version_operations *ops;
struct smb_version_values *vals;
- /* updates to tcpStatus protected by GlobalMid_Lock */
+ /* updates to tcpStatus protected by cifs_tcp_ses_lock */
enum statusEnum tcpStatus; /* what we think the status is */
char *hostname; /* hostname portion of UNC string */
struct socket *ssocket;
@@ -659,9 +668,6 @@ struct TCP_Server_Info {
unsigned int total_read; /* total amount of data read in this pass */
atomic_t in_send; /* requests trying to send */
atomic_t num_waiters; /* blocked waiting to get in sendrecv */
-#ifdef CONFIG_CIFS_FSCACHE
- struct fscache_cookie *fscache; /* client index cache cookie */
-#endif
#ifdef CONFIG_CIFS_STATS2
atomic_t num_cmds[NUMBER_OF_SMB2_COMMANDS]; /* total requests by cmd */
atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count resps > 1 sec */
@@ -915,12 +921,13 @@ struct cifs_chan {
*/
struct cifs_ses {
struct list_head smb_ses_list;
+ struct list_head rlist; /* reconnect list */
struct list_head tcon_list;
struct cifs_tcon *tcon_ipc;
struct mutex session_mutex;
struct TCP_Server_Info *server; /* pointer to server info */
int ses_count; /* reference counter */
- enum statusEnum status; /* updates protected by GlobalMid_Lock */
+ enum statusEnum status; /* updates protected by cifs_tcp_ses_lock */
unsigned overrideSecFlg; /* if non-zero override global sec flags */
char *serverOS; /* name of operating system underlying server */
char *serverNOS; /* name of network operating system of server */
@@ -939,17 +946,13 @@ struct cifs_ses {
struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
enum securityEnum sectype; /* what security flavor was specified? */
bool sign; /* is signing required? */
- bool need_reconnect:1; /* connection reset, uid now invalid */
bool domainAuto:1;
- bool binding:1; /* are we binding the session? */
__u16 session_flags;
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
__u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
__u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE];
__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
- __u8 binding_preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
-
/*
* Network interfaces available on the server this session is
* connected to.
@@ -969,45 +972,34 @@ struct cifs_ses {
spinlock_t chan_lock;
/* ========= begin: protected by chan_lock ======== */
#define CIFS_MAX_CHANNELS 16
+#define CIFS_ALL_CHANNELS_SET(ses) \
+ ((1UL << (ses)->chan_count) - 1)
+#define CIFS_ALL_CHANS_NEED_RECONNECT(ses) \
+ ((ses)->chans_need_reconnect == CIFS_ALL_CHANNELS_SET(ses))
+#define CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses) \
+ ((ses)->chans_need_reconnect = CIFS_ALL_CHANNELS_SET(ses))
+#define CIFS_CHAN_NEEDS_RECONNECT(ses, index) \
+ test_bit((index), &(ses)->chans_need_reconnect)
+
struct cifs_chan chans[CIFS_MAX_CHANNELS];
- struct cifs_chan *binding_chan;
size_t chan_count;
size_t chan_max;
atomic_t chan_seq; /* round robin state */
+
+ /*
+ * chans_need_reconnect is a bitmap indicating which of the channels
+ * under this smb session needs to be reconnected.
+ * If not multichannel session, only one bit will be used.
+ *
+ * We will ask for sess and tcon reconnection only if all the
+ * channels are marked for needing reconnection. This will
+ * enable the sessions on top to continue to live till any
+ * of the channels below are active.
+ */
+ unsigned long chans_need_reconnect;
/* ========= end: protected by chan_lock ======== */
};
-/*
- * When binding a new channel, we need to access the channel which isn't fully
- * established yet.
- */
-
-static inline
-struct cifs_chan *cifs_ses_binding_channel(struct cifs_ses *ses)
-{
- if (ses->binding)
- return ses->binding_chan;
- else
- return NULL;
-}
-
-/*
- * Returns the server pointer of the session. When binding a new
- * channel this returns the last channel which isn't fully established
- * yet.
- *
- * This function should be use for negprot/sess.setup codepaths. For
- * the other requests see cifs_pick_channel().
- */
-static inline
-struct TCP_Server_Info *cifs_ses_server(struct cifs_ses *ses)
-{
- if (ses->binding)
- return ses->binding_chan->server;
- else
- return ses->server;
-}
-
static inline bool
cap_unix(struct cifs_ses *ses)
{
@@ -1117,7 +1109,7 @@ struct cifs_tcon {
__u32 max_bytes_copy;
#ifdef CONFIG_CIFS_FSCACHE
u64 resource_id; /* server resource id */
- struct fscache_cookie *fscache; /* cookie for share */
+ struct fscache_volume *fscache; /* cookie for share */
#endif
struct list_head pending_opens; /* list of incomplete opens */
struct cached_fid crfid; /* Cached root fid */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index d2ff438fd31f..68b9a436af4b 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2560,7 +2560,7 @@ typedef struct {
__le32 EaSize; /* length of the xattrs */
__u8 ShortNameLength;
__u8 Reserved;
- __u8 ShortName[12];
+ __u8 ShortName[24];
char FileName[1];
} __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FFrsp data */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4f5a3e857df4..d3701295402d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -131,7 +131,11 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
struct smb_hdr *in_buf ,
struct smb_hdr *out_buf,
int *bytes_returned);
-extern int cifs_reconnect(struct TCP_Server_Info *server);
+void
+cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
+ bool mark_smb_session);
+extern int cifs_reconnect(struct TCP_Server_Info *server,
+ bool mark_smb_session);
extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr);
extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
extern bool backup_cred(struct cifs_sb_info *);
@@ -164,6 +168,7 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
extern enum securityEnum select_sectype(struct TCP_Server_Info *server,
enum securityEnum requested);
extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp);
extern struct timespec64 cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
extern u64 cifs_UnixTimeToNT(struct timespec64);
@@ -293,11 +298,15 @@ extern int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon,
const struct nls_table *nlsc);
extern int cifs_negotiate_protocol(const unsigned int xid,
- struct cifs_ses *ses);
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
struct nls_table *nls_info);
extern int cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required);
-extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses);
+extern int CIFSSMBNegotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
const char *tree, struct cifs_tcon *tcon,
@@ -504,8 +513,10 @@ extern int cifs_verify_signature(struct smb_rqst *rqst,
extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
extern void cifs_crypto_secmech_release(struct TCP_Server_Info *server);
extern int calc_seckey(struct cifs_ses *);
-extern int generate_smb30signingkey(struct cifs_ses *);
-extern int generate_smb311signingkey(struct cifs_ses *);
+extern int generate_smb30signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+extern int generate_smb311signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
extern int CIFSSMBCopy(unsigned int xid,
struct cifs_tcon *source_tcon,
@@ -601,6 +612,19 @@ bool is_server_using_iface(struct TCP_Server_Info *server,
bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface);
void cifs_ses_mark_for_reconnect(struct cifs_ses *ses);
+unsigned int
+cifs_ses_get_chan_index(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+void
+cifs_chan_set_need_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+void
+cifs_chan_clear_need_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+bool
+cifs_chan_needs_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+
void extract_unc_hostname(const char *unc, const char **h, size_t *len);
int copy_path_name(char *dst, const char *src);
int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov,
@@ -626,6 +650,11 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
int match_target_ip(struct TCP_Server_Info *server,
const char *share, size_t share_len,
bool *result);
+
+int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ const char *dfs_link_path);
#endif
static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 243d17696f06..071e2f21a7db 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -73,6 +73,16 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
struct list_head *tmp;
struct list_head *tmp1;
+ /* only send once per connect */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->ses->status != CifsGood ||
+ tcon->tidStatus != CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return;
+ }
+ tcon->tidStatus = CifsInFilesInvalidate;
+ spin_unlock(&cifs_tcp_ses_lock);
+
/* list all files open on tree connection and mark them invalid */
spin_lock(&tcon->open_file_lock);
list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
@@ -89,6 +99,11 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
memset(tcon->crfid.fid, 0, sizeof(struct cifs_fid));
mutex_unlock(&tcon->crfid.fid_mutex);
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInFilesInvalidate)
+ tcon->tidStatus = CifsNeedTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+
/*
* BB Add call to invalidate_inodes(sb) for all superblocks mounted
* to this tcon.
@@ -120,15 +135,18 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
* only tree disconnect, open, and write, (and ulogoff which does not
* have tcon) are allowed as we start force umount
*/
+ spin_lock(&cifs_tcp_ses_lock);
if (tcon->tidStatus == CifsExiting) {
if (smb_command != SMB_COM_WRITE_ANDX &&
smb_command != SMB_COM_OPEN_ANDX &&
smb_command != SMB_COM_TREE_DISCONNECT) {
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "can not send cmd %d while umounting\n",
smb_command);
return -ENODEV;
}
}
+ spin_unlock(&cifs_tcp_ses_lock);
retries = server->nr_targets;
@@ -148,8 +166,12 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
}
/* are we still trying to reconnect? */
- if (server->tcpStatus != CifsNeedReconnect)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus != CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
break;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
if (retries && --retries)
continue;
@@ -166,31 +188,49 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
retries = server->nr_targets;
}
- if (!ses->need_reconnect && !tcon->need_reconnect)
+ spin_lock(&ses->chan_lock);
+ if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) {
+ spin_unlock(&ses->chan_lock);
return 0;
+ }
+ spin_unlock(&ses->chan_lock);
nls_codepage = load_nls_default();
/*
- * need to prevent multiple threads trying to simultaneously
- * reconnect the same SMB session
- */
- mutex_lock(&ses->session_mutex);
-
- /*
* Recheck after acquire mutex. If another thread is negotiating
* and the server never sends an answer the socket will be closed
* and tcpStatus set to reconnect.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
rc = -EHOSTDOWN;
- mutex_unlock(&ses->session_mutex);
goto out;
}
+ spin_unlock(&cifs_tcp_ses_lock);
- rc = cifs_negotiate_protocol(0, ses);
- if (rc == 0 && ses->need_reconnect)
- rc = cifs_setup_session(0, ses, nls_codepage);
+ /*
+ * need to prevent multiple threads trying to simultaneously
+ * reconnect the same SMB session
+ */
+ spin_lock(&ses->chan_lock);
+ if (!cifs_chan_needs_reconnect(ses, server)) {
+ spin_unlock(&ses->chan_lock);
+
+ /* this means that we only need to tree connect */
+ if (tcon->need_reconnect)
+ goto skip_sess_setup;
+
+ rc = -EHOSTDOWN;
+ goto out;
+ }
+ spin_unlock(&ses->chan_lock);
+
+ mutex_lock(&ses->session_mutex);
+ rc = cifs_negotiate_protocol(0, ses, server);
+ if (!rc)
+ rc = cifs_setup_session(0, ses, server, nls_codepage);
/* do we need to reconnect tcon? */
if (rc || !tcon->need_reconnect) {
@@ -198,6 +238,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
goto out;
}
+skip_sess_setup:
cifs_mark_open_files_invalid(tcon);
rc = cifs_tree_connect(0, tcon, nls_codepage);
mutex_unlock(&ses->session_mutex);
@@ -337,8 +378,13 @@ static int
smb_init_no_reconnect(int smb_command, int wct, struct cifs_tcon *tcon,
void **request_buf, void **response_buf)
{
- if (tcon->ses->need_reconnect || tcon->need_reconnect)
+ spin_lock(&tcon->ses->chan_lock);
+ if (cifs_chan_needs_reconnect(tcon->ses, tcon->ses->server) ||
+ tcon->need_reconnect) {
+ spin_unlock(&tcon->ses->chan_lock);
return -EHOSTDOWN;
+ }
+ spin_unlock(&tcon->ses->chan_lock);
return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
}
@@ -476,14 +522,15 @@ should_set_ext_sec_flag(enum securityEnum sectype)
}
int
-CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
+CIFSSMBNegotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
NEGOTIATE_REQ *pSMB;
NEGOTIATE_RSP *pSMBr;
int rc = 0;
int bytes_returned;
int i;
- struct TCP_Server_Info *server = ses->server;
u16 count;
if (!server) {
@@ -600,8 +647,12 @@ CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon)
* the tcon is no longer on the list, so no need to take lock before
* checking this.
*/
- if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
- return 0;
+ spin_lock(&tcon->ses->chan_lock);
+ if ((tcon->need_reconnect) || CIFS_ALL_CHANS_NEED_RECONNECT(tcon->ses)) {
+ spin_unlock(&tcon->ses->chan_lock);
+ return -EIO;
+ }
+ spin_unlock(&tcon->ses->chan_lock);
rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon,
(void **)&smb_buffer);
@@ -696,9 +747,14 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses)
return -EIO;
mutex_lock(&ses->session_mutex);
- if (ses->need_reconnect)
+ spin_lock(&ses->chan_lock);
+ if (CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
+ spin_unlock(&ses->chan_lock);
goto session_already_dead; /* no need to send SMBlogoff if uid
already closed due to reconnect */
+ }
+ spin_unlock(&ses->chan_lock);
+
rc = small_smb_init(SMB_COM_LOGOFF_ANDX, 2, NULL, (void **)&pSMB);
if (rc) {
mutex_unlock(&ses->session_mutex);
@@ -1401,7 +1457,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
if (server->ops->is_session_expired &&
server->ops->is_session_expired(buf)) {
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -1;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 1060164b984a..11a22a30ee14 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -166,36 +166,57 @@ static void cifs_resolve_server(struct work_struct *work)
* Mark all sessions and tcons for reconnect.
*
* @server needs to be previously set to CifsNeedReconnect.
+ *
*/
-static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server)
+void
+cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
+ bool mark_smb_session)
{
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct mid_q_entry *mid, *nmid;
struct list_head retry_list;
- struct TCP_Server_Info *pserver;
server->maxBuf = 0;
server->max_read = 0;
- cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
- trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
/*
* before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they
* are not used until reconnected.
*/
- cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", __func__);
+ cifs_dbg(FYI, "%s: marking necessary sessions and tcons for reconnect\n", __func__);
/* If server is a channel, select the primary channel */
pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
- ses->need_reconnect = true;
- list_for_each_entry(tcon, &ses->tcon_list, tcon_list)
+ spin_lock(&ses->chan_lock);
+ if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server))
+ goto next_session;
+
+ if (mark_smb_session)
+ CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses);
+ else
+ cifs_chan_set_need_reconnect(ses, server);
+
+ /* If all channels need reconnect, then tcon needs reconnect */
+ if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses))
+ goto next_session;
+
+ ses->status = CifsNeedReconnect;
+
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
tcon->need_reconnect = true;
+ tcon->tidStatus = CifsNeedReconnect;
+ }
if (ses->tcon_ipc)
ses->tcon_ipc->need_reconnect = true;
+
+next_session:
+ spin_unlock(&ses->chan_lock);
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -248,16 +269,21 @@ static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server
static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets)
{
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
server->nr_targets = num_targets;
if (server->tcpStatus == CifsExiting) {
/* the demux thread will exit normally next time through the loop */
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
wake_up(&server->response_q);
return false;
}
+
+ cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
+ trace_smb3_reconnect(server->CurrentMid, server->conn_id,
+ server->hostname);
server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
+
+ spin_unlock(&cifs_tcp_ses_lock);
return true;
}
@@ -268,15 +294,21 @@ static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num
* mark all smb sessions as reconnecting for tcp session
* reconnect tcp session
* wake up waiters on reconnection? - (not needed currently)
+ *
+ * if mark_smb_session is passed as true, unconditionally mark
+ * the smb session (and tcon) for reconnect as well. This value
+ * doesn't really matter for non-multichannel scenario.
+ *
*/
-static int __cifs_reconnect(struct TCP_Server_Info *server)
+static int __cifs_reconnect(struct TCP_Server_Info *server,
+ bool mark_smb_session)
{
int rc = 0;
if (!cifs_tcp_ses_needs_reconnect(server, 1))
return 0;
- cifs_mark_tcp_ses_conns_for_reconnect(server);
+ cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session);
do {
try_to_freeze();
@@ -299,17 +331,20 @@ static int __cifs_reconnect(struct TCP_Server_Info *server)
} else {
atomic_inc(&tcpSesReconnectCount);
set_credits(server, 1);
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus != CifsExiting)
server->tcpStatus = CifsNeedNegotiate;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_swn_reset_server_dstaddr(server);
mutex_unlock(&server->srv_mutex);
+ mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
}
} while (server->tcpStatus == CifsNeedReconnect);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedNegotiate)
mod_delayed_work(cifsiod_wq, &server->echo, 0);
+ spin_unlock(&cifs_tcp_ses_lock);
wake_up(&server->response_q);
return rc;
@@ -371,7 +406,9 @@ static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_
return rc;
}
-static int reconnect_dfs_server(struct TCP_Server_Info *server)
+static int
+reconnect_dfs_server(struct TCP_Server_Info *server,
+ bool mark_smb_session)
{
int rc = 0;
const char *refpath = server->current_fullpath + 1;
@@ -395,7 +432,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
if (!cifs_tcp_ses_needs_reconnect(server, num_targets))
return 0;
- cifs_mark_tcp_ses_conns_for_reconnect(server);
+ cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session);
do {
try_to_freeze();
@@ -416,12 +453,13 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
*/
atomic_inc(&tcpSesReconnectCount);
set_credits(server, 1);
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus != CifsExiting)
server->tcpStatus = CifsNeedNegotiate;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_swn_reset_server_dstaddr(server);
mutex_unlock(&server->srv_mutex);
+ mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
} while (server->tcpStatus == CifsNeedReconnect);
if (target_hint)
@@ -430,29 +468,32 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
dfs_cache_free_tgts(&tl);
/* Need to set up echo worker again once connection has been established */
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedNegotiate)
mod_delayed_work(cifsiod_wq, &server->echo, 0);
+ spin_unlock(&cifs_tcp_ses_lock);
+
wake_up(&server->response_q);
return rc;
}
-int cifs_reconnect(struct TCP_Server_Info *server)
+int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session)
{
/* If tcp session is not an dfs connection, then reconnect to last target server */
spin_lock(&cifs_tcp_ses_lock);
if (!server->is_dfs_conn || !server->origin_fullpath || !server->leaf_fullpath) {
spin_unlock(&cifs_tcp_ses_lock);
- return __cifs_reconnect(server);
+ return __cifs_reconnect(server, mark_smb_session);
}
spin_unlock(&cifs_tcp_ses_lock);
- return reconnect_dfs_server(server);
+ return reconnect_dfs_server(server, mark_smb_session);
}
#else
-int cifs_reconnect(struct TCP_Server_Info *server)
+int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session)
{
- return __cifs_reconnect(server);
+ return __cifs_reconnect(server, mark_smb_session);
}
#endif
@@ -534,15 +575,18 @@ server_unresponsive(struct TCP_Server_Info *server)
* 65s kernel_recvmsg times out, and we see that we haven't gotten
* a response in >60s.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if ((server->tcpStatus == CifsGood ||
server->tcpStatus == CifsNeedNegotiate) &&
(!server->ops->can_echo || server->ops->can_echo(server)) &&
time_after(jiffies, server->lstrp + 3 * server->echo_interval)) {
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_server_dbg(VFS, "has not responded in %lu seconds. Reconnecting...\n",
(3 * server->echo_interval) / HZ);
- cifs_reconnect(server);
+ cifs_reconnect(server, false);
return true;
}
+ spin_unlock(&cifs_tcp_ses_lock);
return false;
}
@@ -576,7 +620,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
/* reconnect if no credits and no requests in flight */
if (zero_credits(server)) {
- cifs_reconnect(server);
+ cifs_reconnect(server, false);
return -ECONNABORTED;
}
@@ -587,13 +631,17 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
else
length = sock_recvmsg(server->ssocket, smb_msg, 0);
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ESHUTDOWN;
+ }
if (server->tcpStatus == CifsNeedReconnect) {
- cifs_reconnect(server);
+ spin_unlock(&cifs_tcp_ses_lock);
return -ECONNABORTED;
}
+ spin_unlock(&cifs_tcp_ses_lock);
if (length == -ERESTARTSYS ||
length == -EAGAIN ||
@@ -610,7 +658,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
if (length <= 0) {
cifs_dbg(FYI, "Received no data or error: %d\n", length);
- cifs_reconnect(server);
+ cifs_reconnect(server, false);
return -ECONNABORTED;
}
}
@@ -689,11 +737,11 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type)
* initialize frame).
*/
cifs_set_port((struct sockaddr *)&server->dstaddr, CIFS_PORT);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
break;
default:
cifs_server_dbg(VFS, "RFC 1002 unknown response type 0x%x\n", type);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
}
return false;
@@ -771,9 +819,9 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
cancel_delayed_work_sync(&server->echo);
cancel_delayed_work_sync(&server->resolve);
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
server->tcpStatus = CifsExiting;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
wake_up_all(&server->response_q);
/* check if we have blocked requests that need to free */
@@ -866,7 +914,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) -
server->vals->header_preamble_size) {
cifs_server_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -ECONNABORTED;
}
@@ -913,7 +961,7 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
if (server->ops->is_session_expired &&
server->ops->is_session_expired(buf)) {
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -1;
}
@@ -1017,7 +1065,7 @@ next_pdu:
server->vals->header_preamble_size) {
cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n",
server->pdu_size);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
continue;
}
@@ -1069,7 +1117,7 @@ next_pdu:
server->ops->is_status_io_timeout(buf)) {
num_io_timeout++;
if (num_io_timeout > NUM_STATUS_IO_TIMEOUT) {
- cifs_reconnect(server);
+ cifs_reconnect(server, false);
num_io_timeout = 0;
continue;
}
@@ -1139,7 +1187,7 @@ next_pdu:
}
memalloc_noreclaim_restore(noreclaim_flag);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
}
/*
@@ -1390,16 +1438,12 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
else
cancel_delayed_work_sync(&server->reconnect);
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
server->tcpStatus = CifsExiting;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_crypto_secmech_release(server);
- /* fscache server cookies are based on primary channel only */
- if (!CIFS_SERVER_IS_CHAN(server))
- cifs_fscache_release_client_cookie(server);
-
kfree(server->session_key.response);
server->session_key.response = NULL;
server->session_key.len = 0;
@@ -1545,7 +1589,9 @@ smbd_connected:
* to the struct since the kernel thread not created yet
* no need to spinlock this update of tcpStatus
*/
+ spin_lock(&cifs_tcp_ses_lock);
tcp_ses->tcpStatus = CifsNeedNegotiate;
+ spin_unlock(&cifs_tcp_ses_lock);
if ((ctx->max_credits < 20) || (ctx->max_credits > 60000))
tcp_ses->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
@@ -1559,14 +1605,6 @@ smbd_connected:
list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
- /* fscache server cookies are based on primary channel only */
- if (!CIFS_SERVER_IS_CHAN(tcp_ses))
- cifs_fscache_get_client_cookie(tcp_ses);
-#ifdef CONFIG_CIFS_FSCACHE
- else
- tcp_ses->fscache = tcp_ses->primary_server->fscache;
-#endif /* CONFIG_CIFS_FSCACHE */
-
/* queue echo request delayed work */
queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
@@ -1762,15 +1800,13 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
spin_unlock(&cifs_tcp_ses_lock);
return;
}
- spin_unlock(&cifs_tcp_ses_lock);
/* ses_count can never go negative */
WARN_ON(ses->ses_count < 0);
- spin_lock(&GlobalMid_Lock);
if (ses->status == CifsGood)
ses->status = CifsExiting;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_free_ipc(ses);
@@ -1789,7 +1825,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
spin_lock(&ses->chan_lock);
chan_count = ses->chan_count;
- spin_unlock(&ses->chan_lock);
/* close any extra channels */
if (chan_count > 1) {
@@ -1806,6 +1841,7 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
ses->chans[i].server = NULL;
}
}
+ spin_unlock(&ses->chan_lock);
sesInfoFree(ses);
cifs_put_tcp_session(server, 0);
@@ -1987,11 +2023,13 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
cifs_dbg(FYI, "Existing smb sess found (status=%d)\n",
ses->status);
- mutex_lock(&ses->session_mutex);
- if (ses->need_reconnect) {
+ spin_lock(&ses->chan_lock);
+ if (cifs_chan_needs_reconnect(ses, server)) {
+ spin_unlock(&ses->chan_lock);
cifs_dbg(FYI, "Session needs reconnect\n");
- rc = cifs_negotiate_protocol(xid, ses);
+ mutex_lock(&ses->session_mutex);
+ rc = cifs_negotiate_protocol(xid, ses, server);
if (rc) {
mutex_unlock(&ses->session_mutex);
/* problem -- put our ses reference */
@@ -2000,7 +2038,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
return ERR_PTR(rc);
}
- rc = cifs_setup_session(xid, ses,
+ rc = cifs_setup_session(xid, ses, server,
ctx->local_nls);
if (rc) {
mutex_unlock(&ses->session_mutex);
@@ -2009,8 +2047,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
free_xid(xid);
return ERR_PTR(rc);
}
+ mutex_unlock(&ses->session_mutex);
+
+ spin_lock(&ses->chan_lock);
}
- mutex_unlock(&ses->session_mutex);
+ spin_unlock(&ses->chan_lock);
/* existing SMB ses has a server reference already */
cifs_put_tcp_session(server, 0);
@@ -2060,28 +2101,35 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
ses->sectype = ctx->sectype;
ses->sign = ctx->sign;
- mutex_lock(&ses->session_mutex);
/* add server as first channel */
spin_lock(&ses->chan_lock);
ses->chans[0].server = server;
ses->chan_count = 1;
ses->chan_max = ctx->multichannel ? ctx->max_channels:1;
+ ses->chans_need_reconnect = 1;
spin_unlock(&ses->chan_lock);
- rc = cifs_negotiate_protocol(xid, ses);
+ mutex_lock(&ses->session_mutex);
+ rc = cifs_negotiate_protocol(xid, ses, server);
if (!rc)
- rc = cifs_setup_session(xid, ses, ctx->local_nls);
+ rc = cifs_setup_session(xid, ses, server, ctx->local_nls);
+ mutex_unlock(&ses->session_mutex);
/* each channel uses a different signing key */
+ spin_lock(&ses->chan_lock);
memcpy(ses->chans[0].signkey, ses->smb3signingkey,
sizeof(ses->smb3signingkey));
+ spin_unlock(&ses->chan_lock);
- mutex_unlock(&ses->session_mutex);
if (rc)
goto get_ses_fail;
- /* success, put it on the list and add it as first channel */
+ /*
+ * success, put it on the list and add it as first channel
+ * note: the session becomes active soon after this. So you'll
+ * need to lock before changing something in the session.
+ */
spin_lock(&cifs_tcp_ses_lock);
list_add(&ses->smb_ses_list, &server->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -2161,6 +2209,9 @@ cifs_put_tcon(struct cifs_tcon *tcon)
/* tc_count can never go negative */
WARN_ON(tcon->tc_count < 0);
+ list_del_init(&tcon->tcon_list);
+ spin_unlock(&cifs_tcp_ses_lock);
+
if (tcon->use_witness) {
int rc;
@@ -2171,9 +2222,6 @@ cifs_put_tcon(struct cifs_tcon *tcon)
}
}
- list_del_init(&tcon->tcon_list);
- spin_unlock(&cifs_tcp_ses_lock);
-
xid = get_xid();
if (ses->server->ops->tree_disconnect)
ses->server->ops->tree_disconnect(xid, tcon);
@@ -2290,10 +2338,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
}
}
- /*
- * BB Do we need to wrap session_mutex around this TCon call and Unix
- * SetFS as we do on SessSetup and reconnect?
- */
xid = get_xid();
rc = ses->server->ops->tree_connect(xid, ses, ctx->UNC, tcon,
ctx->local_nls);
@@ -3029,12 +3073,15 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx)
* for just this mount.
*/
reset_cifs_unix_caps(xid, tcon, cifs_sb, ctx);
+ spin_lock(&cifs_tcp_ses_lock);
if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
(le64_to_cpu(tcon->fsUnixInfo.Capability) &
CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
+ spin_unlock(&cifs_tcp_ses_lock);
rc = -EACCES;
goto out;
}
+ spin_unlock(&cifs_tcp_ses_lock);
} else
tcon->unix_ext = 0; /* server does not support them */
@@ -3069,7 +3116,8 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx)
* Inside cifs_fscache_get_super_cookie it checks
* that we do not get super cookie twice.
*/
- cifs_fscache_get_super_cookie(tcon);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
+ cifs_fscache_get_super_cookie(tcon);
out:
mnt_ctx->server = server;
@@ -3322,6 +3370,11 @@ static int is_path_remote(struct mount_ctx *mnt_ctx)
rc = server->ops->is_path_accessible(xid, tcon, cifs_sb,
full_path);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (rc == -ENOENT && is_tcon_dfs(tcon))
+ rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, cifs_sb,
+ full_path);
+#endif
if (rc != 0 && rc != -EREMOTE) {
kfree(full_path);
return rc;
@@ -3709,8 +3762,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
if (rc == 0) {
bool is_unicode;
- tcon->tidStatus = CifsGood;
- tcon->need_reconnect = false;
tcon->tid = smb_buffer_response->Tid;
bcc_ptr = pByteArea(smb_buffer_response);
bytes_left = get_bcc(smb_buffer_response);
@@ -3799,26 +3850,37 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
}
int
-cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses)
+cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
int rc = 0;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
if (!server->ops->need_neg || !server->ops->negotiate)
return -ENOSYS;
/* only send once per connect */
- if (!server->ops->need_neg(server))
+ spin_lock(&cifs_tcp_ses_lock);
+ if (!server->ops->need_neg(server) ||
+ server->tcpStatus != CifsNeedNegotiate) {
+ spin_unlock(&cifs_tcp_ses_lock);
return 0;
+ }
+ server->tcpStatus = CifsInNegotiate;
+ spin_unlock(&cifs_tcp_ses_lock);
- rc = server->ops->negotiate(xid, ses);
+ rc = server->ops->negotiate(xid, ses, server);
if (rc == 0) {
- spin_lock(&GlobalMid_Lock);
- if (server->tcpStatus == CifsNeedNegotiate)
- server->tcpStatus = CifsGood;
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInNegotiate)
+ server->tcpStatus = CifsNeedSessSetup;
else
rc = -EHOSTDOWN;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInNegotiate)
+ server->tcpStatus = CifsNeedNegotiate;
+ spin_unlock(&cifs_tcp_ses_lock);
}
return rc;
@@ -3826,12 +3888,26 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses)
int
cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
struct nls_table *nls_info)
{
int rc = -ENOSYS;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
+ bool is_binding = false;
- if (!ses->binding) {
+ /* only send once per connect */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus != CifsNeedSessSetup) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+ server->tcpStatus = CifsInSessSetup;
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
+ if (!is_binding) {
ses->capabilities = server->capabilities;
if (!linuxExtEnabled)
ses->capabilities &= (~server->vals->cap_unix);
@@ -3849,10 +3925,26 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
server->sec_mode, server->capabilities, server->timeAdj);
if (server->ops->sess_setup)
- rc = server->ops->sess_setup(xid, ses, nls_info);
+ rc = server->ops->sess_setup(xid, ses, server, nls_info);
- if (rc)
+ if (rc) {
cifs_server_dbg(VFS, "Send error in SessSetup = %d\n", rc);
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInSessSetup)
+ server->tcpStatus = CifsNeedSessSetup;
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInSessSetup)
+ server->tcpStatus = CifsGood;
+ /* Even if one channel is active, session is in good state */
+ ses->status = CifsGood;
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ spin_lock(&ses->chan_lock);
+ cifs_chan_clear_need_reconnect(ses, server);
+ spin_unlock(&ses->chan_lock);
+ }
return rc;
}
@@ -4296,7 +4388,7 @@ static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tco
*/
if (rc && server->current_fullpath != server->origin_fullpath) {
server->current_fullpath = server->origin_fullpath;
- cifs_ses_mark_for_reconnect(tcon->ses);
+ cifs_reconnect(tcon->ses->server, true);
}
dfs_cache_free_tgts(tl);
@@ -4314,9 +4406,22 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
char *tree;
struct dfs_info3_param ref = {0};
+ /* only send once per connect */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->ses->status != CifsGood ||
+ (tcon->tidStatus != CifsNew &&
+ tcon->tidStatus != CifsNeedTcon)) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+ tcon->tidStatus = CifsInTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+
tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
- if (!tree)
- return -ENOMEM;
+ if (!tree) {
+ rc = -ENOMEM;
+ goto out;
+ }
if (tcon->ipc) {
scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
@@ -4348,13 +4453,52 @@ out:
kfree(tree);
cifs_put_tcp_super(sb);
+ if (rc) {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsNeedTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsGood;
+ spin_unlock(&cifs_tcp_ses_lock);
+ tcon->need_reconnect = false;
+ }
+
return rc;
}
#else
int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
{
+ int rc;
const struct smb_version_operations *ops = tcon->ses->server->ops;
- return ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+ /* only send once per connect */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->ses->status != CifsGood ||
+ (tcon->tidStatus != CifsNew &&
+ tcon->tidStatus != CifsNeedTcon)) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+ tcon->tidStatus = CifsInTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+ if (rc) {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsNeedTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsGood;
+ spin_unlock(&cifs_tcp_ses_lock);
+ tcon->need_reconnect = false;
+ }
+
+ return rc;
}
#endif
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index e9b0fa2a9614..dd9643751671 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -1355,7 +1355,7 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach
}
cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
- cifs_ses_mark_for_reconnect(tcon->ses);
+ cifs_reconnect(tcon->ses->server, true);
}
/* Refresh dfs referral of tcon and mark it for reconnect if needed */
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 6e8e7cc26ae2..ce9b22aecfba 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -22,6 +22,7 @@
#include "cifs_unicode.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
+#include "fscache.h"
static void
renew_parental_timestamps(struct dentry *direntry)
@@ -507,8 +508,12 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
server->ops->close(xid, tcon, &fid);
cifs_del_pending_open(&open);
rc = -ENOMEM;
+ goto out;
}
+ fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
+ file->f_mode & FMODE_WRITE);
+
out:
cifs_put_tlink(tlink);
out_free_xid:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9fee3af83a73..59334be9ed3b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -376,8 +376,6 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
struct cifsLockInfo *li, *tmp;
struct super_block *sb = inode->i_sb;
- cifs_fscache_release_inode_cookie(inode);
-
/*
* Delete any outstanding lock records. We'll lose them when the file
* is closed anyway.
@@ -570,7 +568,7 @@ int cifs_open(struct inode *inode, struct file *file)
spin_lock(&CIFS_I(inode)->deferred_lock);
cifs_del_deferred_close(cfile);
spin_unlock(&CIFS_I(inode)->deferred_lock);
- goto out;
+ goto use_cache;
} else {
_cifsFileInfo_put(cfile, true, false);
}
@@ -632,8 +630,6 @@ int cifs_open(struct inode *inode, struct file *file)
goto out;
}
- cifs_fscache_set_inode_cookie(inode, file);
-
if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
/*
* Time to set mode which we can not set earlier due to
@@ -652,6 +648,15 @@ int cifs_open(struct inode *inode, struct file *file)
cfile->pid);
}
+use_cache:
+ fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
+ file->f_mode & FMODE_WRITE);
+ if (file->f_flags & O_DIRECT &&
+ (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
+ file->f_flags & O_APPEND))
+ cifs_invalidate_cache(file_inode(file),
+ FSCACHE_INVAL_DIO_WRITE);
+
out:
free_dentry_path(page);
free_xid(xid);
@@ -876,6 +881,8 @@ int cifs_close(struct inode *inode, struct file *file)
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_deferred_close *dclose;
+ cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
+
if (file->private_data != NULL) {
cfile = file->private_data;
file->private_data = NULL;
@@ -886,7 +893,6 @@ int cifs_close(struct inode *inode, struct file *file)
dclose) {
if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
inode->i_ctime = inode->i_mtime = current_time(inode);
- cifs_fscache_update_inode_cookie(inode);
}
spin_lock(&cinode->deferred_lock);
cifs_add_deferred_close(cfile, dclose);
@@ -4198,10 +4204,12 @@ static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct file *file = vmf->vma->vm_file;
- struct inode *inode = file_inode(file);
- cifs_fscache_wait_on_page_write(inode, page);
+#ifdef CONFIG_CIFS_FSCACHE
+ if (PageFsCache(page) &&
+ wait_on_page_fscache_killable(page) < 0)
+ return VM_FAULT_RETRY;
+#endif
lock_page(page);
return VM_FAULT_LOCKED;
@@ -4275,8 +4283,6 @@ cifs_readv_complete(struct work_struct *work)
if (rdata->result == 0 ||
(rdata->result == -EAGAIN && got_bytes))
cifs_readpage_to_fscache(rdata->mapping->host, page);
- else
- cifs_fscache_uncache_page(rdata->mapping->host, page);
got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
@@ -4593,11 +4599,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
kref_put(&rdata->refcount, cifs_readdata_release);
}
- /* Any pages that have been shown to fscache but didn't get added to
- * the pagecache must be uncached before they get returned to the
- * allocator.
- */
- cifs_fscache_readpages_cancel(mapping->host, page_list);
free_xid(xid);
return rc;
}
@@ -4801,17 +4802,19 @@ static int cifs_release_page(struct page *page, gfp_t gfp)
{
if (PagePrivate(page))
return 0;
-
- return cifs_fscache_release_page(page, gfp);
+ if (PageFsCache(page)) {
+ if (current_is_kswapd() || !(gfp & __GFP_FS))
+ return false;
+ wait_on_page_fscache(page);
+ }
+ fscache_note_page_release(cifs_inode_cookie(page->mapping->host));
+ return true;
}
static void cifs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
- struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
-
- if (offset == 0 && length == PAGE_SIZE)
- cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
+ wait_on_page_fscache(page);
}
static int cifs_launder_page(struct page *page)
@@ -4831,7 +4834,7 @@ static int cifs_launder_page(struct page *page)
if (clear_page_dirty_for_io(page))
rc = cifs_writepage_locked(page, &wbc);
- cifs_fscache_invalidate_page(page, page->mapping->host);
+ wait_on_page_fscache(page);
return rc;
}
@@ -4988,6 +4991,19 @@ static void cifs_swap_deactivate(struct file *file)
/* do we need to unpin (or unlock) the file */
}
+/*
+ * Mark a page as having been made dirty and thus needing writeback. We also
+ * need to pin the cache object to write back to.
+ */
+#ifdef CONFIG_CIFS_FSCACHE
+static int cifs_set_page_dirty(struct page *page)
+{
+ return fscache_set_page_dirty(page, cifs_inode_cookie(page->mapping->host));
+}
+#else
+#define cifs_set_page_dirty __set_page_dirty_nobuffers
+#endif
+
const struct address_space_operations cifs_addr_ops = {
.readpage = cifs_readpage,
.readpages = cifs_readpages,
@@ -4995,7 +5011,7 @@ const struct address_space_operations cifs_addr_ops = {
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = cifs_set_page_dirty,
.releasepage = cifs_release_page,
.direct_IO = cifs_direct_io,
.invalidatepage = cifs_invalidate_page,
@@ -5020,7 +5036,7 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = cifs_set_page_dirty,
.releasepage = cifs_release_page,
.invalidatepage = cifs_invalidate_page,
.launder_page = cifs_launder_page,
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index e3ed25dc6f3f..7ec35f3f0a5f 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -37,6 +37,8 @@
#include "rfc1002pdu.h"
#include "fs_context.h"
+static DEFINE_MUTEX(cifs_mount_mutex);
+
static const match_table_t cifs_smb_version_tokens = {
{ Smb_1, SMB1_VERSION_STRING },
{ Smb_20, SMB20_VERSION_STRING},
@@ -707,10 +709,14 @@ static int smb3_get_tree_common(struct fs_context *fc)
static int smb3_get_tree(struct fs_context *fc)
{
int err = smb3_fs_context_validate(fc);
+ int ret;
if (err)
return err;
- return smb3_get_tree_common(fc);
+ mutex_lock(&cifs_mount_mutex);
+ ret = smb3_get_tree_common(fc);
+ mutex_unlock(&cifs_mount_mutex);
+ return ret;
}
static void smb3_fs_context_free(struct fs_context *fc)
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 003c5f1f4dfb..efaac4d5ff55 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -12,250 +12,136 @@
#include "cifs_fs_sb.h"
#include "cifsproto.h"
-/*
- * Key layout of CIFS server cache index object
- */
-struct cifs_server_key {
- __u64 conn_id;
-} __packed;
-
-/*
- * Get a cookie for a server object keyed by {IPaddress,port,family} tuple
- */
-void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
-{
- struct cifs_server_key key;
-
- /*
- * Check if cookie was already initialized so don't reinitialize it.
- * In the future, as we integrate with newer fscache features,
- * we may want to instead add a check if cookie has changed
- */
- if (server->fscache)
- return;
-
- memset(&key, 0, sizeof(key));
- key.conn_id = server->conn_id;
-
- server->fscache =
- fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
- &cifs_fscache_server_index_def,
- &key, sizeof(key),
- NULL, 0,
- server, 0, true);
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, server, server->fscache);
-}
-
-void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
+static void cifs_fscache_fill_volume_coherency(
+ struct cifs_tcon *tcon,
+ struct cifs_fscache_volume_coherency_data *cd)
{
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, server, server->fscache);
- fscache_relinquish_cookie(server->fscache, NULL, false);
- server->fscache = NULL;
+ memset(cd, 0, sizeof(*cd));
+ cd->resource_id = cpu_to_le64(tcon->resource_id);
+ cd->vol_create_time = tcon->vol_create_time;
+ cd->vol_serial_number = cpu_to_le32(tcon->vol_serial_number);
}
-void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
+int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
{
+ struct cifs_fscache_volume_coherency_data cd;
struct TCP_Server_Info *server = tcon->ses->server;
+ struct fscache_volume *vcookie;
+ const struct sockaddr *sa = (struct sockaddr *)&server->dstaddr;
+ size_t slen, i;
char *sharename;
- struct cifs_fscache_super_auxdata auxdata;
+ char *key;
+ int ret = -ENOMEM;
- /*
- * Check if cookie was already initialized so don't reinitialize it.
- * In the future, as we integrate with newer fscache features,
- * we may want to instead add a check if cookie has changed
- */
- if (tcon->fscache)
- return;
+ tcon->fscache = NULL;
+ switch (sa->sa_family) {
+ case AF_INET:
+ case AF_INET6:
+ break;
+ default:
+ cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
+ return -EINVAL;
+ }
+
+ memset(&key, 0, sizeof(key));
sharename = extract_sharename(tcon->treeName);
if (IS_ERR(sharename)) {
cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
- tcon->fscache = NULL;
- return;
+ return -EINVAL;
}
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
- auxdata.vol_create_time = tcon->vol_create_time;
- auxdata.vol_serial_number = tcon->vol_serial_number;
+ slen = strlen(sharename);
+ for (i = 0; i < slen; i++)
+ if (sharename[i] == '/')
+ sharename[i] = ';';
+
+ key = kasprintf(GFP_KERNEL, "cifs,%pISpc,%s", sa, sharename);
+ if (!key)
+ goto out;
+
+ cifs_fscache_fill_volume_coherency(tcon, &cd);
+ vcookie = fscache_acquire_volume(key,
+ NULL, /* preferred_cache */
+ &cd, sizeof(cd));
+ cifs_dbg(FYI, "%s: (%s/0x%p)\n", __func__, key, vcookie);
+ if (IS_ERR(vcookie)) {
+ if (vcookie != ERR_PTR(-EBUSY)) {
+ ret = PTR_ERR(vcookie);
+ goto out_2;
+ }
+ pr_err("Cache volume key already in use (%s)\n", key);
+ vcookie = NULL;
+ }
- tcon->fscache =
- fscache_acquire_cookie(server->fscache,
- &cifs_fscache_super_index_def,
- sharename, strlen(sharename),
- &auxdata, sizeof(auxdata),
- tcon, 0, true);
+ tcon->fscache = vcookie;
+ ret = 0;
+out_2:
+ kfree(key);
+out:
kfree(sharename);
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, server->fscache, tcon->fscache);
+ return ret;
}
void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
{
- struct cifs_fscache_super_auxdata auxdata;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
- auxdata.vol_create_time = tcon->vol_create_time;
- auxdata.vol_serial_number = tcon->vol_serial_number;
+ struct cifs_fscache_volume_coherency_data cd;
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, tcon->fscache);
- fscache_relinquish_cookie(tcon->fscache, &auxdata, false);
- tcon->fscache = NULL;
-}
-
-static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi,
- struct cifs_tcon *tcon)
-{
- struct cifs_fscache_inode_auxdata auxdata;
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
-
- cifsi->fscache =
- fscache_acquire_cookie(tcon->fscache,
- &cifs_fscache_inode_object_def,
- &cifsi->uniqueid, sizeof(cifsi->uniqueid),
- &auxdata, sizeof(auxdata),
- cifsi, cifsi->vfs_inode.i_size, true);
+ cifs_fscache_fill_volume_coherency(tcon, &cd);
+ fscache_relinquish_volume(tcon->fscache, &cd, false);
+ tcon->fscache = NULL;
}
-static void cifs_fscache_enable_inode_cookie(struct inode *inode)
+void cifs_fscache_get_inode_cookie(struct inode *inode)
{
+ struct cifs_fscache_inode_coherency_data cd;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
- if (cifsi->fscache)
- return;
-
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE))
- return;
-
- cifs_fscache_acquire_inode_cookie(cifsi, tcon);
+ cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd);
- cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
- __func__, tcon->fscache, cifsi->fscache);
+ cifsi->fscache =
+ fscache_acquire_cookie(tcon->fscache, 0,
+ &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+ &cd, sizeof(cd),
+ i_size_read(&cifsi->vfs_inode));
}
-void cifs_fscache_release_inode_cookie(struct inode *inode)
+void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update)
{
- struct cifs_fscache_inode_auxdata auxdata;
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
-
- if (cifsi->fscache) {
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
+ if (update) {
+ struct cifs_fscache_inode_coherency_data cd;
+ loff_t i_size = i_size_read(inode);
- cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
- /* fscache_relinquish_cookie does not seem to update auxdata */
- fscache_update_cookie(cifsi->fscache, &auxdata);
- fscache_relinquish_cookie(cifsi->fscache, &auxdata, false);
- cifsi->fscache = NULL;
+ cifs_fscache_fill_coherency(inode, &cd);
+ fscache_unuse_cookie(cifs_inode_cookie(inode), &cd, &i_size);
+ } else {
+ fscache_unuse_cookie(cifs_inode_cookie(inode), NULL, NULL);
}
}
-void cifs_fscache_update_inode_cookie(struct inode *inode)
+void cifs_fscache_release_inode_cookie(struct inode *inode)
{
- struct cifs_fscache_inode_auxdata auxdata;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
if (cifsi->fscache) {
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
-
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
- fscache_update_cookie(cifsi->fscache, &auxdata);
- }
-}
-
-void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
-{
- cifs_fscache_enable_inode_cookie(inode);
-}
-
-void cifs_fscache_reset_inode_cookie(struct inode *inode)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
- struct fscache_cookie *old = cifsi->fscache;
-
- if (cifsi->fscache) {
- /* retire the current fscache cache and get a new one */
- fscache_relinquish_cookie(cifsi->fscache, NULL, true);
-
- cifs_fscache_acquire_inode_cookie(cifsi, tcon);
- cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n",
- __func__, cifsi->fscache, old);
+ fscache_relinquish_cookie(cifsi->fscache, false);
+ cifsi->fscache = NULL;
}
}
-int cifs_fscache_release_page(struct page *page, gfp_t gfp)
-{
- if (PageFsCache(page)) {
- struct inode *inode = page->mapping->host;
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
-
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, page, cifsi->fscache);
- if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
- return 0;
- }
-
- return 1;
-}
-
-static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
- int error)
-{
- cifs_dbg(FYI, "%s: (0x%p/%d)\n", __func__, page, error);
- if (!error)
- SetPageUptodate(page);
- unlock_page(page);
-}
-
/*
* Retrieve a page from FS-Cache
*/
int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
{
- int ret;
-
cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
__func__, CIFS_I(inode)->fscache, page, inode);
- ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
- cifs_readpage_from_fscache_complete,
- NULL,
- GFP_KERNEL);
- switch (ret) {
-
- case 0: /* page found in fscache, read submitted */
- cifs_dbg(FYI, "%s: submitted\n", __func__);
- return ret;
- case -ENOBUFS: /* page won't be cached */
- case -ENODATA: /* page not in cache */
- cifs_dbg(FYI, "%s: %d\n", __func__, ret);
- return 1;
-
- default:
- cifs_dbg(VFS, "unknown error ret = %d\n", ret);
- }
- return ret;
+ return -ENOBUFS; // Needs conversion to using netfslib
}
/*
@@ -266,78 +152,19 @@ int __cifs_readpages_from_fscache(struct inode *inode,
struct list_head *pages,
unsigned *nr_pages)
{
- int ret;
-
cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n",
__func__, CIFS_I(inode)->fscache, *nr_pages, inode);
- ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
- pages, nr_pages,
- cifs_readpage_from_fscache_complete,
- NULL,
- mapping_gfp_mask(mapping));
- switch (ret) {
- case 0: /* read submitted to the cache for all pages */
- cifs_dbg(FYI, "%s: submitted\n", __func__);
- return ret;
-
- case -ENOBUFS: /* some pages are not cached and can't be */
- case -ENODATA: /* some pages are not cached */
- cifs_dbg(FYI, "%s: no page\n", __func__);
- return 1;
-
- default:
- cifs_dbg(FYI, "unknown error ret = %d\n", ret);
- }
-
- return ret;
+ return -ENOBUFS; // Needs conversion to using netfslib
}
void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
{
struct cifsInodeInfo *cifsi = CIFS_I(inode);
- int ret;
WARN_ON(!cifsi->fscache);
cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
__func__, cifsi->fscache, page, inode);
- ret = fscache_write_page(cifsi->fscache, page,
- cifsi->vfs_inode.i_size, GFP_KERNEL);
- if (ret != 0)
- fscache_uncache_page(cifsi->fscache, page);
-}
-
-void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages)
-{
- cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n",
- __func__, CIFS_I(inode)->fscache, inode);
- fscache_readpages_cancel(CIFS_I(inode)->fscache, pages);
-}
-
-void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct fscache_cookie *cookie = cifsi->fscache;
-
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
- fscache_wait_on_page_write(cookie, page);
- fscache_uncache_page(cookie, page);
-}
-
-void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct fscache_cookie *cookie = cifsi->fscache;
-
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
- fscache_wait_on_page_write(cookie, page);
-}
-
-void __cifs_fscache_uncache_page(struct inode *inode, struct page *page)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct fscache_cookie *cookie = cifsi->fscache;
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
- fscache_uncache_page(cookie, page);
+ // Needs conversion to using netfslib
}
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 9baa1d0f22bd..c6ca49ac33d4 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -13,84 +13,71 @@
#include "cifsglob.h"
-#ifdef CONFIG_CIFS_FSCACHE
-
/*
- * Auxiliary data attached to CIFS superblock within the cache
+ * Coherency data attached to CIFS volume within the cache
*/
-struct cifs_fscache_super_auxdata {
- u64 resource_id; /* unique server resource id */
+struct cifs_fscache_volume_coherency_data {
+ __le64 resource_id; /* unique server resource id */
__le64 vol_create_time;
- u32 vol_serial_number;
+ __le32 vol_serial_number;
} __packed;
/*
- * Auxiliary data attached to CIFS inode within the cache
+ * Coherency data attached to CIFS inode within the cache.
*/
-struct cifs_fscache_inode_auxdata {
- u64 last_write_time_sec;
- u64 last_change_time_sec;
- u32 last_write_time_nsec;
- u32 last_change_time_nsec;
- u64 eof;
+struct cifs_fscache_inode_coherency_data {
+ __le64 last_write_time_sec;
+ __le64 last_change_time_sec;
+ __le32 last_write_time_nsec;
+ __le32 last_change_time_nsec;
};
-/*
- * cache.c
- */
-extern struct fscache_netfs cifs_fscache_netfs;
-extern const struct fscache_cookie_def cifs_fscache_server_index_def;
-extern const struct fscache_cookie_def cifs_fscache_super_index_def;
-extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
-
-extern int cifs_fscache_register(void);
-extern void cifs_fscache_unregister(void);
+#ifdef CONFIG_CIFS_FSCACHE
/*
* fscache.c
*/
-extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *);
-extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *);
-extern void cifs_fscache_get_super_cookie(struct cifs_tcon *);
+extern int cifs_fscache_get_super_cookie(struct cifs_tcon *);
extern void cifs_fscache_release_super_cookie(struct cifs_tcon *);
+extern void cifs_fscache_get_inode_cookie(struct inode *inode);
extern void cifs_fscache_release_inode_cookie(struct inode *);
-extern void cifs_fscache_update_inode_cookie(struct inode *inode);
-extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
-extern void cifs_fscache_reset_inode_cookie(struct inode *);
+extern void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update);
+
+static inline
+void cifs_fscache_fill_coherency(struct inode *inode,
+ struct cifs_fscache_inode_coherency_data *cd)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ memset(cd, 0, sizeof(*cd));
+ cd->last_write_time_sec = cpu_to_le64(cifsi->vfs_inode.i_mtime.tv_sec);
+ cd->last_write_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_mtime.tv_nsec);
+ cd->last_change_time_sec = cpu_to_le64(cifsi->vfs_inode.i_ctime.tv_sec);
+ cd->last_change_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_ctime.tv_nsec);
+}
+
-extern void __cifs_fscache_invalidate_page(struct page *, struct inode *);
-extern void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page);
-extern void __cifs_fscache_uncache_page(struct inode *inode, struct page *page);
extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
extern int __cifs_readpages_from_fscache(struct inode *,
struct address_space *,
struct list_head *,
unsigned *);
-extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *);
-
extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
-static inline void cifs_fscache_invalidate_page(struct page *page,
- struct inode *inode)
+static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode)
{
- if (PageFsCache(page))
- __cifs_fscache_invalidate_page(page, inode);
+ return CIFS_I(inode)->fscache;
}
-static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
- struct page *page)
+static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags)
{
- if (PageFsCache(page))
- __cifs_fscache_wait_on_page_write(inode, page);
-}
+ struct cifs_fscache_inode_coherency_data cd;
-static inline void cifs_fscache_uncache_page(struct inode *inode,
- struct page *page)
-{
- if (PageFsCache(page))
- __cifs_fscache_uncache_page(inode, page);
+ cifs_fscache_fill_coherency(inode, &cd);
+ fscache_invalidate(cifs_inode_cookie(inode), &cd,
+ i_size_read(inode), flags);
}
static inline int cifs_readpage_from_fscache(struct inode *inode,
@@ -120,41 +107,21 @@ static inline void cifs_readpage_to_fscache(struct inode *inode,
__cifs_readpage_to_fscache(inode, page);
}
-static inline void cifs_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
+#else /* CONFIG_CIFS_FSCACHE */
+static inline
+void cifs_fscache_fill_coherency(struct inode *inode,
+ struct cifs_fscache_inode_coherency_data *cd)
{
- if (CIFS_I(inode)->fscache)
- return __cifs_fscache_readpages_cancel(inode, pages);
}
-#else /* CONFIG_CIFS_FSCACHE */
-static inline int cifs_fscache_register(void) { return 0; }
-static inline void cifs_fscache_unregister(void) {}
-
-static inline void
-cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {}
-static inline void
-cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {}
-static inline void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) {}
-static inline void
-cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
+static inline int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) { return 0; }
+static inline void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
+static inline void cifs_fscache_get_inode_cookie(struct inode *inode) {}
static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
-static inline void cifs_fscache_update_inode_cookie(struct inode *inode) {}
-static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
- struct file *filp) {}
-static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
-static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
-{
- return 1; /* May release page */
-}
-
-static inline void cifs_fscache_invalidate_page(struct page *page,
- struct inode *inode) {}
-static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
- struct page *page) {}
-static inline void cifs_fscache_uncache_page(struct inode *inode,
- struct page *page) {}
+static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
+static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
+static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
static inline int
cifs_readpage_from_fscache(struct inode *inode, struct page *page)
@@ -173,11 +140,6 @@ static inline int cifs_readpages_from_fscache(struct inode *inode,
static inline void cifs_readpage_to_fscache(struct inode *inode,
struct page *page) {}
-static inline void cifs_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
-{
-}
-
#endif /* CONFIG_CIFS_FSCACHE */
#endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 279622e4eb1c..7d8b3ceb2af3 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -952,6 +952,12 @@ cifs_get_inode_info(struct inode **inode,
rc = server->ops->query_path_info(xid, tcon, cifs_sb,
full_path, tmp_data,
&adjust_tz, &is_reparse_point);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (rc == -ENOENT && is_tcon_dfs(tcon))
+ rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon,
+ cifs_sb,
+ full_path);
+#endif
data = tmp_data;
}
@@ -1298,10 +1304,7 @@ retry_iget5_locked:
inode->i_flags |= S_NOATIME | S_NOCMTIME;
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
-#ifdef CONFIG_CIFS_FSCACHE
- /* initialize per-inode cache cookie pointer */
- CIFS_I(inode)->fscache = NULL;
-#endif
+ cifs_fscache_get_inode_cookie(inode);
unlock_new_inode(inode);
}
}
@@ -1370,6 +1373,7 @@ iget_no_retry:
iget_failed(inode);
inode = ERR_PTR(rc);
}
+
out:
kfree(path);
free_xid(xid);
@@ -2257,6 +2261,8 @@ cifs_dentry_needs_reval(struct dentry *dentry)
int
cifs_invalidate_mapping(struct inode *inode)
{
+ struct cifs_fscache_inode_coherency_data cd;
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
int rc = 0;
if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
@@ -2266,7 +2272,8 @@ cifs_invalidate_mapping(struct inode *inode)
__func__, inode);
}
- cifs_fscache_reset_inode_cookie(inode);
+ cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd);
+ fscache_invalidate(cifs_inode_cookie(inode), &cd, i_size_read(inode), 0);
return rc;
}
@@ -2771,8 +2778,10 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
goto out;
if ((attrs->ia_valid & ATTR_SIZE) &&
- attrs->ia_size != i_size_read(inode))
+ attrs->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attrs->ia_size);
+ fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
+ }
setattr_copy(&init_user_ns, inode, attrs);
mark_inode_dirty(inode);
@@ -2967,8 +2976,10 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
goto cifs_setattr_exit;
if ((attrs->ia_valid & ATTR_SIZE) &&
- attrs->ia_size != i_size_read(inode))
+ attrs->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attrs->ia_size);
+ fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
+ }
setattr_copy(&init_user_ns, inode, attrs);
mark_inode_dirty(inode);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 5148d48d6a35..56598f7dbe00 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1302,4 +1302,53 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
return 0;
}
+
+/** cifs_dfs_query_info_nonascii_quirk
+ * Handle weird Windows SMB server behaviour. It responds with
+ * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request
+ * for "\<server>\<dfsname>\<linkpath>" DFS reference,
+ * where <dfsname> contains non-ASCII unicode symbols.
+ *
+ * Check such DFS reference and emulate -ENOENT if it is actual.
+ */
+int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ const char *linkpath)
+{
+ char *treename, *dfspath, sep;
+ int treenamelen, linkpathlen, rc;
+
+ treename = tcon->treeName;
+ /* MS-DFSC: All paths in REQ_GET_DFS_REFERRAL and RESP_GET_DFS_REFERRAL
+ * messages MUST be encoded with exactly one leading backslash, not two
+ * leading backslashes.
+ */
+ sep = CIFS_DIR_SEP(cifs_sb);
+ if (treename[0] == sep && treename[1] == sep)
+ treename++;
+ linkpathlen = strlen(linkpath);
+ treenamelen = strnlen(treename, MAX_TREE_SIZE + 1);
+ dfspath = kzalloc(treenamelen + linkpathlen + 1, GFP_KERNEL);
+ if (!dfspath)
+ return -ENOMEM;
+ if (treenamelen)
+ memcpy(dfspath, treename, treenamelen);
+ memcpy(dfspath + treenamelen, linkpath, linkpathlen);
+ rc = dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls,
+ cifs_remap(cifs_sb), dfspath, NULL, NULL);
+ if (rc == 0) {
+ cifs_dbg(FYI, "DFS ref '%s' is found, emulate -EREMOTE\n",
+ dfspath);
+ rc = -EREMOTE;
+ } else if (rc == -EEXIST) {
+ cifs_dbg(FYI, "DFS ref '%s' is not found, emulate -ENOENT\n",
+ dfspath);
+ rc = -ENOENT;
+ } else {
+ cifs_dbg(FYI, "%s: dfs_cache_find returned %d\n", __func__, rc);
+ }
+ kfree(dfspath);
+ return rc;
+}
#endif
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index fa9fbd6a819c..ebe236b9d9f5 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -896,10 +896,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr)
if (class == ERRSRV && code == ERRbaduid) {
cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n",
code);
- spin_lock(&GlobalMid_Lock);
- if (mid->server->tcpStatus != CifsExiting)
- mid->server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
+ cifs_reconnect(mid->server, false);
}
}
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index fe707f45da89..298458404252 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -40,7 +40,7 @@
#define NTLMSSP_REQUEST_NON_NT_KEY 0x400000
#define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000
/* #define reserved4 0x1000000 */
-#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we do not set */
+#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we only set for SMB2+ */
/* #define reserved3 0x4000000 */
/* #define reserved2 0x8000000 */
/* #define reserved1 0x10000000 */
@@ -87,6 +87,30 @@ typedef struct _NEGOTIATE_MESSAGE {
/* followed by WorkstationString */
} __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE;
+#define NTLMSSP_REVISION_W2K3 0x0F
+
+/* See MS-NLMP section 2.2.2.10 */
+struct ntlmssp_version {
+ __u8 ProductMajorVersion;
+ __u8 ProductMinorVersion;
+ __le16 ProductBuild; /* we send the cifs.ko module version here */
+ __u8 Reserved[3];
+ __u8 NTLMRevisionCurrent; /* currently 0x0F */
+} __packed;
+
+/* see MS-NLMP section 2.2.1.1 */
+struct negotiate_message {
+ __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+ __le32 MessageType; /* NtLmNegotiate = 1 */
+ __le32 NegotiateFlags;
+ SECURITY_BUFFER DomainName; /* RFC 1001 style and ASCII */
+ SECURITY_BUFFER WorkstationName; /* RFC 1001 and ASCII */
+ struct ntlmssp_version Version;
+ /* SECURITY_BUFFER */
+ char DomainString[0];
+ /* followed by WorkstationString */
+} __packed;
+
typedef struct _CHALLENGE_MESSAGE {
__u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
__le32 MessageType; /* NtLmChallenge = 2 */
@@ -121,7 +145,13 @@ typedef struct _AUTHENTICATE_MESSAGE {
int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
int build_ntlmssp_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ const struct nls_table *nls_cp);
+int build_ntlmssp_smb3_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp);
int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 035dc3e245dc..dc3b16d1be09 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -17,6 +17,8 @@
#include "nterr.h"
#include <linux/utsname.h>
#include <linux/slab.h>
+#include <linux/version.h>
+#include "cifsfs.h"
#include "cifs_spnego.h"
#include "smb2proto.h"
#include "fs_context.h"
@@ -65,6 +67,55 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
return false;
}
+/* channel helper functions. assumed that chan_lock is held by caller. */
+
+unsigned int
+cifs_ses_get_chan_index(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int i;
+
+ for (i = 0; i < ses->chan_count; i++) {
+ if (ses->chans[i].server == server)
+ return i;
+ }
+
+ /* If we didn't find the channel, it is likely a bug */
+ WARN_ON(1);
+ return 0;
+}
+
+void
+cifs_chan_set_need_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ set_bit(chan_index, &ses->chans_need_reconnect);
+ cifs_dbg(FYI, "Set reconnect bitmask for chan %u; now 0x%lx\n",
+ chan_index, ses->chans_need_reconnect);
+}
+
+void
+cifs_chan_clear_need_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ clear_bit(chan_index, &ses->chans_need_reconnect);
+ cifs_dbg(FYI, "Cleared reconnect bitmask for chan %u; now 0x%lx\n",
+ chan_index, ses->chans_need_reconnect);
+}
+
+bool
+cifs_chan_needs_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index);
+}
+
/* returns number of channels added */
int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
{
@@ -87,10 +138,10 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
left = ses->chan_max - ses->chan_count;
if (left <= 0) {
+ spin_unlock(&ses->chan_lock);
cifs_dbg(FYI,
"ses already at max_channels (%zu), nothing to open\n",
ses->chan_max);
- spin_unlock(&ses->chan_lock);
return 0;
}
@@ -261,9 +312,8 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
chan_server = cifs_get_tcp_session(&ctx, ses->server);
- mutex_lock(&ses->session_mutex);
spin_lock(&ses->chan_lock);
- chan = ses->binding_chan = &ses->chans[ses->chan_count];
+ chan = &ses->chans[ses->chan_count];
chan->server = chan_server;
if (IS_ERR(chan->server)) {
rc = PTR_ERR(chan->server);
@@ -271,8 +321,15 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
spin_unlock(&ses->chan_lock);
goto out;
}
+ ses->chan_count++;
+ atomic_set(&ses->chan_seq, 0);
+
+ /* Mark this channel as needing connect/setup */
+ cifs_chan_set_need_reconnect(ses, chan->server);
+
spin_unlock(&ses->chan_lock);
+ mutex_lock(&ses->session_mutex);
/*
* We need to allocate the server crypto now as we will need
* to sign packets before we generate the channel signing key
@@ -281,37 +338,29 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
rc = smb311_crypto_shash_allocate(chan->server);
if (rc) {
cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__);
+ mutex_unlock(&ses->session_mutex);
goto out;
}
- ses->binding = true;
- rc = cifs_negotiate_protocol(xid, ses);
- if (rc)
- goto out;
-
- rc = cifs_setup_session(xid, ses, cifs_sb->local_nls);
- if (rc)
- goto out;
-
- /* success, put it on the list
- * XXX: sharing ses between 2 tcp servers is not possible, the
- * way "internal" linked lists works in linux makes element
- * only able to belong to one list
- *
- * the binding session is already established so the rest of
- * the code should be able to look it up, no need to add the
- * ses to the new server.
- */
+ rc = cifs_negotiate_protocol(xid, ses, chan->server);
+ if (!rc)
+ rc = cifs_setup_session(xid, ses, chan->server, cifs_sb->local_nls);
- spin_lock(&ses->chan_lock);
- ses->chan_count++;
- atomic_set(&ses->chan_seq, 0);
- spin_unlock(&ses->chan_lock);
+ mutex_unlock(&ses->session_mutex);
out:
- ses->binding = false;
- ses->binding_chan = NULL;
- mutex_unlock(&ses->session_mutex);
+ if (rc && chan->server) {
+ spin_lock(&ses->chan_lock);
+ /* we rely on all bits beyond chan_count to be clear */
+ cifs_chan_clear_need_reconnect(ses, chan->server);
+ ses->chan_count--;
+ /*
+ * chan_count should never reach 0 as at least the primary
+ * channel is always allocated
+ */
+ WARN_ON(ses->chan_count < 1);
+ spin_unlock(&ses->chan_lock);
+ }
if (rc && chan->server)
cifs_put_tcp_session(chan->server, 0);
@@ -319,20 +368,9 @@ out:
return rc;
}
-/* Mark all session channels for reconnect */
-void cifs_ses_mark_for_reconnect(struct cifs_ses *ses)
-{
- int i;
-
- for (i = 0; i < ses->chan_count; i++) {
- spin_lock(&GlobalMid_Lock);
- if (ses->chans[i].server->tcpStatus != CifsExiting)
- ses->chans[i].server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
- }
-}
-
-static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
+static __u32 cifs_ssetup_hdr(struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ SESSION_SETUP_ANDX *pSMB)
{
__u32 capabilities = 0;
@@ -345,7 +383,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
pSMB->req.MaxBufferSize = cpu_to_le16(min_t(u32,
CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
USHRT_MAX));
- pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
+ pSMB->req.MaxMpxCount = cpu_to_le16(server->maxReq);
pSMB->req.VcNumber = cpu_to_le16(1);
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
@@ -356,7 +394,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
- if (ses->server->sign)
+ if (server->sign)
pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
if (ses->capabilities & CAP_UNICODE) {
@@ -719,10 +757,10 @@ static inline void cifs_security_buffer_from_str(SECURITY_BUFFER *pbuf,
int build_ntlmssp_negotiate_blob(unsigned char **pbuffer,
u16 *buflen,
struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp)
{
int rc = 0;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
NEGOTIATE_MESSAGE *sec_blob;
__u32 flags;
unsigned char *tmp;
@@ -773,9 +811,78 @@ setup_ntlm_neg_ret:
return rc;
}
+/*
+ * Build ntlmssp blob with additional fields, such as version,
+ * supported by modern servers. For safety limit to SMB3 or later
+ * See notes in MS-NLMP Section 2.2.2.1 e.g.
+ */
+int build_ntlmssp_smb3_negotiate_blob(unsigned char **pbuffer,
+ u16 *buflen,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ const struct nls_table *nls_cp)
+{
+ int rc = 0;
+ struct negotiate_message *sec_blob;
+ __u32 flags;
+ unsigned char *tmp;
+ int len;
+
+ len = size_of_ntlmssp_blob(ses, sizeof(struct negotiate_message));
+ *pbuffer = kmalloc(len, GFP_KERNEL);
+ if (!*pbuffer) {
+ rc = -ENOMEM;
+ cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc);
+ *buflen = 0;
+ goto setup_ntlm_smb3_neg_ret;
+ }
+ sec_blob = (struct negotiate_message *)*pbuffer;
+
+ memset(*pbuffer, 0, sizeof(struct negotiate_message));
+ memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
+ sec_blob->MessageType = NtLmNegotiate;
+
+ /* BB is NTLMV2 session security format easier to use here? */
+ flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
+ NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
+ NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
+ NTLMSSP_NEGOTIATE_ALWAYS_SIGN | NTLMSSP_NEGOTIATE_SEAL |
+ NTLMSSP_NEGOTIATE_SIGN | NTLMSSP_NEGOTIATE_VERSION;
+ if (!server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+
+ sec_blob->Version.ProductMajorVersion = LINUX_VERSION_MAJOR;
+ sec_blob->Version.ProductMinorVersion = LINUX_VERSION_PATCHLEVEL;
+ sec_blob->Version.ProductBuild = cpu_to_le16(SMB3_PRODUCT_BUILD);
+ sec_blob->Version.NTLMRevisionCurrent = NTLMSSP_REVISION_W2K3;
+
+ tmp = *pbuffer + sizeof(struct negotiate_message);
+ ses->ntlmssp->client_flags = flags;
+ sec_blob->NegotiateFlags = cpu_to_le32(flags);
+
+ /* these fields should be null in negotiate phase MS-NLMP 3.1.5.1.1 */
+ cifs_security_buffer_from_str(&sec_blob->DomainName,
+ NULL,
+ CIFS_MAX_DOMAINNAME_LEN,
+ *pbuffer, &tmp,
+ nls_cp);
+
+ cifs_security_buffer_from_str(&sec_blob->WorkstationName,
+ NULL,
+ CIFS_MAX_WORKSTATION_LEN,
+ *pbuffer, &tmp,
+ nls_cp);
+
+ *buflen = tmp - *pbuffer;
+setup_ntlm_smb3_neg_ret:
+ return rc;
+}
+
+
int build_ntlmssp_auth_blob(unsigned char **pbuffer,
u16 *buflen,
struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp)
{
int rc;
@@ -912,6 +1019,7 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
struct sess_data {
unsigned int xid;
struct cifs_ses *ses;
+ struct TCP_Server_Info *server;
struct nls_table *nls_cp;
void (*func)(struct sess_data *);
int result;
@@ -978,31 +1086,27 @@ static int
sess_establish_session(struct sess_data *sess_data)
{
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
- mutex_lock(&ses->server->srv_mutex);
- if (!ses->server->session_estab) {
- if (ses->server->sign) {
- ses->server->session_key.response =
+ mutex_lock(&server->srv_mutex);
+ if (!server->session_estab) {
+ if (server->sign) {
+ server->session_key.response =
kmemdup(ses->auth_key.response,
ses->auth_key.len, GFP_KERNEL);
- if (!ses->server->session_key.response) {
- mutex_unlock(&ses->server->srv_mutex);
+ if (!server->session_key.response) {
+ mutex_unlock(&server->srv_mutex);
return -ENOMEM;
}
- ses->server->session_key.len =
+ server->session_key.len =
ses->auth_key.len;
}
- ses->server->sequence_number = 0x2;
- ses->server->session_estab = true;
+ server->sequence_number = 0x2;
+ server->session_estab = true;
}
- mutex_unlock(&ses->server->srv_mutex);
+ mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "CIFS session established successfully\n");
- spin_lock(&GlobalMid_Lock);
- ses->status = CifsGood;
- ses->need_reconnect = false;
- spin_unlock(&GlobalMid_Lock);
-
return 0;
}
@@ -1036,6 +1140,7 @@ sess_auth_ntlmv2(struct sess_data *sess_data)
SESSION_SETUP_ANDX *pSMB;
char *bcc_ptr;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u32 capabilities;
__u16 bytes_remaining;
@@ -1047,7 +1152,7 @@ sess_auth_ntlmv2(struct sess_data *sess_data)
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
bcc_ptr = sess_data->iov[2].iov_base;
- capabilities = cifs_ssetup_hdr(ses, pSMB);
+ capabilities = cifs_ssetup_hdr(ses, server, pSMB);
pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
@@ -1145,6 +1250,7 @@ sess_auth_kerberos(struct sess_data *sess_data)
SESSION_SETUP_ANDX *pSMB;
char *bcc_ptr;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u32 capabilities;
__u16 bytes_remaining;
struct key *spnego_key = NULL;
@@ -1159,9 +1265,9 @@ sess_auth_kerberos(struct sess_data *sess_data)
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
bcc_ptr = sess_data->iov[2].iov_base;
- capabilities = cifs_ssetup_hdr(ses, pSMB);
+ capabilities = cifs_ssetup_hdr(ses, server, pSMB);
- spnego_key = cifs_get_spnego_key(ses);
+ spnego_key = cifs_get_spnego_key(ses, server);
if (IS_ERR(spnego_key)) {
rc = PTR_ERR(spnego_key);
spnego_key = NULL;
@@ -1285,12 +1391,13 @@ _sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data)
{
SESSION_SETUP_ANDX *pSMB;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u32 capabilities;
char *bcc_ptr;
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- capabilities = cifs_ssetup_hdr(ses, pSMB);
+ capabilities = cifs_ssetup_hdr(ses, server, pSMB);
if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
return -ENOSYS;
@@ -1324,6 +1431,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
struct smb_hdr *smb_buf;
SESSION_SETUP_ANDX *pSMB;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u16 bytes_remaining;
char *bcc_ptr;
unsigned char *ntlmsspblob = NULL;
@@ -1351,10 +1459,10 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
/* Build security blob before we assemble the request */
rc = build_ntlmssp_negotiate_blob(&ntlmsspblob,
- &blob_len, ses,
+ &blob_len, ses, server,
sess_data->nls_cp);
if (rc)
- goto out;
+ goto out_free_ntlmsspblob;
sess_data->iov[1].iov_len = blob_len;
sess_data->iov[1].iov_base = ntlmsspblob;
@@ -1362,7 +1470,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
if (rc)
- goto out;
+ goto out_free_ntlmsspblob;
rc = sess_sendreceive(sess_data);
@@ -1376,14 +1484,14 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
rc = 0;
if (rc)
- goto out;
+ goto out_free_ntlmsspblob;
cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
if (smb_buf->WordCount != 4) {
rc = -EIO;
cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
- goto out;
+ goto out_free_ntlmsspblob;
}
ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
@@ -1397,10 +1505,13 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
cifs_dbg(VFS, "bad security blob length %d\n",
blob_len);
rc = -EINVAL;
- goto out;
+ goto out_free_ntlmsspblob;
}
rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
+
+out_free_ntlmsspblob:
+ kfree(ntlmsspblob);
out:
sess_free_buffer(sess_data);
@@ -1426,6 +1537,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
struct smb_hdr *smb_buf;
SESSION_SETUP_ANDX *pSMB;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u16 bytes_remaining;
char *bcc_ptr;
unsigned char *ntlmsspblob = NULL;
@@ -1442,7 +1554,8 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
smb_buf = (struct smb_hdr *)pSMB;
rc = build_ntlmssp_auth_blob(&ntlmsspblob,
- &blob_len, ses, sess_data->nls_cp);
+ &blob_len, ses, server,
+ sess_data->nls_cp);
if (rc)
goto out_free_ntlmsspblob;
sess_data->iov[1].iov_len = blob_len;
@@ -1513,7 +1626,7 @@ out_free_ntlmsspblob:
out:
sess_free_buffer(sess_data);
- if (!rc)
+ if (!rc)
rc = sess_establish_session(sess_data);
/* Cleanup */
@@ -1526,11 +1639,13 @@ out:
sess_data->result = rc;
}
-static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
+static int select_sec(struct sess_data *sess_data)
{
int type;
+ struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
- type = cifs_select_sectype(ses->server, ses->sectype);
+ type = cifs_select_sectype(server, ses->sectype);
cifs_dbg(FYI, "sess setup type %d\n", type);
if (type == Unspecified) {
cifs_dbg(VFS, "Unable to select appropriate authentication method!\n");
@@ -1561,7 +1676,8 @@ static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
}
int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
- const struct nls_table *nls_cp)
+ struct TCP_Server_Info *server,
+ const struct nls_table *nls_cp)
{
int rc = 0;
struct sess_data *sess_data;
@@ -1575,15 +1691,16 @@ int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
if (!sess_data)
return -ENOMEM;
- rc = select_sec(ses, sess_data);
- if (rc)
- goto out;
-
sess_data->xid = xid;
sess_data->ses = ses;
+ sess_data->server = server;
sess_data->buf0_type = CIFS_NO_BUFFER;
sess_data->nls_cp = (struct nls_table *) nls_cp;
+ rc = select_sec(sess_data);
+ if (rc)
+ goto out;
+
while (sess_data->func)
sess_data->func(sess_data);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 3b83839fc2c2..8272c91e15ef 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -7,6 +7,7 @@
#include <linux/pagemap.h>
#include <linux/vfs.h>
+#include <uapi/linux/magic.h>
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
@@ -163,7 +164,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
{
__u64 mid = 0;
__u16 last_mid, cur_mid;
- bool collision;
+ bool collision, reconnect = false;
spin_lock(&GlobalMid_Lock);
@@ -215,7 +216,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
* an eventual reconnect to clean out the pending_mid_q.
*/
if (num_mids > 32768)
- server->tcpStatus = CifsNeedReconnect;
+ reconnect = true;
if (!collision) {
mid = (__u64)cur_mid;
@@ -225,6 +226,13 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
cur_mid++;
}
spin_unlock(&GlobalMid_Lock);
+
+ if (reconnect) {
+ spin_lock(&cifs_tcp_ses_lock);
+ server->tcpStatus = CifsNeedReconnect;
+ spin_unlock(&cifs_tcp_ses_lock);
+ }
+
return mid;
}
@@ -414,14 +422,16 @@ cifs_need_neg(struct TCP_Server_Info *server)
}
static int
-cifs_negotiate(const unsigned int xid, struct cifs_ses *ses)
+cifs_negotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
int rc;
- rc = CIFSSMBNegotiate(xid, ses);
+ rc = CIFSSMBNegotiate(xid, ses, server);
if (rc == -EAGAIN) {
/* retry only once on 1st time connection */
- set_credits(ses->server, 1);
- rc = CIFSSMBNegotiate(xid, ses);
+ set_credits(server, 1);
+ rc = CIFSSMBNegotiate(xid, ses, server);
if (rc == -EAGAIN)
rc = -EHOSTDOWN;
}
@@ -878,7 +888,7 @@ cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
{
int rc = -EOPNOTSUPP;
- buf->f_type = CIFS_MAGIC_NUMBER;
+ buf->f_type = CIFS_SUPER_MAGIC;
/*
* We could add a second check for a QFS Unix capability bit
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index ca692b2283cd..4125fd113cfb 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -13,8 +13,6 @@
#ifndef _SMB2_GLOB_H
#define _SMB2_GLOB_H
-#define SMB2_MAGIC_NUMBER 0xFE534D42
-
/*
*****************************************************************
* Constants go here
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index cdcdef32759e..b25623e3fe3d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -847,16 +847,17 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve
* SMB2 header.
*
* @ses: server session structure
+ * @server: pointer to server info
* @iov: array containing the SMB request we will send to the server
* @nvec: number of array entries for the iov
*/
int
-smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec)
+smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server,
+ struct kvec *iov, int nvec)
{
int i, rc;
struct sdesc *d;
struct smb2_hdr *hdr;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
hdr = (struct smb2_hdr *)iov[0].iov_base;
/* neg prot are always taken */
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c5b1dea54ebc..af5d0830bc8a 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -13,6 +13,7 @@
#include <linux/sort.h>
#include <crypto/aead.h>
#include <linux/fiemap.h>
+#include <uapi/linux/magic.h>
#include "cifsfs.h"
#include "cifsglob.h"
#include "smb2pdu.h"
@@ -121,9 +122,13 @@ smb2_add_credits(struct TCP_Server_Info *server,
optype, scredits, add);
}
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedReconnect
- || server->tcpStatus == CifsExiting)
+ || server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
switch (rc) {
case -1:
@@ -208,11 +213,15 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
return rc;
spin_lock(&server->req_lock);
} else {
+ spin_unlock(&server->req_lock);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsExiting) {
- spin_unlock(&server->req_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
}
+ spin_unlock(&cifs_tcp_ses_lock);
+ spin_lock(&server->req_lock);
scredits = server->credits;
/* can deadlock with reopen */
if (scredits <= 8) {
@@ -384,14 +393,16 @@ smb2_need_neg(struct TCP_Server_Info *server)
}
static int
-smb2_negotiate(const unsigned int xid, struct cifs_ses *ses)
+smb2_negotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
int rc;
spin_lock(&GlobalMid_Lock);
- cifs_ses_server(ses)->CurrentMid = 0;
+ server->CurrentMid = 0;
spin_unlock(&GlobalMid_Lock);
- rc = SMB2_negotiate(xid, ses);
+ rc = SMB2_negotiate(xid, ses, server);
/* BB we probably don't need to retry with modern servers */
if (rc == -EAGAIN)
rc = -EHOSTDOWN;
@@ -2747,7 +2758,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
goto qfs_exit;
rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
- buf->f_type = SMB2_MAGIC_NUMBER;
+ buf->f_type = SMB2_SUPER_MAGIC;
info = (struct smb2_fs_full_size_info *)(
le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
@@ -2789,7 +2800,7 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB311_posix_qfs_info(xid, tcon, fid.persistent_fid,
fid.volatile_fid, buf);
- buf->f_type = SMB2_MAGIC_NUMBER;
+ buf->f_type = SMB2_SUPER_MAGIC;
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
return rc;
}
@@ -4808,7 +4819,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
if (server->ops->is_session_expired &&
server->ops->is_session_expired(buf)) {
if (!is_offloaded)
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -1;
}
@@ -4981,10 +4992,12 @@ static void smb2_decrypt_offload(struct work_struct *work)
mid->callback(mid);
} else {
+ spin_lock(&cifs_tcp_ses_lock);
spin_lock(&GlobalMid_Lock);
if (dw->server->tcpStatus == CifsNeedReconnect) {
mid->mid_state = MID_RETRY_NEEDED;
spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
mid->callback(mid);
} else {
mid->mid_state = MID_REQUEST_SUBMITTED;
@@ -4992,6 +5005,7 @@ static void smb2_decrypt_offload(struct work_struct *work)
list_add_tail(&mid->qhead,
&dw->server->pending_mid_q);
spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
}
}
cifs_mid_q_entry_release(mid);
@@ -5221,13 +5235,13 @@ smb3_receive_transform(struct TCP_Server_Info *server,
sizeof(struct smb2_hdr)) {
cifs_server_dbg(VFS, "Transform message is too small (%u)\n",
pdu_length);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -ECONNABORTED;
}
if (pdu_length < orig_len + sizeof(struct smb2_transform_hdr)) {
cifs_server_dbg(VFS, "Transform message is broken\n");
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -ECONNABORTED;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8b3670388cda..7e7909b1ae11 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -162,6 +162,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL)
return 0;
+ spin_lock(&cifs_tcp_ses_lock);
if (tcon->tidStatus == CifsExiting) {
/*
* only tree disconnect, open, and write,
@@ -171,11 +172,13 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
if ((smb2_command != SMB2_WRITE) &&
(smb2_command != SMB2_CREATE) &&
(smb2_command != SMB2_TREE_DISCONNECT)) {
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "can not send cmd %d while umounting\n",
smb2_command);
return -ENODEV;
}
}
+ spin_unlock(&cifs_tcp_ses_lock);
if ((!tcon->ses) || (tcon->ses->status == CifsExiting) ||
(!tcon->ses->server) || !server)
return -EIO;
@@ -214,8 +217,12 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
}
/* are we still trying to reconnect? */
- if (server->tcpStatus != CifsNeedReconnect)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus != CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
break;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
if (retries && --retries)
continue;
@@ -232,64 +239,74 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
retries = server->nr_targets;
}
- if (!tcon->ses->need_reconnect && !tcon->need_reconnect)
+ spin_lock(&ses->chan_lock);
+ if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) {
+ spin_unlock(&ses->chan_lock);
return 0;
+ }
+ spin_unlock(&ses->chan_lock);
+ cifs_dbg(FYI, "sess reconnect mask: 0x%lx, tcon reconnect: %d",
+ tcon->ses->chans_need_reconnect,
+ tcon->need_reconnect);
nls_codepage = load_nls_default();
/*
- * need to prevent multiple threads trying to simultaneously reconnect
- * the same SMB session
- */
- mutex_lock(&tcon->ses->session_mutex);
-
- /*
* Recheck after acquire mutex. If another thread is negotiating
* and the server never sends an answer the socket will be closed
* and tcpStatus set to reconnect.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
rc = -EHOSTDOWN;
- mutex_unlock(&tcon->ses->session_mutex);
goto out;
}
+ spin_unlock(&cifs_tcp_ses_lock);
/*
- * If we are reconnecting an extra channel, bind
+ * need to prevent multiple threads trying to simultaneously
+ * reconnect the same SMB session
*/
- if (CIFS_SERVER_IS_CHAN(server)) {
- ses->binding = true;
- ses->binding_chan = cifs_ses_find_chan(ses, server);
+ spin_lock(&ses->chan_lock);
+ if (!cifs_chan_needs_reconnect(ses, server)) {
+ spin_unlock(&ses->chan_lock);
+
+ /* this means that we only need to tree connect */
+ if (tcon->need_reconnect)
+ goto skip_sess_setup;
+
+ goto out;
}
+ spin_unlock(&ses->chan_lock);
- rc = cifs_negotiate_protocol(0, tcon->ses);
- if (!rc && tcon->ses->need_reconnect) {
- rc = cifs_setup_session(0, tcon->ses, nls_codepage);
+ mutex_lock(&ses->session_mutex);
+ rc = cifs_negotiate_protocol(0, ses, server);
+ if (!rc) {
+ rc = cifs_setup_session(0, ses, server, nls_codepage);
if ((rc == -EACCES) && !tcon->retry) {
+ mutex_unlock(&ses->session_mutex);
rc = -EHOSTDOWN;
- ses->binding = false;
- ses->binding_chan = NULL;
- mutex_unlock(&tcon->ses->session_mutex);
goto failed;
}
+ } else {
+ mutex_unlock(&ses->session_mutex);
+ goto out;
}
- /*
- * End of channel binding
- */
- ses->binding = false;
- ses->binding_chan = NULL;
+ mutex_unlock(&ses->session_mutex);
- if (rc || !tcon->need_reconnect) {
- mutex_unlock(&tcon->ses->session_mutex);
+skip_sess_setup:
+ mutex_lock(&ses->session_mutex);
+ if (!tcon->need_reconnect) {
+ mutex_unlock(&ses->session_mutex);
goto out;
}
-
cifs_mark_open_files_invalid(tcon);
if (tcon->use_persistent)
tcon->need_reopen_files = true;
rc = cifs_tree_connect(0, tcon, nls_codepage);
- mutex_unlock(&tcon->ses->session_mutex);
+ mutex_unlock(&ses->session_mutex);
cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
if (rc) {
@@ -833,7 +850,9 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
*/
int
-SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
+SMB2_negotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
struct smb_rqst rqst;
struct smb2_negotiate_req *req;
@@ -842,7 +861,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
struct kvec rsp_iov;
int rc = 0;
int resp_buftype;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
int blob_offset, blob_length;
char *security_blob;
int flags = CIFS_NEG_OP;
@@ -1221,6 +1239,7 @@ smb2_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
struct SMB2_sess_data {
unsigned int xid;
struct cifs_ses *ses;
+ struct TCP_Server_Info *server;
struct nls_table *nls_cp;
void (*func)(struct SMB2_sess_data *);
int result;
@@ -1242,9 +1261,10 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
{
int rc;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
struct smb2_sess_setup_req *req;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
unsigned int total_len;
+ bool is_binding = false;
rc = smb2_plain_req_init(SMB2_SESSION_SETUP, NULL, server,
(void **) &req,
@@ -1252,11 +1272,16 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
if (rc)
return rc;
- if (sess_data->ses->binding) {
- req->hdr.SessionId = cpu_to_le64(sess_data->ses->Suid);
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
+ if (is_binding) {
+ req->hdr.SessionId = cpu_to_le64(ses->Suid);
req->hdr.Flags |= SMB2_FLAGS_SIGNED;
req->PreviousSessionId = 0;
req->Flags = SMB2_SESSION_REQ_FLAG_BINDING;
+ cifs_dbg(FYI, "Binding to sess id: %llx\n", ses->Suid);
} else {
/* First session, not a reauthenticate */
req->hdr.SessionId = 0;
@@ -1266,6 +1291,8 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
*/
req->PreviousSessionId = cpu_to_le64(sess_data->previous_session);
req->Flags = 0; /* MBZ */
+ cifs_dbg(FYI, "Fresh session. Previous: %llx\n",
+ sess_data->previous_session);
}
/* enough to enable echos and oplocks and one max size write */
@@ -1325,7 +1352,7 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
/* BB add code to build os and lm fields */
rc = cifs_send_recv(sess_data->xid, sess_data->ses,
- cifs_ses_server(sess_data->ses),
+ sess_data->server,
&rqst,
&sess_data->buf0_type,
CIFS_LOG_ERROR | CIFS_SESS_OP, &rsp_iov);
@@ -1340,11 +1367,11 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
{
int rc = 0;
struct cifs_ses *ses = sess_data->ses;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
+ struct TCP_Server_Info *server = sess_data->server;
mutex_lock(&server->srv_mutex);
if (server->ops->generate_signingkey) {
- rc = server->ops->generate_signingkey(ses);
+ rc = server->ops->generate_signingkey(ses, server);
if (rc) {
cifs_dbg(FYI,
"SMB3 session key generation failed\n");
@@ -1359,14 +1386,6 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "SMB2/3 session established successfully\n");
- /* keep existing ses state if binding */
- if (!ses->binding) {
- spin_lock(&GlobalMid_Lock);
- ses->status = CifsGood;
- ses->need_reconnect = false;
- spin_unlock(&GlobalMid_Lock);
- }
-
return rc;
}
@@ -1376,15 +1395,17 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
{
int rc;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
struct cifs_spnego_msg *msg;
struct key *spnego_key = NULL;
struct smb2_sess_setup_rsp *rsp = NULL;
+ bool is_binding = false;
rc = SMB2_sess_alloc_buffer(sess_data);
if (rc)
goto out;
- spnego_key = cifs_get_spnego_key(ses);
+ spnego_key = cifs_get_spnego_key(ses, server);
if (IS_ERR(spnego_key)) {
rc = PTR_ERR(spnego_key);
if (rc == -ENOKEY)
@@ -1405,8 +1426,12 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
goto out_put_spnego_key;
}
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
/* keep session key if binding */
- if (!ses->binding) {
+ if (!is_binding) {
ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
GFP_KERNEL);
if (!ses->auth_key.response) {
@@ -1427,7 +1452,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
/* keep session id and flags if binding */
- if (!ses->binding) {
+ if (!is_binding) {
ses->Suid = le64_to_cpu(rsp->hdr.SessionId);
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
}
@@ -1459,10 +1484,12 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
{
int rc;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
struct smb2_sess_setup_rsp *rsp = NULL;
unsigned char *ntlmssp_blob = NULL;
bool use_spnego = false; /* else use raw ntlmssp */
u16 blob_length = 0;
+ bool is_binding = false;
/*
* If memory allocation is successful, caller of this function
@@ -1479,8 +1506,8 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
if (rc)
goto out_err;
- rc = build_ntlmssp_negotiate_blob(&ntlmssp_blob,
- &blob_length, ses,
+ rc = build_ntlmssp_smb3_negotiate_blob(&ntlmssp_blob,
+ &blob_length, ses, server,
sess_data->nls_cp);
if (rc)
goto out_err;
@@ -1519,8 +1546,12 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
/* keep existing ses id and flags if binding */
- if (!ses->binding) {
+ if (!is_binding) {
ses->Suid = le64_to_cpu(rsp->hdr.SessionId);
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
}
@@ -1545,11 +1576,13 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
{
int rc;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
struct smb2_sess_setup_req *req;
struct smb2_sess_setup_rsp *rsp = NULL;
unsigned char *ntlmssp_blob = NULL;
bool use_spnego = false; /* else use raw ntlmssp */
u16 blob_length = 0;
+ bool is_binding = false;
rc = SMB2_sess_alloc_buffer(sess_data);
if (rc)
@@ -1558,8 +1591,9 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base;
req->hdr.SessionId = cpu_to_le64(ses->Suid);
- rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
- sess_data->nls_cp);
+ rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length,
+ ses, server,
+ sess_data->nls_cp);
if (rc) {
cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", rc);
goto out;
@@ -1580,8 +1614,12 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
/* keep existing ses id and flags if binding */
- if (!ses->binding) {
+ if (!is_binding) {
ses->Suid = le64_to_cpu(rsp->hdr.SessionId);
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
}
@@ -1612,11 +1650,13 @@ out:
}
static int
-SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data)
+SMB2_select_sec(struct SMB2_sess_data *sess_data)
{
int type;
+ struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
- type = smb2_select_sectype(cifs_ses_server(ses), ses->sectype);
+ type = smb2_select_sectype(server, ses->sectype);
cifs_dbg(FYI, "sess setup type %d\n", type);
if (type == Unspecified) {
cifs_dbg(VFS, "Unable to select appropriate authentication method!\n");
@@ -1640,10 +1680,10 @@ SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data)
int
SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp)
{
int rc = 0;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
struct SMB2_sess_data *sess_data;
cifs_dbg(FYI, "Session Setup\n");
@@ -1657,15 +1697,17 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
if (!sess_data)
return -ENOMEM;
- rc = SMB2_select_sec(ses, sess_data);
- if (rc)
- goto out;
sess_data->xid = xid;
sess_data->ses = ses;
+ sess_data->server = server;
sess_data->buf0_type = CIFS_NO_BUFFER;
sess_data->nls_cp = (struct nls_table *) nls_cp;
sess_data->previous_session = ses->Suid;
+ rc = SMB2_select_sec(sess_data);
+ if (rc)
+ goto out;
+
/*
* Initialize the session hash with the server one.
*/
@@ -1704,8 +1746,12 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
return -EIO;
/* no need to send SMB logoff if uid already closed due to reconnect */
- if (ses->need_reconnect)
+ spin_lock(&ses->chan_lock);
+ if (CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
+ spin_unlock(&ses->chan_lock);
goto smb2_session_already_dead;
+ }
+ spin_unlock(&ses->chan_lock);
rc = smb2_plain_req_init(SMB2_LOGOFF, NULL, ses->server,
(void **) &req, &total_len);
@@ -1867,8 +1913,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
tcon->share_flags = le32_to_cpu(rsp->ShareFlags);
tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */
tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
- tcon->tidStatus = CifsGood;
- tcon->need_reconnect = false;
tcon->tid = le32_to_cpu(rsp->hdr.Id.SyncId.TreeId);
strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
@@ -1913,8 +1957,13 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if (!ses || !(ses->server))
return -EIO;
- if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
+ spin_lock(&ses->chan_lock);
+ if ((tcon->need_reconnect) ||
+ (CIFS_ALL_CHANS_NEED_RECONNECT(tcon->ses))) {
+ spin_unlock(&ses->chan_lock);
return 0;
+ }
+ spin_unlock(&ses->chan_lock);
close_cached_dir_lease(&tcon->crfid);
@@ -2527,8 +2576,13 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
cp = load_nls_default();
cifs_strtoUTF16(*out_path, treename, treename_len, cp);
- UniStrcat(*out_path, sep);
- UniStrcat(*out_path, path);
+
+ /* Do not append the separator if the path is empty */
+ if (path[0] != cpu_to_le16(0x0000)) {
+ UniStrcat(*out_path, sep);
+ UniStrcat(*out_path, path);
+ }
+
unload_nls(cp);
return 0;
@@ -3722,27 +3776,35 @@ void smb2_reconnect_server(struct work_struct *work)
{
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, reconnect.work);
- struct cifs_ses *ses;
+ struct TCP_Server_Info *pserver;
+ struct cifs_ses *ses, *ses2;
struct cifs_tcon *tcon, *tcon2;
- struct list_head tmp_list;
- int tcon_exist = false;
+ struct list_head tmp_list, tmp_ses_list;
+ bool tcon_exist = false, ses_exist = false;
+ bool tcon_selected = false;
int rc;
- int resched = false;
+ bool resched = false;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
/* Prevent simultaneous reconnects that can corrupt tcon->rlist list */
- mutex_lock(&server->reconnect_mutex);
+ mutex_lock(&pserver->reconnect_mutex);
INIT_LIST_HEAD(&tmp_list);
- cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
+ INIT_LIST_HEAD(&tmp_ses_list);
+ cifs_dbg(FYI, "Reconnecting tcons and channels\n");
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+
+ tcon_selected = false;
+
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->need_reconnect || tcon->need_reopen_files) {
tcon->tc_count++;
list_add_tail(&tcon->rlist, &tmp_list);
- tcon_exist = true;
+ tcon_selected = tcon_exist = true;
}
}
/*
@@ -3751,15 +3813,27 @@ void smb2_reconnect_server(struct work_struct *work)
*/
if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) {
list_add_tail(&ses->tcon_ipc->rlist, &tmp_list);
- tcon_exist = true;
+ tcon_selected = tcon_exist = true;
ses->ses_count++;
}
+ /*
+ * handle the case where channel needs to reconnect
+ * binding session, but tcon is healthy (some other channel
+ * is active)
+ */
+ spin_lock(&ses->chan_lock);
+ if (!tcon_selected && cifs_chan_needs_reconnect(ses, server)) {
+ list_add_tail(&ses->rlist, &tmp_ses_list);
+ ses_exist = true;
+ ses->ses_count++;
+ }
+ spin_unlock(&ses->chan_lock);
}
/*
* Get the reference to server struct to be sure that the last call of
* cifs_put_tcon() in the loop below won't release the server pointer.
*/
- if (tcon_exist)
+ if (tcon_exist || ses_exist)
server->srv_count++;
spin_unlock(&cifs_tcp_ses_lock);
@@ -3777,13 +3851,41 @@ void smb2_reconnect_server(struct work_struct *work)
cifs_put_tcon(tcon);
}
- cifs_dbg(FYI, "Reconnecting tcons finished\n");
+ if (!ses_exist)
+ goto done;
+
+ /* allocate a dummy tcon struct used for reconnect */
+ tcon = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL);
+ if (!tcon) {
+ resched = true;
+ list_del_init(&ses->rlist);
+ cifs_put_smb_ses(ses);
+ goto done;
+ }
+
+ tcon->tidStatus = CifsGood;
+ tcon->retry = false;
+ tcon->need_reconnect = false;
+
+ /* now reconnect sessions for necessary channels */
+ list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
+ tcon->ses = ses;
+ rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server);
+ if (rc)
+ resched = true;
+ list_del_init(&ses->rlist);
+ cifs_put_smb_ses(ses);
+ }
+ kfree(tcon);
+
+done:
+ cifs_dbg(FYI, "Reconnecting tcons and channels finished\n");
if (resched)
queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ);
- mutex_unlock(&server->reconnect_mutex);
+ mutex_unlock(&pserver->reconnect_mutex);
/* now we can safely release srv struct */
- if (tcon_exist)
+ if (tcon_exist || ses_exist)
cifs_put_tcp_session(server, 1);
}
@@ -3797,13 +3899,16 @@ SMB2_echo(struct TCP_Server_Info *server)
.rq_nvec = 1 };
unsigned int total_len;
- cifs_dbg(FYI, "In echo request\n");
+ cifs_dbg(FYI, "In echo request for conn_id %lld\n", server->conn_id);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedNegotiate) {
+ spin_unlock(&cifs_tcp_ses_lock);
/* No need to send echo on newly established connections */
mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
return rc;
}
+ spin_unlock(&cifs_tcp_ses_lock);
rc = smb2_plain_req_init(SMB2_ECHO, NULL, server,
(void **)&req, &total_len);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 096fada16ebd..4a7062fd1c26 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -123,8 +123,11 @@ extern void smb2_set_related(struct smb_rqst *rqst);
* SMB2 Worker functions - most of protocol specific implementation details
* are contained within these calls.
*/
-extern int SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses);
+extern int SMB2_negotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
extern int SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp);
extern int SMB2_logoff(const unsigned int xid, struct cifs_ses *ses);
extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses,
@@ -276,6 +279,7 @@ extern void smb2_copy_fs_info_to_kstatfs(
struct kstatfs *kst);
extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
extern int smb311_update_preauth_hash(struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
struct kvec *iov, int nvec);
extern int smb2_query_info_compound(const unsigned int xid,
struct cifs_tcon *tcon,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 2bf047b390a9..2af79093b78b 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -100,13 +100,16 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
goto out;
found:
- if (ses->binding) {
+ spin_lock(&ses->chan_lock);
+ if (cifs_chan_needs_reconnect(ses, server) &&
+ !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
/*
* If we are in the process of binding a new channel
* to an existing session, use the master connection
* session key
*/
memcpy(key, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&ses->chan_lock);
goto out;
}
@@ -118,9 +121,11 @@ found:
chan = ses->chans + i;
if (chan->server == server) {
memcpy(key, chan->signkey, SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&ses->chan_lock);
goto out;
}
}
+ spin_unlock(&ses->chan_lock);
cifs_dbg(VFS,
"%s: Could not find channel signing key for session 0x%llx\n",
@@ -390,12 +395,18 @@ struct derivation_triplet {
static int
generate_smb3signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct derivation_triplet *ptriplet)
{
int rc;
-#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
- struct TCP_Server_Info *server = ses->server;
-#endif
+ bool is_binding = false;
+ int chan_index = 0;
+
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ chan_index = cifs_ses_get_chan_index(ses, server);
+ /* TODO: introduce ref counting for channels when the can be freed */
+ spin_unlock(&ses->chan_lock);
/*
* All channels use the same encryption/decryption keys but
@@ -407,10 +418,10 @@ generate_smb3signingkey(struct cifs_ses *ses,
* master connection signing key stored in the session
*/
- if (ses->binding) {
+ if (is_binding) {
rc = generate_key(ses, ptriplet->signing.label,
ptriplet->signing.context,
- cifs_ses_binding_channel(ses)->signkey,
+ ses->chans[chan_index].signkey,
SMB3_SIGN_KEY_SIZE);
if (rc)
return rc;
@@ -422,8 +433,11 @@ generate_smb3signingkey(struct cifs_ses *ses,
if (rc)
return rc;
+ /* safe to access primary channel, since it will never go away */
+ spin_lock(&ses->chan_lock);
memcpy(ses->chans[0].signkey, ses->smb3signingkey,
SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&ses->chan_lock);
rc = generate_key(ses, ptriplet->encryption.label,
ptriplet->encryption.context,
@@ -470,7 +484,8 @@ generate_smb3signingkey(struct cifs_ses *ses,
}
int
-generate_smb30signingkey(struct cifs_ses *ses)
+generate_smb30signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
struct derivation_triplet triplet;
@@ -494,11 +509,12 @@ generate_smb30signingkey(struct cifs_ses *ses)
d->context.iov_base = "ServerOut";
d->context.iov_len = 10;
- return generate_smb3signingkey(ses, &triplet);
+ return generate_smb3signingkey(ses, server, &triplet);
}
int
-generate_smb311signingkey(struct cifs_ses *ses)
+generate_smb311signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
struct derivation_triplet triplet;
@@ -522,7 +538,7 @@ generate_smb311signingkey(struct cifs_ses *ses)
d->context.iov_base = ses->preauth_sha_hash;
d->context.iov_len = 64;
- return generate_smb3signingkey(ses, &triplet);
+ return generate_smb3signingkey(ses, server, &triplet);
}
int
@@ -624,8 +640,12 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
if (!is_signed)
return 0;
- if (server->tcpStatus == CifsNeedNegotiate)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsNeedNegotiate) {
+ spin_unlock(&cifs_tcp_ses_lock);
return 0;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
if (!is_binding && !server->session_estab) {
strncpy(shdr->Signature, "BSRSPYL", 8);
return 0;
@@ -741,30 +761,41 @@ static int
smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server,
struct smb2_hdr *shdr, struct mid_q_entry **mid)
{
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
+ }
if (server->tcpStatus == CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "tcp session dead - return to caller to retry\n");
return -EAGAIN;
}
if (server->tcpStatus == CifsNeedNegotiate &&
- shdr->Command != SMB2_NEGOTIATE)
+ shdr->Command != SMB2_NEGOTIATE) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
if (ses->status == CifsNew) {
if ((shdr->Command != SMB2_SESSION_SETUP) &&
- (shdr->Command != SMB2_NEGOTIATE))
+ (shdr->Command != SMB2_NEGOTIATE)) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
/* else ok - we are setting up session */
}
if (ses->status == CifsExiting) {
- if (shdr->Command != SMB2_LOGOFF)
+ if (shdr->Command != SMB2_LOGOFF) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
/* else ok - we are shutting down the session */
}
+ spin_unlock(&cifs_tcp_ses_lock);
*mid = smb2_mid_entry_alloc(shdr, server);
if (*mid == NULL)
@@ -837,9 +868,13 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
(struct smb2_hdr *)rqst->rq_iov[0].iov_base;
struct mid_q_entry *mid;
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedNegotiate &&
- shdr->Command != SMB2_NEGOTIATE)
+ shdr->Command != SMB2_NEGOTIATE) {
+ spin_unlock(&cifs_tcp_ses_lock);
return ERR_PTR(-EAGAIN);
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
smb2_seq_num_into_buf(server, shdr);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 61ea3d3f95b4..8540f7c13eae 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -430,9 +430,10 @@ unmask:
* be taken as the remainder of this one. We need to kill the
* socket so the server throws away the partial SMB
*/
- spin_lock(&GlobalMid_Lock);
- server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus != CifsExiting)
+ server->tcpStatus = CifsNeedReconnect;
+ spin_unlock(&cifs_tcp_ses_lock);
trace_smb3_partial_send_reconnect(server->CurrentMid,
server->conn_id, server->hostname);
}
@@ -578,10 +579,14 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
return -ERESTARTSYS;
spin_lock(&server->req_lock);
} else {
+ spin_unlock(&server->req_lock);
+
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsExiting) {
- spin_unlock(&server->req_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
}
+ spin_unlock(&cifs_tcp_ses_lock);
/*
* For normal commands, reserve the last MAX_COMPOUND
@@ -596,6 +601,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
* for servers that are slow to hand out credits on
* new sessions.
*/
+ spin_lock(&server->req_lock);
if (!optype && num_credits == 1 &&
server->in_flight > 2 * MAX_COMPOUND &&
*credits <= MAX_COMPOUND) {
@@ -723,28 +729,25 @@ cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
struct mid_q_entry **ppmidQ)
{
- if (ses->server->tcpStatus == CifsExiting) {
- return -ENOENT;
- }
-
- if (ses->server->tcpStatus == CifsNeedReconnect) {
- cifs_dbg(FYI, "tcp session dead - return to caller to retry\n");
- return -EAGAIN;
- }
-
+ spin_lock(&cifs_tcp_ses_lock);
if (ses->status == CifsNew) {
if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
- (in_buf->Command != SMB_COM_NEGOTIATE))
+ (in_buf->Command != SMB_COM_NEGOTIATE)) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
/* else ok - we are setting up session */
}
if (ses->status == CifsExiting) {
/* check if SMB session is bad because we are setting it up */
- if (in_buf->Command != SMB_COM_LOGOFF_ANDX)
+ if (in_buf->Command != SMB_COM_LOGOFF_ANDX) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
/* else ok - we are shutting down session */
}
+ spin_unlock(&cifs_tcp_ses_lock);
*ppmidQ = AllocMidQEntry(in_buf, ses->server);
if (*ppmidQ == NULL)
@@ -1044,19 +1047,14 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
if (!ses)
return NULL;
+ /* round robin */
+ index = (uint)atomic_inc_return(&ses->chan_seq);
+
spin_lock(&ses->chan_lock);
- if (!ses->binding) {
- /* round robin */
- if (ses->chan_count > 1) {
- index = (uint)atomic_inc_return(&ses->chan_seq);
- index %= ses->chan_count;
- }
- spin_unlock(&ses->chan_lock);
- return ses->chans[index].server;
- } else {
- spin_unlock(&ses->chan_lock);
- return cifs_ses_server(ses);
- }
+ index %= ses->chan_count;
+ spin_unlock(&ses->chan_lock);
+
+ return ses->chans[index].server;
}
int
@@ -1084,8 +1082,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
return -EIO;
}
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
/*
* Wait for all the requests to become available.
@@ -1188,12 +1190,17 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
/*
* Compounding is never used during session establish.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
+ spin_unlock(&cifs_tcp_ses_lock);
+
mutex_lock(&server->srv_mutex);
- smb311_update_preauth_hash(ses, rqst[0].rq_iov,
- rqst[0].rq_nvec);
+ smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec);
mutex_unlock(&server->srv_mutex);
+
+ spin_lock(&cifs_tcp_ses_lock);
}
+ spin_unlock(&cifs_tcp_ses_lock);
for (i = 0; i < num_rqst; i++) {
rc = wait_for_response(server, midQ[i]);
@@ -1256,15 +1263,19 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
/*
* Compounding is never used during session establish.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
struct kvec iov = {
.iov_base = resp_iov[0].iov_base,
.iov_len = resp_iov[0].iov_len
};
+ spin_unlock(&cifs_tcp_ses_lock);
mutex_lock(&server->srv_mutex);
- smb311_update_preauth_hash(ses, &iov, 1);
+ smb311_update_preauth_hash(ses, server, &iov, 1);
mutex_unlock(&server->srv_mutex);
+ spin_lock(&cifs_tcp_ses_lock);
}
+ spin_unlock(&cifs_tcp_ses_lock);
out:
/*
@@ -1353,8 +1364,12 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
return -EIO;
}
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
/* Ensure that we do not send more than 50 overlapping requests
to the same server. We may make this configurable later or
@@ -1494,8 +1509,12 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
return -EIO;
}
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
/* Ensure that we do not send more than 50 overlapping requests
to the same server. We may make this configurable later or
@@ -1553,10 +1572,12 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
(server->tcpStatus != CifsNew)));
/* Were we interrupted by a signal ? */
+ spin_lock(&cifs_tcp_ses_lock);
if ((rc == -ERESTARTSYS) &&
(midQ->mid_state == MID_REQUEST_SUBMITTED) &&
((server->tcpStatus == CifsGood) ||
(server->tcpStatus == CifsNew))) {
+ spin_unlock(&cifs_tcp_ses_lock);
if (in_buf->Command == SMB_COM_TRANSACTION2) {
/* POSIX lock. We send a NT_CANCEL SMB to cause the
@@ -1595,7 +1616,9 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
/* We got the response - restart system call. */
rstart = 1;
+ spin_lock(&cifs_tcp_ses_lock);
}
+ spin_unlock(&cifs_tcp_ses_lock);
rc = cifs_sync_mid_result(midQ, server);
if (rc != 0)
diff --git a/fs/coredump.c b/fs/coredump.c
index a6b3c196cdef..1c060c0a2d72 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -41,6 +41,7 @@
#include <linux/fs.h>
#include <linux/path.h>
#include <linux/timekeeping.h>
+#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -52,9 +53,9 @@
#include <trace/events/sched.h>
-int core_uses_pid;
-unsigned int core_pipe_limit;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
+static int core_uses_pid;
+static unsigned int core_pipe_limit;
+static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
struct core_name {
@@ -62,8 +63,6 @@ struct core_name {
int used, size;
};
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
static int expand_corename(struct core_name *cn, int size)
{
char *corename = krealloc(cn->corename, size, GFP_KERNEL);
@@ -347,13 +346,13 @@ out:
return ispipe;
}
-static int zap_process(struct task_struct *start, int exit_code, int flags)
+static int zap_process(struct task_struct *start, int exit_code)
{
struct task_struct *t;
int nr = 0;
/* ignore all signals except SIGKILL, see prepare_signal() */
- start->signal->flags = SIGNAL_GROUP_COREDUMP | flags;
+ start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_exit_code = exit_code;
start->signal->group_stop_count = 0;
@@ -372,13 +371,13 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
static int zap_threads(struct task_struct *tsk,
struct core_state *core_state, int exit_code)
{
+ struct signal_struct *signal = tsk->signal;
int nr = -EAGAIN;
spin_lock_irq(&tsk->sighand->siglock);
- if (!signal_group_exit(tsk->signal)) {
- tsk->signal->core_state = core_state;
- tsk->signal->group_exit_task = tsk;
- nr = zap_process(tsk, exit_code, 0);
+ if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
+ signal->core_state = core_state;
+ nr = zap_process(tsk, exit_code);
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
tsk->flags |= PF_DUMPCORE;
atomic_set(&core_state->nr_threads, nr);
@@ -426,8 +425,6 @@ static void coredump_finish(bool core_dumped)
spin_lock_irq(&current->sighand->siglock);
if (core_dumped && !__fatal_signal_pending(current))
current->signal->group_exit_code |= 0x80;
- current->signal->group_exit_task = NULL;
- current->signal->flags = SIGNAL_GROUP_EXIT;
next = current->signal->core_state->dumper.next;
current->signal->core_state = NULL;
spin_unlock_irq(&current->sighand->siglock);
@@ -895,6 +892,63 @@ int dump_align(struct coredump_params *cprm, int align)
}
EXPORT_SYMBOL(dump_align);
+#ifdef CONFIG_SYSCTL
+
+void validate_coredump_safety(void)
+{
+ if (suid_dumpable == SUID_DUMP_ROOT &&
+ core_pattern[0] != '/' && core_pattern[0] != '|') {
+ pr_warn(
+"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
+"Pipe handler or fully qualified core dump path required.\n"
+"Set kernel.core_pattern before fs.suid_dumpable.\n"
+ );
+ }
+}
+
+static int proc_dostring_coredump(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dostring(table, write, buffer, lenp, ppos);
+
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
+static struct ctl_table coredump_sysctls[] = {
+ {
+ .procname = "core_uses_pid",
+ .data = &core_uses_pid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "core_pattern",
+ .data = core_pattern,
+ .maxlen = CORENAME_MAX_SIZE,
+ .mode = 0644,
+ .proc_handler = proc_dostring_coredump,
+ },
+ {
+ .procname = "core_pipe_limit",
+ .data = &core_pipe_limit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static int __init init_fs_coredump_sysctls(void)
+{
+ register_sysctl_init("kernel", coredump_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_coredump_sysctls);
+#endif /* CONFIG_SYSCTL */
+
/*
* The purpose of always_dump_vma() is to make sure that special kernel mappings
* that are useful for post-mortem analysis are included in every core dump.
diff --git a/fs/dcache.c b/fs/dcache.c
index cf871a81f4fd..c84269c6e8bf 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -115,10 +115,13 @@ static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
}
-
-/* Statistics gathering. */
-struct dentry_stat_t dentry_stat = {
- .age_limit = 45,
+struct dentry_stat_t {
+ long nr_dentry;
+ long nr_unused;
+ long age_limit; /* age in seconds */
+ long want_pages; /* pages requested by system */
+ long nr_negative; /* # of unused negative dentries */
+ long dummy; /* Reserved for future use */
};
static DEFINE_PER_CPU(long, nr_dentry);
@@ -126,6 +129,10 @@ static DEFINE_PER_CPU(long, nr_dentry_unused);
static DEFINE_PER_CPU(long, nr_dentry_negative);
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+/* Statistics gathering. */
+static struct dentry_stat_t dentry_stat = {
+ .age_limit = 45,
+};
/*
* Here we resort to our own counters instead of using generic per-cpu counters
@@ -167,14 +174,32 @@ static long get_nr_dentry_negative(void)
return sum < 0 ? 0 : sum;
}
-int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
- size_t *lenp, loff_t *ppos)
+static int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
dentry_stat.nr_dentry = get_nr_dentry();
dentry_stat.nr_unused = get_nr_dentry_unused();
dentry_stat.nr_negative = get_nr_dentry_negative();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
+
+static struct ctl_table fs_dcache_sysctls[] = {
+ {
+ .procname = "dentry-state",
+ .data = &dentry_stat,
+ .maxlen = 6*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_dentry,
+ },
+ { }
+};
+
+static int __init init_fs_dcache_sysctls(void)
+{
+ register_sysctl_init("fs", fs_dcache_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_dcache_sysctls);
#endif
/*
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 06f4c5ae1451..e2daa940ebce 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -307,7 +307,7 @@ static void unlist_file(struct epitems_head *head)
static long long_zero;
static long long_max = LONG_MAX;
-struct ctl_table epoll_table[] = {
+static struct ctl_table epoll_table[] = {
{
.procname = "max_user_watches",
.data = &max_user_watches,
@@ -319,6 +319,13 @@ struct ctl_table epoll_table[] = {
},
{ }
};
+
+static void __init epoll_sysctls_init(void)
+{
+ register_sysctl("fs/epoll", epoll_table);
+}
+#else
+#define epoll_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
static const struct file_operations eventpoll_fops;
@@ -2378,6 +2385,7 @@ static int __init eventpoll_init(void)
/* Allocates slab cache used to allocate "struct eppoll_entry" */
pwq_cache = kmem_cache_create("eventpoll_pwq",
sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
+ epoll_sysctls_init();
ephead_cache = kmem_cache_create("ep_head",
sizeof(struct epitems_head), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
diff --git a/fs/exec.c b/fs/exec.c
index 537d92c41105..79f2c9483302 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -65,6 +65,7 @@
#include <linux/vmalloc.h>
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
+#include <linux/coredump.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -1045,7 +1046,7 @@ static int de_thread(struct task_struct *tsk)
* Kill all other threads in the thread group.
*/
spin_lock_irq(lock);
- if (signal_group_exit(sig)) {
+ if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
/*
* Another group action in progress, just
* return so that the signal is processed.
@@ -1054,7 +1055,7 @@ static int de_thread(struct task_struct *tsk)
return -EAGAIN;
}
- sig->group_exit_task = tsk;
+ sig->group_exec_task = tsk;
sig->notify_count = zap_other_threads(tsk);
if (!thread_group_leader(tsk))
sig->notify_count--;
@@ -1082,7 +1083,7 @@ static int de_thread(struct task_struct *tsk)
write_lock_irq(&tasklist_lock);
/*
* Do this under tasklist_lock to ensure that
- * exit_notify() can't miss ->group_exit_task
+ * exit_notify() can't miss ->group_exec_task
*/
sig->notify_count = -1;
if (likely(leader->exit_state))
@@ -1149,7 +1150,7 @@ static int de_thread(struct task_struct *tsk)
release_task(leader);
}
- sig->group_exit_task = NULL;
+ sig->group_exec_task = NULL;
sig->notify_count = 0;
no_thread_group:
@@ -1162,7 +1163,7 @@ no_thread_group:
killed:
/* protects against exit_notify() and __exit_signal() */
read_lock(&tasklist_lock);
- sig->group_exit_task = NULL;
+ sig->group_exec_task = NULL;
sig->notify_count = 0;
read_unlock(&tasklist_lock);
return -EAGAIN;
@@ -1207,7 +1208,8 @@ static int unshare_sighand(struct task_struct *me)
char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
task_lock(tsk);
- strncpy(buf, tsk->comm, buf_size);
+ /* Always NUL terminated and zero-padded */
+ strscpy_pad(buf, tsk->comm, buf_size);
task_unlock(tsk);
return buf;
}
@@ -1222,7 +1224,7 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
{
task_lock(tsk);
trace_task_rename(tsk, buf);
- strlcpy(tsk->comm, buf, sizeof(tsk->comm));
+ strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
perf_event_comm(tsk, exec);
}
@@ -1307,6 +1309,8 @@ int begin_new_exec(struct linux_binprm * bprm)
*/
force_uaccess_begin();
+ if (me->flags & PF_KTHREAD)
+ free_kthread_struct(me);
me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
PF_NOFREEZE | PF_NO_SETAFFINITY);
flush_thread();
@@ -2096,3 +2100,37 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
argv, envp, flags);
}
#endif
+
+#ifdef CONFIG_SYSCTL
+
+static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
+static struct ctl_table fs_exec_sysctls[] = {
+ {
+ .procname = "suid_dumpable",
+ .data = &suid_dumpable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_coredump,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ { }
+};
+
+static int __init init_fs_exec_sysctls(void)
+{
+ register_sysctl_init("fs", fs_exec_sysctls);
+ return 0;
+}
+
+fs_initcall(init_fs_exec_sysctls);
+#endif /* CONFIG_SYSCTL */
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index cc5cffc4a769..03f142307174 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -105,7 +105,7 @@ int exfat_load_bitmap(struct super_block *sb)
struct exfat_dentry *ep;
struct buffer_head *bh;
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index cb1c0d8c1714..a27b55ec060a 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -64,7 +64,6 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
{
int i, dentries_per_clu, dentries_per_clu_bits = 0, num_ext;
unsigned int type, clu_offset, max_dentries;
- sector_t sector;
struct exfat_chain dir, clu;
struct exfat_uni_name uni_name;
struct exfat_dentry *ep;
@@ -115,7 +114,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
i = dentry & (dentries_per_clu - 1);
for ( ; i < dentries_per_clu; i++, dentry++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, &sector);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
@@ -156,7 +155,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
dir_entry->namebuf.lfnbuf_len);
brelse(bh);
- ep = exfat_get_dentry(sb, &clu, i + 1, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i + 1, &bh);
if (!ep)
return -EIO;
dir_entry->size =
@@ -445,7 +444,6 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir,
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct timespec64 ts = current_time(inode);
- sector_t sector;
struct exfat_dentry *ep;
struct buffer_head *bh;
@@ -453,7 +451,7 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir,
* We cannot use exfat_get_dentry_set here because file ep is not
* initialized yet.
*/
- ep = exfat_get_dentry(sb, p_dir, entry, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry, &bh);
if (!ep)
return -EIO;
@@ -477,7 +475,7 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir,
exfat_update_bh(bh, IS_DIRSYNC(inode));
brelse(bh);
- ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh);
if (!ep)
return -EIO;
@@ -496,12 +494,11 @@ int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir,
struct super_block *sb = inode->i_sb;
int ret = 0;
int i, num_entries;
- sector_t sector;
u16 chksum;
struct exfat_dentry *ep, *fep;
struct buffer_head *fbh, *bh;
- fep = exfat_get_dentry(sb, p_dir, entry, &fbh, &sector);
+ fep = exfat_get_dentry(sb, p_dir, entry, &fbh);
if (!fep)
return -EIO;
@@ -509,7 +506,7 @@ int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir,
chksum = exfat_calc_chksum16(fep, DENTRY_SIZE, 0, CS_DIR_ENTRY);
for (i = 1; i < num_entries; i++) {
- ep = exfat_get_dentry(sb, p_dir, entry + i, &bh, NULL);
+ ep = exfat_get_dentry(sb, p_dir, entry + i, &bh);
if (!ep) {
ret = -EIO;
goto release_fbh;
@@ -531,13 +528,12 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
{
struct super_block *sb = inode->i_sb;
int i;
- sector_t sector;
unsigned short *uniname = p_uniname->name;
struct exfat_dentry *ep;
struct buffer_head *bh;
int sync = IS_DIRSYNC(inode);
- ep = exfat_get_dentry(sb, p_dir, entry, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry, &bh);
if (!ep)
return -EIO;
@@ -545,7 +541,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
exfat_update_bh(bh, sync);
brelse(bh);
- ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh);
if (!ep)
return -EIO;
@@ -555,7 +551,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
brelse(bh);
for (i = EXFAT_FIRST_CLUSTER; i < num_entries; i++) {
- ep = exfat_get_dentry(sb, p_dir, entry + i, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry + i, &bh);
if (!ep)
return -EIO;
@@ -574,12 +570,11 @@ int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir,
{
struct super_block *sb = inode->i_sb;
int i;
- sector_t sector;
struct exfat_dentry *ep;
struct buffer_head *bh;
for (i = order; i < num_entries; i++) {
- ep = exfat_get_dentry(sb, p_dir, entry + i, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry + i, &bh);
if (!ep)
return -EIO;
@@ -656,8 +651,8 @@ static int exfat_walk_fat_chain(struct super_block *sb,
return 0;
}
-int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir,
- int entry, sector_t *sector, int *offset)
+static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir,
+ int entry, sector_t *sector, int *offset)
{
int ret;
unsigned int off, clu = 0;
@@ -717,8 +712,7 @@ static int exfat_dir_readahead(struct super_block *sb, sector_t sec)
}
struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
- struct exfat_chain *p_dir, int entry, struct buffer_head **bh,
- sector_t *sector)
+ struct exfat_chain *p_dir, int entry, struct buffer_head **bh)
{
unsigned int dentries_per_page = EXFAT_B_TO_DEN(PAGE_SIZE);
int off;
@@ -740,8 +734,6 @@ struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
if (!*bh)
return NULL;
- if (sector)
- *sector = sec;
return (struct exfat_dentry *)((*bh)->b_data + off);
}
@@ -892,7 +884,7 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
es->bh[es->num_bh++] = bh;
}
- /* validiate cached dentries */
+ /* validate cached dentries */
for (i = 1; i < num_entries; i++) {
ep = exfat_get_dentry_cached(es, i);
if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode))
@@ -960,7 +952,7 @@ rewind:
if (rewind && dentry == end_eidx)
goto not_found;
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
@@ -1145,7 +1137,7 @@ int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir,
struct buffer_head *bh;
for (i = 0, entry++; i < ep->dentry.file.num_ext; i++, entry++) {
- ext_ep = exfat_get_dentry(sb, p_dir, entry, &bh, NULL);
+ ext_ep = exfat_get_dentry(sb, p_dir, entry, &bh);
if (!ext_ep)
return -EIO;
@@ -1175,7 +1167,7 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir)
while (clu.dir != EXFAT_EOF_CLUSTER) {
for (i = 0; i < dentries_per_clu; i++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
entry_type = exfat_get_entry_type(ep);
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 1d6da61157c9..619e5b4bed10 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -10,7 +10,6 @@
#include <linux/ratelimit.h>
#include <linux/nls.h>
-#define EXFAT_SUPER_MAGIC 0x2011BAB0UL
#define EXFAT_ROOT_INO 1
#define EXFAT_CLUSTERS_UNTRACKED (~0u)
@@ -459,11 +458,8 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname,
int num_entries, unsigned int type, struct exfat_hint *hint_opt);
int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu);
-int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir,
- int entry, sector_t *sector, int *offset);
struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
- struct exfat_chain *p_dir, int entry, struct buffer_head **bh,
- sector_t *sector);
+ struct exfat_chain *p_dir, int entry, struct buffer_head **bh);
struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es,
int num);
struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index e949e563443c..a3464e56a7e1 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -84,9 +84,7 @@ int exfat_ent_set(struct super_block *sb, unsigned int loc,
static inline bool is_valid_cluster(struct exfat_sb_info *sbi,
unsigned int clus)
{
- if (clus < EXFAT_FIRST_CLUSTER || sbi->num_clusters <= clus)
- return false;
- return true;
+ return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters;
}
int exfat_ent_get(struct super_block *sb, unsigned int loc,
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 6af0191b648f..d890fd34bb2d 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -110,8 +110,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size)
exfat_set_volume_dirty(sb);
num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi);
- num_clusters_phys =
- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, sbi);
+ num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags);
@@ -228,12 +227,13 @@ void exfat_truncate(struct inode *inode, loff_t size)
{
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
+ struct exfat_inode_info *ei = EXFAT_I(inode);
unsigned int blocksize = i_blocksize(inode);
loff_t aligned_size;
int err;
mutex_lock(&sbi->s_lock);
- if (EXFAT_I(inode)->start_clu == 0) {
+ if (ei->start_clu == 0) {
/*
* Empty start_clu != ~0 (not allocated)
*/
@@ -251,8 +251,8 @@ void exfat_truncate(struct inode *inode, loff_t size)
else
mark_inode_dirty(inode);
- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
- ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
+ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
+ inode->i_blkbits;
write_size:
aligned_size = i_size_read(inode);
if (aligned_size & (blocksize - 1)) {
@@ -260,11 +260,11 @@ write_size:
aligned_size++;
}
- if (EXFAT_I(inode)->i_size_ondisk > i_size_read(inode))
- EXFAT_I(inode)->i_size_ondisk = aligned_size;
+ if (ei->i_size_ondisk > i_size_read(inode))
+ ei->i_size_ondisk = aligned_size;
- if (EXFAT_I(inode)->i_size_aligned > i_size_read(inode))
- EXFAT_I(inode)->i_size_aligned = aligned_size;
+ if (ei->i_size_aligned > i_size_read(inode))
+ ei->i_size_aligned = aligned_size;
mutex_unlock(&sbi->s_lock);
}
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 1c7aa1ea4724..df805bd05508 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -31,7 +31,7 @@ static int __exfat_write_inode(struct inode *inode, int sync)
return 0;
/*
- * If the indode is already unlinked, there is no need for updating it.
+ * If the inode is already unlinked, there is no need for updating it.
*/
if (ei->dir.dir == DIR_DELETED)
return 0;
@@ -114,10 +114,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
unsigned int local_clu_offset = clu_offset;
unsigned int num_to_be_allocated = 0, num_clusters = 0;
- if (EXFAT_I(inode)->i_size_ondisk > 0)
+ if (ei->i_size_ondisk > 0)
num_clusters =
- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk,
- sbi);
+ EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
if (clu_offset >= num_clusters)
num_to_be_allocated = clu_offset - num_clusters + 1;
@@ -416,10 +415,10 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
- if (EXFAT_I(inode)->i_size_aligned < i_size_read(inode)) {
+ if (ei->i_size_aligned < i_size_read(inode)) {
exfat_fs_error(inode->i_sb,
"invalid size(size(%llu) > aligned(%llu)\n",
- i_size_read(inode), EXFAT_I(inode)->i_size_aligned);
+ i_size_read(inode), ei->i_size_aligned);
return -EIO;
}
@@ -603,8 +602,8 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
exfat_save_attr(inode, info->attr);
- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
- ~((loff_t)sbi->cluster_size - 1)) >> inode->i_blkbits;
+ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
+ inode->i_blkbits;
inode->i_mtime = info->mtime;
inode->i_ctime = info->mtime;
ei->i_crtime = info->crtime;
diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c
index d34e6193258d..d5bd8e6d9741 100644
--- a/fs/exfat/misc.c
+++ b/fs/exfat/misc.c
@@ -10,6 +10,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
+#include <linux/blk_types.h>
#include "exfat_raw.h"
#include "exfat_fs.h"
@@ -180,7 +181,7 @@ int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync)
set_buffer_uptodate(bhs[i]);
mark_buffer_dirty(bhs[i]);
if (sync)
- write_dirty_buffer(bhs[i], 0);
+ write_dirty_buffer(bhs[i], REQ_SYNC);
}
for (i = 0; i < nr_bhs && sync; i++) {
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 24b41103d1cc..af4eb39cc0c3 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -229,7 +229,7 @@ static int exfat_search_empty_slot(struct super_block *sb,
i = dentry & (dentries_per_clu - 1);
for (; i < dentries_per_clu; i++, dentry++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
type = exfat_get_entry_type(ep);
@@ -306,7 +306,6 @@ static int exfat_find_empty_entry(struct inode *inode,
{
int dentry;
unsigned int ret, last_clu;
- sector_t sector;
loff_t size = 0;
struct exfat_chain clu;
struct exfat_dentry *ep = NULL;
@@ -379,7 +378,7 @@ static int exfat_find_empty_entry(struct inode *inode,
struct buffer_head *bh;
ep = exfat_get_dentry(sb,
- &(ei->dir), ei->entry + 1, &bh, &sector);
+ &(ei->dir), ei->entry + 1, &bh);
if (!ep)
return -EIO;
@@ -395,9 +394,9 @@ static int exfat_find_empty_entry(struct inode *inode,
/* directory inode should be updated in here */
i_size_write(inode, size);
- EXFAT_I(inode)->i_size_ondisk += sbi->cluster_size;
- EXFAT_I(inode)->i_size_aligned += sbi->cluster_size;
- EXFAT_I(inode)->flags = p_dir->flags;
+ ei->i_size_ondisk += sbi->cluster_size;
+ ei->i_size_aligned += sbi->cluster_size;
+ ei->flags = p_dir->flags;
inode->i_blocks += 1 << sbi->sect_per_clus_bits;
}
@@ -779,7 +778,6 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct exfat_inode_info *ei = EXFAT_I(inode);
struct buffer_head *bh;
- sector_t sector;
int num_entries, entry, err = 0;
mutex_lock(&EXFAT_SB(sb)->s_lock);
@@ -791,7 +789,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
goto unlock;
}
- ep = exfat_get_dentry(sb, &cdir, entry, &bh, &sector);
+ ep = exfat_get_dentry(sb, &cdir, entry, &bh);
if (!ep) {
err = -EIO;
goto unlock;
@@ -895,7 +893,7 @@ static int exfat_check_dir_empty(struct super_block *sb,
while (clu.dir != EXFAT_EOF_CLUSTER) {
for (i = 0; i < dentries_per_clu; i++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
type = exfat_get_entry_type(ep);
@@ -932,7 +930,6 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct exfat_inode_info *ei = EXFAT_I(inode);
struct buffer_head *bh;
- sector_t sector;
int num_entries, entry, err;
mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock);
@@ -957,7 +954,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
goto unlock;
}
- ep = exfat_get_dentry(sb, &cdir, entry, &bh, &sector);
+ ep = exfat_get_dentry(sb, &cdir, entry, &bh);
if (!ep) {
err = -EIO;
goto unlock;
@@ -1005,13 +1002,12 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir,
struct exfat_inode_info *ei)
{
int ret, num_old_entries, num_new_entries;
- sector_t sector_old, sector_new;
struct exfat_dentry *epold, *epnew;
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh, *old_bh;
int sync = IS_DIRSYNC(inode);
- epold = exfat_get_dentry(sb, p_dir, oldentry, &old_bh, &sector_old);
+ epold = exfat_get_dentry(sb, p_dir, oldentry, &old_bh);
if (!epold)
return -EIO;
@@ -1032,8 +1028,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir,
if (newentry < 0)
return newentry; /* -EIO or -ENOSPC */
- epnew = exfat_get_dentry(sb, p_dir, newentry, &new_bh,
- &sector_new);
+ epnew = exfat_get_dentry(sb, p_dir, newentry, &new_bh);
if (!epnew)
return -EIO;
@@ -1046,12 +1041,10 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir,
brelse(old_bh);
brelse(new_bh);
- epold = exfat_get_dentry(sb, p_dir, oldentry + 1, &old_bh,
- &sector_old);
+ epold = exfat_get_dentry(sb, p_dir, oldentry + 1, &old_bh);
if (!epold)
return -EIO;
- epnew = exfat_get_dentry(sb, p_dir, newentry + 1, &new_bh,
- &sector_new);
+ epnew = exfat_get_dentry(sb, p_dir, newentry + 1, &new_bh);
if (!epnew) {
brelse(old_bh);
return -EIO;
@@ -1093,12 +1086,11 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir,
struct exfat_uni_name *p_uniname, struct exfat_inode_info *ei)
{
int ret, newentry, num_new_entries, num_old_entries;
- sector_t sector_mov, sector_new;
struct exfat_dentry *epmov, *epnew;
struct super_block *sb = inode->i_sb;
struct buffer_head *mov_bh, *new_bh;
- epmov = exfat_get_dentry(sb, p_olddir, oldentry, &mov_bh, &sector_mov);
+ epmov = exfat_get_dentry(sb, p_olddir, oldentry, &mov_bh);
if (!epmov)
return -EIO;
@@ -1116,7 +1108,7 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir,
if (newentry < 0)
return newentry; /* -EIO or -ENOSPC */
- epnew = exfat_get_dentry(sb, p_newdir, newentry, &new_bh, &sector_new);
+ epnew = exfat_get_dentry(sb, p_newdir, newentry, &new_bh);
if (!epnew)
return -EIO;
@@ -1129,12 +1121,10 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir,
brelse(mov_bh);
brelse(new_bh);
- epmov = exfat_get_dentry(sb, p_olddir, oldentry + 1, &mov_bh,
- &sector_mov);
+ epmov = exfat_get_dentry(sb, p_olddir, oldentry + 1, &mov_bh);
if (!epmov)
return -EIO;
- epnew = exfat_get_dentry(sb, p_newdir, newentry + 1, &new_bh,
- &sector_new);
+ epnew = exfat_get_dentry(sb, p_newdir, newentry + 1, &new_bh);
if (!epnew) {
brelse(mov_bh);
return -EIO;
@@ -1216,7 +1206,7 @@ static int __exfat_rename(struct inode *old_parent_inode,
exfat_chain_dup(&olddir, &ei->dir);
dentry = ei->entry;
- ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh, NULL);
+ ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh);
if (!ep) {
ret = -EIO;
goto out;
@@ -1237,7 +1227,7 @@ static int __exfat_rename(struct inode *old_parent_inode,
p_dir = &(new_ei->dir);
new_entry = new_ei->entry;
- ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh, NULL);
+ ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh);
if (!ep)
goto out;
@@ -1277,7 +1267,7 @@ static int __exfat_rename(struct inode *old_parent_inode,
if (!ret && new_inode) {
/* delete entries of new_dir */
- ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh, NULL);
+ ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh);
if (!ep) {
ret = -EIO;
goto del_out;
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 314d5407a1be..ef115e673406 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -761,7 +761,7 @@ int exfat_create_upcase_table(struct super_block *sb)
while (clu.dir != EXFAT_EOF_CLUSTER) {
for (i = 0; i < sbi->dentries_per_clu; i++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 5539ffc20d16..8c9fb7dcec16 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -17,6 +17,7 @@
#include <linux/iversion.h>
#include <linux/nls.h>
#include <linux/buffer_head.h>
+#include <linux/magic.h>
#include "exfat_raw.h"
#include "exfat_fs.h"
@@ -364,11 +365,11 @@ static int exfat_read_root(struct inode *inode)
inode->i_op = &exfat_dir_inode_operations;
inode->i_fop = &exfat_dir_operations;
- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1))
- & ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
- EXFAT_I(inode)->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
- EXFAT_I(inode)->i_size_aligned = i_size_read(inode);
- EXFAT_I(inode)->i_size_ondisk = i_size_read(inode);
+ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
+ inode->i_blkbits;
+ ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
+ ei->i_size_aligned = i_size_read(inode);
+ ei->i_size_ondisk = i_size_read(inode);
exfat_save_attr(inode, ATTR_SUBDIR);
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1077ce7e189f..74c91da585d7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -27,8 +27,8 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/fiemap.h>
-#include <linux/backing-dev.h>
#include <linux/iomap.h>
+#include <linux/sched/mm.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "xattr.h"
@@ -4404,8 +4404,7 @@ retry:
err = ext4_es_remove_extent(inode, last_block,
EXT_MAX_BLOCKS - last_block);
if (err == -ENOMEM) {
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_ATOMIC);
goto retry;
}
if (err)
@@ -4413,8 +4412,7 @@ retry:
retry_remove_space:
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
if (err == -ENOMEM) {
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_ATOMIC);
goto retry_remove_space;
}
return err;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 39a1ab129fdc..635bcf68a67e 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -7,7 +7,7 @@
#include <linux/iomap.h>
#include <linux/fiemap.h>
#include <linux/iversion.h>
-#include <linux/backing-dev.h>
+#include <linux/sched/mm.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -1929,8 +1929,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
retry:
err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
if (err == -ENOMEM) {
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_ATOMIC);
goto retry;
}
if (err)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cf2fd9fc7d98..9f86dd947032 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2834,7 +2834,7 @@ out:
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group;
if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
@@ -2845,7 +2845,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group;
++*pos;
@@ -2857,7 +2857,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group = (ext4_group_t) ((unsigned long) v);
int i;
int err, buddy_loaded = 0;
@@ -2985,7 +2985,7 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
__acquires(&EXT4_SB(sb)->s_mb_rb_lock)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
read_lock(&EXT4_SB(sb)->s_mb_rb_lock);
@@ -2998,7 +2998,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock)
static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
++*pos;
@@ -3010,7 +3010,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, lof
static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long position = ((unsigned long) v);
struct ext4_group_info *grp;
@@ -3058,7 +3058,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
__releases(&EXT4_SB(sb)->s_mb_rb_lock)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 9cb261714991..1d370364230e 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -24,7 +24,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/backing-dev.h>
+#include <linux/sched/mm.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -523,12 +523,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
ret = PTR_ERR(bounce_page);
if (ret == -ENOMEM &&
(io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
- gfp_flags = GFP_NOFS;
+ gfp_t new_gfp_flags = GFP_NOFS;
if (io->io_bio)
ext4_io_submit(io);
else
- gfp_flags |= __GFP_NOFAIL;
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ new_gfp_flags |= __GFP_NOFAIL;
+ memalloc_retry_wait(gfp_flags);
+ gfp_flags = new_gfp_flags;
goto retry_encrypt;
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 3db923403505..4cd62f1d848c 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -43,7 +43,6 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
-#include <linux/cleancache.h>
#include "ext4.h"
@@ -350,11 +349,6 @@ int ext4_mpage_readpages(struct inode *inode,
} else if (fully_mapped) {
SetPageMappedToDisk(page);
}
- if (fully_mapped && blocks_per_page == 1 &&
- !PageUptodate(page) && cleancache_get_page(page) == 0) {
- SetPageUptodate(page);
- goto confused;
- }
/*
* This page will go to BIO. Do we need to send this
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0343f682504d..eee0d9ebfa6c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -39,7 +39,6 @@
#include <linux/log2.h>
#include <linux/crc16.h>
#include <linux/dax.h>
-#include <linux/cleancache.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
@@ -1966,29 +1965,22 @@ static const struct mount_opts {
static const struct ext4_sb_encodings {
__u16 magic;
char *name;
- char *version;
+ unsigned int version;
} ext4_sb_encoding_map[] = {
- {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
+ {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
};
-static int ext4_sb_read_encoding(const struct ext4_super_block *es,
- const struct ext4_sb_encodings **encoding,
- __u16 *flags)
+static const struct ext4_sb_encodings *
+ext4_sb_read_encoding(const struct ext4_super_block *es)
{
__u16 magic = le16_to_cpu(es->s_encoding);
int i;
for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
if (magic == ext4_sb_encoding_map[i].magic)
- break;
-
- if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
- return -EINVAL;
-
- *encoding = &ext4_sb_encoding_map[i];
- *flags = le16_to_cpu(es->s_encoding_flags);
+ return &ext4_sb_encoding_map[i];
- return 0;
+ return NULL;
}
#endif
@@ -3156,8 +3148,6 @@ done:
EXT4_BLOCKS_PER_GROUP(sb),
EXT4_INODES_PER_GROUP(sb),
sbi->s_mount_opt, sbi->s_mount_opt2);
-
- cleancache_init_fs(sb);
return err;
}
@@ -4624,10 +4614,10 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
const struct ext4_sb_encodings *encoding_info;
struct unicode_map *encoding;
- __u16 encoding_flags;
+ __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
- if (ext4_sb_read_encoding(es, &encoding_info,
- &encoding_flags)) {
+ encoding_info = ext4_sb_read_encoding(es);
+ if (!encoding_info) {
ext4_msg(sb, KERN_ERR,
"Encoding requested by superblock is unknown");
goto failed_mount;
@@ -4636,15 +4626,21 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
encoding = utf8_load(encoding_info->version);
if (IS_ERR(encoding)) {
ext4_msg(sb, KERN_ERR,
- "can't mount with superblock charset: %s-%s "
+ "can't mount with superblock charset: %s-%u.%u.%u "
"not supported by the kernel. flags: 0x%x.",
- encoding_info->name, encoding_info->version,
+ encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
encoding_flags);
goto failed_mount;
}
ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
- "%s-%s with flags 0x%hx", encoding_info->name,
- encoding_info->version?:"\b", encoding_flags);
+ "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
sb->s_encoding = encoding;
sb->s_encoding_flags = encoding_flags;
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 7eea3cfd894d..f46a7339d6cf 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -7,6 +7,7 @@ config F2FS_FS
select CRYPTO_CRC32
select F2FS_FS_XATTR if FS_ENCRYPTION
select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
+ select FS_IOMAP
select LZ4_COMPRESS if F2FS_FS_LZ4
select LZ4_DECOMPRESS if F2FS_FS_LZ4
select LZ4HC_COMPRESS if F2FS_FS_LZ4HC
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f1693d45bb78..982f0170639f 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -664,7 +664,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
/* truncate all the data during iput */
iput(inode);
- err = f2fs_get_node_info(sbi, ino, &ni);
+ err = f2fs_get_node_info(sbi, ino, &ni, false);
if (err)
goto err_out;
@@ -1302,8 +1302,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned long flags;
if (cpc->reason & CP_UMOUNT) {
- if (le32_to_cpu(ckpt->cp_pack_total_block_count) >
- sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) {
+ if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
+ NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) {
clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
f2fs_notice(sbi, "Disable nat_bits due to no space");
} else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 49121a21f749..d0c3aeba5945 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -154,6 +154,7 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
cc->rpages = NULL;
cc->nr_rpages = 0;
cc->nr_cpages = 0;
+ cc->valid_nr_cpages = 0;
if (!reuse)
cc->cluster_idx = NULL_CLUSTER;
}
@@ -620,7 +621,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
unsigned int max_len, new_nr_cpages;
- struct page **new_cpages;
u32 chksum = 0;
int i, ret;
@@ -635,6 +635,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
max_len = COMPRESS_HEADER_SIZE + cc->clen;
cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE);
+ cc->valid_nr_cpages = cc->nr_cpages;
cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages);
if (!cc->cpages) {
@@ -685,13 +686,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
new_nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
- /* Now we're going to cut unnecessary tail pages */
- new_cpages = page_array_alloc(cc->inode, new_nr_cpages);
- if (!new_cpages) {
- ret = -ENOMEM;
- goto out_vunmap_cbuf;
- }
-
/* zero out any unused part of the last page */
memset(&cc->cbuf->cdata[cc->clen], 0,
(new_nr_cpages * PAGE_SIZE) -
@@ -701,10 +695,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
vm_unmap_ram(cc->rbuf, cc->cluster_size);
for (i = 0; i < cc->nr_cpages; i++) {
- if (i < new_nr_cpages) {
- new_cpages[i] = cc->cpages[i];
+ if (i < new_nr_cpages)
continue;
- }
f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
@@ -712,9 +704,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
if (cops->destroy_compress_ctx)
cops->destroy_compress_ctx(cc);
- page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
- cc->cpages = new_cpages;
- cc->nr_cpages = new_nr_cpages;
+ cc->valid_nr_cpages = new_nr_cpages;
trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx,
cc->clen, ret);
@@ -1296,7 +1286,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT;
- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
if (err)
goto out_put_dnode;
@@ -1308,14 +1298,14 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
cic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
cic->inode = inode;
- atomic_set(&cic->pending_pages, cc->nr_cpages);
+ atomic_set(&cic->pending_pages, cc->valid_nr_cpages);
cic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
if (!cic->rpages)
goto out_put_cic;
cic->nr_rpages = cc->cluster_size;
- for (i = 0; i < cc->nr_cpages; i++) {
+ for (i = 0; i < cc->valid_nr_cpages; i++) {
f2fs_set_compressed_page(cc->cpages[i], inode,
cc->rpages[i + 1]->index, cic);
fio.compressed_page = cc->cpages[i];
@@ -1360,7 +1350,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
if (fio.compr_blocks && __is_valid_data_blkaddr(blkaddr))
fio.compr_blocks++;
- if (i > cc->nr_cpages) {
+ if (i > cc->valid_nr_cpages) {
if (__is_valid_data_blkaddr(blkaddr)) {
f2fs_invalidate_blocks(sbi, blkaddr);
f2fs_update_data_blkaddr(&dn, NEW_ADDR);
@@ -1385,8 +1375,8 @@ unlock_continue:
if (fio.compr_blocks)
f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
- f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
- add_compr_block_stat(inode, cc->nr_cpages);
+ f2fs_i_compr_blocks_update(inode, cc->valid_nr_cpages, true);
+ add_compr_block_stat(inode, cc->valid_nr_cpages);
set_inode_flag(cc->inode, FI_APPEND_WRITE);
if (cc->cluster_idx == 0)
@@ -1424,9 +1414,7 @@ out_unlock_op:
else
f2fs_unlock_op(sbi);
out_free:
- for (i = 0; i < cc->nr_cpages; i++) {
- if (!cc->cpages[i])
- continue;
+ for (i = 0; i < cc->valid_nr_cpages; i++) {
f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
@@ -1468,25 +1456,38 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
enum iostat_type io_type)
{
struct address_space *mapping = cc->inode->i_mapping;
- int _submitted, compr_blocks, ret;
- int i = -1, err = 0;
+ int _submitted, compr_blocks, ret, i;
compr_blocks = f2fs_compressed_blocks(cc);
- if (compr_blocks < 0) {
- err = compr_blocks;
- goto out_err;
+
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (!cc->rpages[i])
+ continue;
+
+ redirty_page_for_writepage(wbc, cc->rpages[i]);
+ unlock_page(cc->rpages[i]);
}
+ if (compr_blocks < 0)
+ return compr_blocks;
+
for (i = 0; i < cc->cluster_size; i++) {
if (!cc->rpages[i])
continue;
retry_write:
+ lock_page(cc->rpages[i]);
+
if (cc->rpages[i]->mapping != mapping) {
+continue_unlock:
unlock_page(cc->rpages[i]);
continue;
}
- BUG_ON(!PageLocked(cc->rpages[i]));
+ if (!PageDirty(cc->rpages[i]))
+ goto continue_unlock;
+
+ if (!clear_page_dirty_for_io(cc->rpages[i]))
+ goto continue_unlock;
ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
NULL, NULL, wbc, io_type,
@@ -1501,26 +1502,15 @@ retry_write:
* avoid deadlock caused by cluster update race
* from foreground operation.
*/
- if (IS_NOQUOTA(cc->inode)) {
- err = 0;
- goto out_err;
- }
+ if (IS_NOQUOTA(cc->inode))
+ return 0;
ret = 0;
cond_resched();
congestion_wait(BLK_RW_ASYNC,
DEFAULT_IO_TIMEOUT);
- lock_page(cc->rpages[i]);
-
- if (!PageDirty(cc->rpages[i])) {
- unlock_page(cc->rpages[i]);
- continue;
- }
-
- clear_page_dirty_for_io(cc->rpages[i]);
goto retry_write;
}
- err = ret;
- goto out_err;
+ return ret;
}
*submitted += _submitted;
@@ -1529,14 +1519,6 @@ retry_write:
f2fs_balance_fs(F2FS_M_SB(mapping), true);
return 0;
-out_err:
- for (++i; i < cc->cluster_size; i++) {
- if (!cc->rpages[i])
- continue;
- redirty_page_for_writepage(wbc, cc->rpages[i]);
- unlock_page(cc->rpages[i]);
- }
- return err;
}
int f2fs_write_multi_pages(struct compress_ctx *cc,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9f754aaef558..8c417864c66a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -8,9 +8,9 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
+#include <linux/sched/mm.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
-#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
@@ -18,9 +18,9 @@
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
-#include <linux/cleancache.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
+#include <linux/iomap.h>
#include "f2fs.h"
#include "node.h"
@@ -1354,7 +1354,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
- err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
if (err)
return err;
@@ -1376,61 +1376,9 @@ alloc:
f2fs_invalidate_compress_page(sbi, old_blkaddr);
}
f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
-
- /*
- * i_size will be updated by direct_IO. Otherwise, we'll get stale
- * data from unwritten block via dio_read.
- */
return 0;
}
-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
-{
- struct inode *inode = file_inode(iocb->ki_filp);
- struct f2fs_map_blocks map;
- int flag;
- int err = 0;
- bool direct_io = iocb->ki_flags & IOCB_DIRECT;
-
- map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
- map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
- if (map.m_len > map.m_lblk)
- map.m_len -= map.m_lblk;
- else
- map.m_len = 0;
-
- map.m_next_pgofs = NULL;
- map.m_next_extent = NULL;
- map.m_seg_type = NO_CHECK_TYPE;
- map.m_may_create = true;
-
- if (direct_io) {
- map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
- flag = f2fs_force_buffered_io(inode, iocb, from) ?
- F2FS_GET_BLOCK_PRE_AIO :
- F2FS_GET_BLOCK_PRE_DIO;
- goto map_blocks;
- }
- if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
- }
- if (f2fs_has_inline_data(inode))
- return err;
-
- flag = F2FS_GET_BLOCK_PRE_AIO;
-
-map_blocks:
- err = f2fs_map_blocks(inode, &map, 1, flag);
- if (map.m_len > 0 && err == -ENOSPC) {
- if (!direct_io)
- set_inode_flag(inode, FI_NO_PREALLOC);
- err = 0;
- }
- return err;
-}
-
void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
@@ -1590,8 +1538,11 @@ next_block:
flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
- if (!err)
+ if (!err) {
+ if (flag == F2FS_GET_BLOCK_PRE_DIO)
+ file_need_truncate(inode);
set_inode_flag(inode, FI_APPEND_WRITE);
+ }
}
if (err)
goto sync_out;
@@ -1786,50 +1737,6 @@ static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
return (blks << inode->i_blkbits);
}
-static int __get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create, int flag,
- pgoff_t *next_pgofs, int seg_type, bool may_write)
-{
- struct f2fs_map_blocks map;
- int err;
-
- map.m_lblk = iblock;
- map.m_len = bytes_to_blks(inode, bh->b_size);
- map.m_next_pgofs = next_pgofs;
- map.m_next_extent = NULL;
- map.m_seg_type = seg_type;
- map.m_may_create = may_write;
-
- err = f2fs_map_blocks(inode, &map, create, flag);
- if (!err) {
- map_bh(bh, inode->i_sb, map.m_pblk);
- bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
- bh->b_size = blks_to_bytes(inode, map.m_len);
-
- if (map.m_multidev_dio)
- bh->b_bdev = map.m_bdev;
- }
- return err;
-}
-
-static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(inode->i_write_hint),
- true);
-}
-
-static int get_data_block_dio(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(inode->i_write_hint),
- false);
-}
-
static int f2fs_xattr_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo)
{
@@ -1849,7 +1756,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
if (err) {
f2fs_put_page(page, 1);
return err;
@@ -1881,7 +1788,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- err = f2fs_get_node_info(sbi, xnid, &ni);
+ err = f2fs_get_node_info(sbi, xnid, &ni, false);
if (err) {
f2fs_put_page(page, 1);
return err;
@@ -2127,12 +2034,6 @@ got_it:
block_nr = map->m_pblk + block_in_file - map->m_lblk;
SetPageMappedToDisk(page);
- if (!PageUptodate(page) && (!PageSwapCache(page) &&
- !cleancache_get_page(page))) {
- SetPageUptodate(page);
- goto confused;
- }
-
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC_ENHANCE_READ)) {
ret = -EFSCORRUPTED;
@@ -2188,12 +2089,6 @@ submit_and_realloc:
ClearPageError(page);
*last_block_in_bio = block_nr;
goto out;
-confused:
- if (bio) {
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
- unlock_page(page);
out:
*bio_ret = bio;
return ret;
@@ -2542,7 +2437,7 @@ retry_encrypt:
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
f2fs_flush_merged_writes(fio->sbi);
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
}
@@ -2617,6 +2512,11 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ /* The below cases were checked when setting it. */
+ if (f2fs_is_pinned_file(inode))
+ return false;
+ if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ return true;
if (f2fs_lfs_mode(sbi))
return true;
if (S_ISDIR(inode->i_mode))
@@ -2625,8 +2525,6 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (f2fs_is_atomic_file(inode))
return true;
- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
- return true;
/* swap file is migrating in aligned write mode */
if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
@@ -2738,7 +2636,7 @@ got_it:
fio->need_lock = LOCK_REQ;
}
- err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
if (err)
goto out_writepage;
@@ -2987,6 +2885,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
.rpages = NULL,
.nr_rpages = 0,
.cpages = NULL,
+ .valid_nr_cpages = 0,
.rbuf = NULL,
.cbuf = NULL,
.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
@@ -3305,7 +3204,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
FS_CP_DATA_IO : FS_DATA_IO);
}
-static void f2fs_write_failed(struct inode *inode, loff_t to)
+void f2fs_write_failed(struct inode *inode, loff_t to)
{
loff_t i_size = i_size_read(inode);
@@ -3339,12 +3238,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
int flag;
/*
- * we already allocated all the blocks, so we don't need to get
- * the block addresses when there is no need to fill the page.
+ * If a whole page is being written and we already preallocated all the
+ * blocks, then there is no need to get a block address now.
*/
- if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
- !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
- !f2fs_verity_in_progress(inode))
+ if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
return 0;
/* f2fs_lock_op avoids race between write CP and convert_inline_page */
@@ -3595,158 +3492,6 @@ unlock_out:
return copied;
}
-static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
- loff_t offset)
-{
- unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
- unsigned blkbits = i_blkbits;
- unsigned blocksize_mask = (1 << blkbits) - 1;
- unsigned long align = offset | iov_iter_alignment(iter);
- struct block_device *bdev = inode->i_sb->s_bdev;
-
- if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
- return 1;
-
- if (align & blocksize_mask) {
- if (bdev)
- blkbits = blksize_bits(bdev_logical_block_size(bdev));
- blocksize_mask = (1 << blkbits) - 1;
- if (align & blocksize_mask)
- return -EINVAL;
- return 1;
- }
- return 0;
-}
-
-static void f2fs_dio_end_io(struct bio *bio)
-{
- struct f2fs_private_dio *dio = bio->bi_private;
-
- dec_page_count(F2FS_I_SB(dio->inode),
- dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
- bio->bi_private = dio->orig_private;
- bio->bi_end_io = dio->orig_end_io;
-
- kfree(dio);
-
- bio_endio(bio);
-}
-
-static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
- loff_t file_offset)
-{
- struct f2fs_private_dio *dio;
- bool write = (bio_op(bio) == REQ_OP_WRITE);
-
- dio = f2fs_kzalloc(F2FS_I_SB(inode),
- sizeof(struct f2fs_private_dio), GFP_NOFS);
- if (!dio)
- goto out;
-
- dio->inode = inode;
- dio->orig_end_io = bio->bi_end_io;
- dio->orig_private = bio->bi_private;
- dio->write = write;
-
- bio->bi_end_io = f2fs_dio_end_io;
- bio->bi_private = dio;
-
- inc_page_count(F2FS_I_SB(inode),
- write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
- submit_bio(bio);
- return;
-out:
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
-}
-
-static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
- struct inode *inode = mapping->host;
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_inode_info *fi = F2FS_I(inode);
- size_t count = iov_iter_count(iter);
- loff_t offset = iocb->ki_pos;
- int rw = iov_iter_rw(iter);
- int err;
- enum rw_hint hint = iocb->ki_hint;
- int whint_mode = F2FS_OPTION(sbi).whint_mode;
- bool do_opu;
-
- err = check_direct_IO(inode, iter, offset);
- if (err)
- return err < 0 ? err : 0;
-
- if (f2fs_force_buffered_io(inode, iocb, iter))
- return 0;
-
- do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
-
- trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-
- if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
- iocb->ki_hint = WRITE_LIFE_NOT_SET;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
- iocb->ki_hint = hint;
- err = -EAGAIN;
- goto out;
- }
- if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
- up_read(&fi->i_gc_rwsem[rw]);
- iocb->ki_hint = hint;
- err = -EAGAIN;
- goto out;
- }
- } else {
- down_read(&fi->i_gc_rwsem[rw]);
- if (do_opu)
- down_read(&fi->i_gc_rwsem[READ]);
- }
-
- err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
- iter, rw == WRITE ? get_data_block_dio_write :
- get_data_block_dio, NULL, f2fs_dio_submit_bio,
- rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
- DIO_SKIP_HOLES);
-
- if (do_opu)
- up_read(&fi->i_gc_rwsem[READ]);
-
- up_read(&fi->i_gc_rwsem[rw]);
-
- if (rw == WRITE) {
- if (whint_mode == WHINT_MODE_OFF)
- iocb->ki_hint = hint;
- if (err > 0) {
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
- err);
- if (!do_opu)
- set_inode_flag(inode, FI_UPDATE_WRITE);
- } else if (err == -EIOCBQUEUED) {
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
- count - iov_iter_count(iter));
- } else if (err < 0) {
- f2fs_write_failed(inode, offset + count);
- }
- } else {
- if (err > 0)
- f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
- else if (err == -EIOCBQUEUED)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
- count - iov_iter_count(iter));
- }
-
-out:
- trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
-
- return err;
-}
-
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
@@ -3770,12 +3515,9 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
clear_page_private_gcing(page);
- if (test_opt(sbi, COMPRESS_CACHE)) {
- if (f2fs_compressed_file(inode))
- f2fs_invalidate_compress_pages(sbi, inode->i_ino);
- if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
- clear_page_private_data(page);
- }
+ if (test_opt(sbi, COMPRESS_CACHE) &&
+ inode->i_ino == F2FS_COMPRESS_INO(sbi))
+ clear_page_private_data(page);
if (page_private_atomic(page))
return f2fs_drop_inmem_page(inode, page);
@@ -3795,12 +3537,9 @@ int f2fs_release_page(struct page *page, gfp_t wait)
return 0;
if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) {
- struct f2fs_sb_info *sbi = F2FS_P_SB(page);
struct inode *inode = page->mapping->host;
- if (f2fs_compressed_file(inode))
- f2fs_invalidate_compress_pages(sbi, inode->i_ino);
- if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
+ if (inode->i_ino == F2FS_COMPRESS_INO(F2FS_I_SB(inode)))
clear_page_private_data(page);
}
@@ -4202,7 +3941,7 @@ const struct address_space_operations f2fs_dblock_aops = {
.set_page_dirty = f2fs_set_data_page_dirty,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
- .direct_IO = f2fs_direct_IO,
+ .direct_IO = noop_direct_IO,
.bmap = f2fs_bmap,
.swap_activate = f2fs_swap_activate,
.swap_deactivate = f2fs_swap_deactivate,
@@ -4282,3 +4021,58 @@ void f2fs_destroy_bio_entry_cache(void)
{
kmem_cache_destroy(bio_entry_slab);
}
+
+static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned int flags, struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ struct f2fs_map_blocks map = {};
+ pgoff_t next_pgofs = 0;
+ int err;
+
+ map.m_lblk = bytes_to_blks(inode, offset);
+ map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
+ map.m_next_pgofs = &next_pgofs;
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ if (flags & IOMAP_WRITE)
+ map.m_may_create = true;
+
+ err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
+ F2FS_GET_BLOCK_DIO);
+ if (err)
+ return err;
+
+ iomap->offset = blks_to_bytes(inode, map.m_lblk);
+
+ if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
+ iomap->length = blks_to_bytes(inode, map.m_len);
+ if (map.m_flags & F2FS_MAP_MAPPED) {
+ iomap->type = IOMAP_MAPPED;
+ iomap->flags |= IOMAP_F_MERGED;
+ } else {
+ iomap->type = IOMAP_UNWRITTEN;
+ }
+ if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
+ return -EINVAL;
+
+ iomap->bdev = map.m_bdev;
+ iomap->addr = blks_to_bytes(inode, map.m_pblk);
+ } else {
+ iomap->length = blks_to_bytes(inode, next_pgofs) -
+ iomap->offset;
+ iomap->type = IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
+ }
+
+ if (map.m_flags & F2FS_MAP_NEW)
+ iomap->flags |= IOMAP_F_NEW;
+ if ((inode->i_state & I_DIRTY_DATASYNC) ||
+ offset + length > i_size_read(inode))
+ iomap->flags |= IOMAP_F_DIRTY;
+
+ return 0;
+}
+
+const struct iomap_ops f2fs_iomap_ops = {
+ .iomap_begin = f2fs_iomap_begin,
+};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d0d603187171..eb22fa91c2b2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -58,6 +58,7 @@ enum {
FAULT_WRITE_IO,
FAULT_SLAB_ALLOC,
FAULT_DQUOT_INIT,
+ FAULT_LOCK_OP,
FAULT_MAX,
};
@@ -656,6 +657,7 @@ enum {
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40
+#define FADVISE_TRUNC_BIT 0x80
#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
@@ -683,6 +685,10 @@ enum {
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
+#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
+#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
+#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
+
#define DEF_DIR_LEVEL 0
enum {
@@ -717,7 +723,7 @@ enum {
FI_INLINE_DOTS, /* indicate inline dot dentries */
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
- FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
+ FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */
FI_HOT_DATA, /* indicate file is hot */
FI_EXTRA_ATTR, /* indicate file has extra attribute */
FI_PROJ_INHERIT, /* indicate file inherits projectid */
@@ -1020,6 +1026,7 @@ struct f2fs_sm_info {
unsigned int segment_count; /* total # of segments */
unsigned int main_segments; /* # of segments in main area */
unsigned int reserved_segments; /* # of reserved segments */
+ unsigned int additional_reserved_segments;/* reserved segs for IO align feature */
unsigned int ovp_segments; /* # of overprovision segments */
/* a threshold to reclaim prefree segments */
@@ -1488,6 +1495,7 @@ struct compress_ctx {
unsigned int nr_rpages; /* total page number in rpages */
struct page **cpages; /* pages store compressed data in cluster */
unsigned int nr_cpages; /* total page number in cpages */
+ unsigned int valid_nr_cpages; /* valid page number in cpages */
void *rbuf; /* virtual mapped address on rpages */
struct compress_data *cbuf; /* virtual mapped address on cpages */
size_t rlen; /* valid data length in rbuf */
@@ -1679,6 +1687,9 @@ struct f2fs_sb_info {
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
+ spinlock_t gc_urgent_high_lock;
+ bool gc_urgent_high_limited; /* indicates having limited trial count */
+ unsigned int gc_urgent_high_remaining; /* remaining trial count for GC_URGENT_HIGH */
/* for skip statistic */
unsigned int atomic_files; /* # of opened atomic file */
@@ -1803,13 +1814,6 @@ struct f2fs_sb_info {
#endif
};
-struct f2fs_private_dio {
- struct inode *inode;
- void *orig_private;
- bio_end_io_t *orig_end_io;
- bool write;
-};
-
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(sbi, type) \
printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
@@ -2095,6 +2099,10 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
{
+ if (time_to_inject(sbi, FAULT_LOCK_OP)) {
+ f2fs_show_injection_info(sbi, FAULT_LOCK_OP);
+ return 0;
+ }
return down_read_trylock(&sbi->cp_rwsem);
}
@@ -2200,6 +2208,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (F2FS_IO_ALIGNED(sbi))
+ avail_user_block_count -= sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments;
+
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (avail_user_block_count > sbi->unusable_block_count)
avail_user_block_count -= sbi->unusable_block_count;
@@ -2446,6 +2459,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (F2FS_IO_ALIGNED(sbi))
+ valid_block_count += sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments;
+
user_block_count = sbi->user_block_count;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
user_block_count -= sbi->unusable_block_count;
@@ -3118,12 +3136,16 @@ static inline int is_file(struct inode *inode, int type)
static inline void set_file(struct inode *inode, int type)
{
+ if (is_file(inode, type))
+ return;
F2FS_I(inode)->i_advise |= type;
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void clear_file(struct inode *inode, int type)
{
+ if (!is_file(inode, type))
+ return;
F2FS_I(inode)->i_advise &= ~type;
f2fs_mark_inode_dirty_sync(inode, true);
}
@@ -3408,7 +3430,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
- struct node_info *ni);
+ struct node_info *ni, bool checkpoint_context);
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
@@ -3616,7 +3638,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
int f2fs_reserve_new_block(struct dnode_of_data *dn);
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
int op_flags, bool for_write);
@@ -3639,6 +3660,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
struct writeback_control *wbc,
enum iostat_type io_type,
int compr_blocks, bool allow_balance);
+void f2fs_write_failed(struct inode *inode, loff_t to);
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length);
int f2fs_release_page(struct page *page, gfp_t wait);
@@ -3652,6 +3674,7 @@ int f2fs_init_post_read_processing(void);
void f2fs_destroy_post_read_processing(void);
int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
+extern const struct iomap_ops f2fs_iomap_ops;
/*
* gc.c
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 92ec2699bc85..3c98ef6af97d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -24,6 +24,7 @@
#include <linux/sched/signal.h>
#include <linux/fileattr.h>
#include <linux/fadvise.h>
+#include <linux/iomap.h>
#include "f2fs.h"
#include "node.h"
@@ -1232,7 +1233,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
if (ret)
return ret;
- ret = f2fs_get_node_info(sbi, dn.nid, &ni);
+ ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (ret) {
f2fs_put_dnode(&dn);
return ret;
@@ -1687,6 +1688,7 @@ next_alloc:
map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+ file_dont_truncate(inode);
up_write(&sbi->pin_sem);
@@ -1748,7 +1750,11 @@ static long f2fs_fallocate(struct file *file, int mode,
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
- if (f2fs_compressed_file(inode) &&
+ /*
+ * Pinned file should not support partial trucation since the block
+ * can be used by applications.
+ */
+ if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
@@ -3143,17 +3149,17 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
inode_lock(inode);
- if (f2fs_should_update_outplace(inode, NULL)) {
- ret = -EINVAL;
- goto out;
- }
-
if (!pin) {
clear_inode_flag(inode, FI_PIN_FILE);
f2fs_i_gc_failures_write(inode, 0);
goto done;
}
+ if (f2fs_should_update_outplace(inode, NULL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (f2fs_pin_file_control(inode, false)) {
ret = -EAGAIN;
goto out;
@@ -4218,27 +4224,385 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return __f2fs_ioctl(filp, cmd, arg);
}
-static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+/*
+ * Return %true if the given read or write request should use direct I/O, or
+ * %false if it should use buffered I/O.
+ */
+static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ unsigned int align;
+
+ if (!(iocb->ki_flags & IOCB_DIRECT))
+ return false;
+
+ if (f2fs_force_buffered_io(inode, iocb, iter))
+ return false;
+
+ /*
+ * Direct I/O not aligned to the disk's logical_block_size will be
+ * attempted, but will fail with -EINVAL.
+ *
+ * f2fs additionally requires that direct I/O be aligned to the
+ * filesystem block size, which is often a stricter requirement.
+ * However, f2fs traditionally falls back to buffered I/O on requests
+ * that are logical_block_size-aligned but not fs-block aligned.
+ *
+ * The below logic implements this behavior.
+ */
+ align = iocb->ki_pos | iov_iter_alignment(iter);
+ if (!IS_ALIGNED(align, i_blocksize(inode)) &&
+ IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
+ return false;
+
+ return true;
+}
+
+static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
+ unsigned int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+ dec_page_count(sbi, F2FS_DIO_READ);
+ if (error)
+ return error;
+ f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);
+ return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
+ .end_io = f2fs_dio_read_end_io,
+};
+
+static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- int ret;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ const loff_t pos = iocb->ki_pos;
+ const size_t count = iov_iter_count(to);
+ struct iomap_dio *dio;
+ ssize_t ret;
+
+ if (count == 0)
+ return 0; /* skip atime update */
+
+ trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ down_read(&fi->i_gc_rwsem[READ]);
+ }
+
+ /*
+ * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+ * the higher-level function iomap_dio_rw() in order to ensure that the
+ * F2FS_DIO_READ counter will be decremented correctly in all cases.
+ */
+ inc_page_count(sbi, F2FS_DIO_READ);
+ dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
+ &f2fs_iomap_dio_read_ops, 0, 0);
+ if (IS_ERR_OR_NULL(dio)) {
+ ret = PTR_ERR_OR_ZERO(dio);
+ if (ret != -EIOCBQUEUED)
+ dec_page_count(sbi, F2FS_DIO_READ);
+ } else {
+ ret = iomap_dio_complete(dio);
+ }
+
+ up_read(&fi->i_gc_rwsem[READ]);
+
+ file_accessed(file);
+out:
+ trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
+ return ret;
+}
+
+static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
if (!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;
- ret = generic_file_read_iter(iocb, iter);
+ if (f2fs_should_use_dio(inode, iocb, to))
+ return f2fs_dio_read_iter(iocb, to);
+ ret = filemap_read(iocb, to, 0);
if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret);
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
+ return ret;
+}
+
+static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t count;
+ int err;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
+ return -EPERM;
+
+ count = generic_write_checks(iocb, from);
+ if (count <= 0)
+ return count;
+
+ err = file_modified(file);
+ if (err)
+ return err;
+ return count;
+}
+
+/*
+ * Preallocate blocks for a write request, if it is possible and helpful to do
+ * so. Returns a positive number if blocks may have been preallocated, 0 if no
+ * blocks were preallocated, or a negative errno value if something went
+ * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
+ * requested blocks (not just some of them) have been allocated.
+ */
+static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
+ bool dio)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ const loff_t pos = iocb->ki_pos;
+ const size_t count = iov_iter_count(iter);
+ struct f2fs_map_blocks map = {};
+ int flag;
+ int ret;
+
+ /* If it will be an out-of-place direct write, don't bother. */
+ if (dio && f2fs_lfs_mode(sbi))
+ return 0;
+ /*
+ * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
+ * buffered IO, if DIO meets any holes.
+ */
+ if (dio && i_size_read(inode) &&
+ (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
+ return 0;
+
+ /* No-wait I/O can't allocate blocks. */
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return 0;
+
+ /* If it will be a short write, don't bother. */
+ if (fault_in_iov_iter_readable(iter, count))
+ return 0;
+
+ if (f2fs_has_inline_data(inode)) {
+ /* If the data will fit inline, don't bother. */
+ if (pos + count <= MAX_INLINE_DATA(inode))
+ return 0;
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
+
+ /* Do not preallocate blocks that will be written partially in 4KB. */
+ map.m_lblk = F2FS_BLK_ALIGN(pos);
+ map.m_len = F2FS_BYTES_TO_BLK(pos + count);
+ if (map.m_len > map.m_lblk)
+ map.m_len -= map.m_lblk;
+ else
+ map.m_len = 0;
+ map.m_may_create = true;
+ if (dio) {
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ flag = F2FS_GET_BLOCK_PRE_DIO;
+ } else {
+ map.m_seg_type = NO_CHECK_TYPE;
+ flag = F2FS_GET_BLOCK_PRE_AIO;
+ }
+
+ ret = f2fs_map_blocks(inode, &map, 1, flag);
+ /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
+ if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
+ return ret;
+ if (ret == 0)
+ set_inode_flag(inode, FI_PREALLOCATED_ALL);
+ return map.m_len;
+}
+
+static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t ret;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
+ current->backing_dev_info = inode_to_bdi(inode);
+ ret = generic_perform_write(file, from, iocb->ki_pos);
+ current->backing_dev_info = NULL;
+
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
+ }
return ret;
}
-static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
+ unsigned int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+ dec_page_count(sbi, F2FS_DIO_WRITE);
+ if (error)
+ return error;
+ f2fs_update_iostat(sbi, APP_DIRECT_IO, size);
+ return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
+ .end_io = f2fs_dio_write_end_io,
+};
+
+static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ bool *may_need_sync)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ const bool do_opu = f2fs_lfs_mode(sbi);
+ const int whint_mode = F2FS_OPTION(sbi).whint_mode;
+ const loff_t pos = iocb->ki_pos;
+ const ssize_t count = iov_iter_count(from);
+ const enum rw_hint hint = iocb->ki_hint;
+ unsigned int dio_flags;
+ struct iomap_dio *dio;
+ ssize_t ret;
+
+ trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* f2fs_convert_inline_inode() and block allocation can block */
+ if (f2fs_has_inline_data(inode) ||
+ !f2fs_overwrite_io(inode, pos, count)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ up_read(&fi->i_gc_rwsem[WRITE]);
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ goto out;
+
+ down_read(&fi->i_gc_rwsem[WRITE]);
+ if (do_opu)
+ down_read(&fi->i_gc_rwsem[READ]);
+ }
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = WRITE_LIFE_NOT_SET;
+
+ /*
+ * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+ * the higher-level function iomap_dio_rw() in order to ensure that the
+ * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
+ */
+ inc_page_count(sbi, F2FS_DIO_WRITE);
+ dio_flags = 0;
+ if (pos + count > inode->i_size)
+ dio_flags |= IOMAP_DIO_FORCE_WAIT;
+ dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
+ &f2fs_iomap_dio_write_ops, dio_flags, 0);
+ if (IS_ERR_OR_NULL(dio)) {
+ ret = PTR_ERR_OR_ZERO(dio);
+ if (ret == -ENOTBLK)
+ ret = 0;
+ if (ret != -EIOCBQUEUED)
+ dec_page_count(sbi, F2FS_DIO_WRITE);
+ } else {
+ ret = iomap_dio_complete(dio);
+ }
+
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = hint;
+ if (do_opu)
+ up_read(&fi->i_gc_rwsem[READ]);
+ up_read(&fi->i_gc_rwsem[WRITE]);
+
+ if (ret < 0)
+ goto out;
+ if (pos + ret > inode->i_size)
+ f2fs_i_size_write(inode, pos + ret);
+ if (!do_opu)
+ set_inode_flag(inode, FI_UPDATE_WRITE);
+
+ if (iov_iter_count(from)) {
+ ssize_t ret2;
+ loff_t bufio_start_pos = iocb->ki_pos;
+
+ /*
+ * The direct write was partial, so we need to fall back to a
+ * buffered write for the remainder.
+ */
+
+ ret2 = f2fs_buffered_write_iter(iocb, from);
+ if (iov_iter_count(from))
+ f2fs_write_failed(inode, iocb->ki_pos);
+ if (ret2 < 0)
+ goto out;
+
+ /*
+ * Ensure that the pagecache pages are written to disk and
+ * invalidated to preserve the expected O_DIRECT semantics.
+ */
+ if (ret2 > 0) {
+ loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
+
+ ret += ret2;
+
+ ret2 = filemap_write_and_wait_range(file->f_mapping,
+ bufio_start_pos,
+ bufio_end_pos);
+ if (ret2 < 0)
+ goto out;
+ invalidate_mapping_pages(file->f_mapping,
+ bufio_start_pos >> PAGE_SHIFT,
+ bufio_end_pos >> PAGE_SHIFT);
+ }
+ } else {
+ /* iomap_dio_rw() already handled the generic_write_sync(). */
+ *may_need_sync = false;
+ }
+out:
+ trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
+ return ret;
+}
+
+static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ const loff_t orig_pos = iocb->ki_pos;
+ const size_t orig_count = iov_iter_count(from);
+ loff_t target_size;
+ bool dio;
+ bool may_need_sync = true;
+ int preallocated;
ssize_t ret;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4260,91 +4624,42 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
}
- if (unlikely(IS_IMMUTABLE(inode))) {
- ret = -EPERM;
- goto unlock;
- }
-
- if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
- ret = -EPERM;
- goto unlock;
- }
-
- ret = generic_write_checks(iocb, from);
- if (ret > 0) {
- bool preallocated = false;
- size_t target_size = 0;
- int err;
-
- if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
- set_inode_flag(inode, FI_NO_PREALLOC);
-
- if ((iocb->ki_flags & IOCB_NOWAIT)) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
- iov_iter_count(from)) ||
- f2fs_has_inline_data(inode) ||
- f2fs_force_buffered_io(inode, iocb, from)) {
- clear_inode_flag(inode, FI_NO_PREALLOC);
- inode_unlock(inode);
- ret = -EAGAIN;
- goto out;
- }
- goto write;
- }
-
- if (is_inode_flag_set(inode, FI_NO_PREALLOC))
- goto write;
-
- if (iocb->ki_flags & IOCB_DIRECT) {
- /*
- * Convert inline data for Direct I/O before entering
- * f2fs_direct_IO().
- */
- err = f2fs_convert_inline_inode(inode);
- if (err)
- goto out_err;
- /*
- * If force_buffere_io() is true, we have to allocate
- * blocks all the time, since f2fs_direct_IO will fall
- * back to buffered IO.
- */
- if (!f2fs_force_buffered_io(inode, iocb, from) &&
- f2fs_lfs_mode(F2FS_I_SB(inode)))
- goto write;
- }
- preallocated = true;
- target_size = iocb->ki_pos + iov_iter_count(from);
+ ret = f2fs_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out_unlock;
- err = f2fs_preallocate_blocks(iocb, from);
- if (err) {
-out_err:
- clear_inode_flag(inode, FI_NO_PREALLOC);
- inode_unlock(inode);
- ret = err;
- goto out;
- }
-write:
- ret = __generic_file_write_iter(iocb, from);
- clear_inode_flag(inode, FI_NO_PREALLOC);
+ /* Determine whether we will do a direct write or a buffered write. */
+ dio = f2fs_should_use_dio(inode, iocb, from);
- /* if we couldn't write data, we should deallocate blocks. */
- if (preallocated && i_size_read(inode) < target_size) {
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_truncate(inode);
- filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- }
+ /* Possibly preallocate the blocks for the write. */
+ target_size = iocb->ki_pos + iov_iter_count(from);
+ preallocated = f2fs_preallocate_blocks(iocb, from, dio);
+ if (preallocated < 0)
+ ret = preallocated;
+ else
+ /* Do the actual write. */
+ ret = dio ?
+ f2fs_dio_write_iter(iocb, from, &may_need_sync):
+ f2fs_buffered_write_iter(iocb, from);
- if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
+ /* Don't leave any preallocated blocks around past i_size. */
+ if (preallocated && i_size_read(inode) < target_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ filemap_invalidate_lock(inode->i_mapping);
+ if (!f2fs_truncate(inode))
+ file_dont_truncate(inode);
+ filemap_invalidate_unlock(inode->i_mapping);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ } else {
+ file_dont_truncate(inode);
}
-unlock:
+
+ clear_inode_flag(inode, FI_PREALLOCATED_ALL);
+out_unlock:
inode_unlock(inode);
out:
- trace_f2fs_file_write_iter(inode, iocb->ki_pos,
- iov_iter_count(from), ret);
- if (ret > 0)
+ trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
+ if (ret > 0 && may_need_sync)
ret = generic_write_sync(iocb, ret);
return ret;
}
@@ -4352,12 +4667,12 @@ out:
static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
int advice)
{
- struct inode *inode;
struct address_space *mapping;
struct backing_dev_info *bdi;
+ struct inode *inode = file_inode(filp);
+ int err;
if (advice == POSIX_FADV_SEQUENTIAL) {
- inode = file_inode(filp);
if (S_ISFIFO(inode->i_mode))
return -ESPIPE;
@@ -4374,7 +4689,13 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
return 0;
}
- return generic_fadvise(filp, offset, len, advice);
+ err = generic_fadvise(filp, offset, len, advice);
+ if (!err && advice == POSIX_FADV_DONTNEED &&
+ test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
+ f2fs_compressed_file(inode))
+ f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
+
+ return err;
}
#ifdef CONFIG_COMPAT
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a946ce0ead34..ee308a8de432 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -7,7 +7,6 @@
*/
#include <linux/fs.h>
#include <linux/module.h>
-#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
@@ -15,6 +14,7 @@
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
@@ -92,6 +92,18 @@ static int gc_thread_func(void *data)
* So, I'd like to wait some time to collect dirty segments.
*/
if (sbi->gc_mode == GC_URGENT_HIGH) {
+ spin_lock(&sbi->gc_urgent_high_lock);
+ if (sbi->gc_urgent_high_limited) {
+ if (!sbi->gc_urgent_high_remaining) {
+ sbi->gc_urgent_high_limited = false;
+ spin_unlock(&sbi->gc_urgent_high_lock);
+ sbi->gc_mode = GC_NORMAL;
+ continue;
+ }
+ sbi->gc_urgent_high_remaining--;
+ }
+ spin_unlock(&sbi->gc_urgent_high_lock);
+
wait_ms = gc_th->urgent_sleep_time;
down_write(&sbi->gc_lock);
goto do_gc;
@@ -947,7 +959,7 @@ next_step:
continue;
}
- if (f2fs_get_node_info(sbi, nid, &ni)) {
+ if (f2fs_get_node_info(sbi, nid, &ni, false)) {
f2fs_put_page(node_page, 1);
continue;
}
@@ -1015,7 +1027,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (IS_ERR(node_page))
return false;
- if (f2fs_get_node_info(sbi, nid, dni)) {
+ if (f2fs_get_node_info(sbi, nid, dni, false)) {
f2fs_put_page(node_page, 1);
return false;
}
@@ -1026,6 +1038,9 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
+ if (f2fs_check_nid_range(sbi, dni->ino))
+ return false;
+
*nofs = ofs_of_node(node_page);
source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
@@ -1039,7 +1054,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u",
blkaddr, source_blkaddr, segno);
- f2fs_bug_on(sbi, 1);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
}
}
#endif
@@ -1206,7 +1221,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
if (err)
goto put_out;
@@ -1375,8 +1390,7 @@ retry:
if (err) {
clear_page_private_gcing(page);
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
if (is_dirty)
@@ -1457,7 +1471,8 @@ next_step:
if (phase == 3) {
inode = f2fs_iget(sb, dni.ino);
- if (IS_ERR(inode) || is_bad_inode(inode))
+ if (IS_ERR(inode) || is_bad_inode(inode) ||
+ special_file(inode->i_mode))
continue;
if (!down_write_trylock(
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index ea08f0dfa1bd..4b5cefa3f90c 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -131,7 +131,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
- err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn->nid, &ni, false);
if (err) {
f2fs_truncate_data_blocks_range(dn, 1);
f2fs_put_dnode(dn);
@@ -786,7 +786,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
ilen = start + len;
ilen -= start;
- err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni, false);
if (err)
goto out;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0f8b2df3e1e0..0ec8e32a00b4 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -8,8 +8,8 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
-#include <linux/backing-dev.h>
#include <linux/writeback.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
@@ -516,6 +516,11 @@ make_now:
} else if (ino == F2FS_COMPRESS_INO(sbi)) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
inode->i_mapping->a_ops = &f2fs_compress_aops;
+ /*
+ * generic_error_remove_page only truncates pages of regular
+ * inode
+ */
+ inode->i_mode |= S_IFREG;
#endif
mapping_set_gfp_mask(inode->i_mapping,
GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
@@ -544,6 +549,14 @@ make_now:
goto bad_inode;
}
f2fs_set_inode_flags(inode);
+
+ if (file_should_truncate(inode)) {
+ ret = f2fs_truncate(inode);
+ if (ret)
+ goto bad_inode;
+ file_dont_truncate(inode);
+ }
+
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
@@ -562,7 +575,7 @@ retry:
inode = f2fs_iget(sb, ino);
if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
}
@@ -738,7 +751,8 @@ void f2fs_evict_inode(struct inode *inode)
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
- if (test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
+ if ((inode->i_nlink || is_bad_inode(inode)) &&
+ test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
f2fs_invalidate_compress_pages(sbi, inode->i_ino);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
@@ -868,7 +882,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
* so we can prevent losing this orphan when encoutering checkpoint
* and following suddenly power-off.
*/
- err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index cdcf54ae0db8..be599f31d3c4 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -92,7 +92,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
- spin_lock_irq(&sbi->iostat_lat_lock);
+ spin_lock_bh(&sbi->iostat_lat_lock);
for (idx = 0; idx < MAX_IO_TYPE; idx++) {
for (io = 0; io < NR_PAGE_TYPE; io++) {
cnt = io_lat->bio_cnt[idx][io];
@@ -106,7 +106,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
io_lat->bio_cnt[idx][io] = 0;
}
}
- spin_unlock_irq(&sbi->iostat_lat_lock);
+ spin_unlock_bh(&sbi->iostat_lat_lock);
trace_f2fs_iostat_latency(sbi, iostat_lat);
}
@@ -120,9 +120,9 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
return;
/* Need double check under the lock */
- spin_lock(&sbi->iostat_lock);
+ spin_lock_bh(&sbi->iostat_lock);
if (time_is_after_jiffies(sbi->iostat_next_period)) {
- spin_unlock(&sbi->iostat_lock);
+ spin_unlock_bh(&sbi->iostat_lock);
return;
}
sbi->iostat_next_period = jiffies +
@@ -133,7 +133,7 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
sbi->prev_rw_iostat[i];
sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
}
- spin_unlock(&sbi->iostat_lock);
+ spin_unlock_bh(&sbi->iostat_lock);
trace_f2fs_iostat(sbi, iostat_diff);
@@ -145,16 +145,16 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int i;
- spin_lock(&sbi->iostat_lock);
+ spin_lock_bh(&sbi->iostat_lock);
for (i = 0; i < NR_IO_TYPE; i++) {
sbi->rw_iostat[i] = 0;
sbi->prev_rw_iostat[i] = 0;
}
- spin_unlock(&sbi->iostat_lock);
+ spin_unlock_bh(&sbi->iostat_lock);
- spin_lock_irq(&sbi->iostat_lat_lock);
+ spin_lock_bh(&sbi->iostat_lat_lock);
memset(io_lat, 0, sizeof(struct iostat_lat_info));
- spin_unlock_irq(&sbi->iostat_lat_lock);
+ spin_unlock_bh(&sbi->iostat_lat_lock);
}
void f2fs_update_iostat(struct f2fs_sb_info *sbi,
@@ -163,19 +163,16 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
if (!sbi->iostat_enable)
return;
- spin_lock(&sbi->iostat_lock);
+ spin_lock_bh(&sbi->iostat_lock);
sbi->rw_iostat[type] += io_bytes;
- if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
- sbi->rw_iostat[APP_BUFFERED_IO] =
- sbi->rw_iostat[APP_WRITE_IO] -
- sbi->rw_iostat[APP_DIRECT_IO];
+ if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
+ sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
- if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
- sbi->rw_iostat[APP_BUFFERED_READ_IO] =
- sbi->rw_iostat[APP_READ_IO] -
- sbi->rw_iostat[APP_DIRECT_READ_IO];
- spin_unlock(&sbi->iostat_lock);
+ if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
+ sbi->rw_iostat[APP_READ_IO] += io_bytes;
+
+ spin_unlock_bh(&sbi->iostat_lock);
f2fs_record_iostat(sbi);
}
@@ -185,7 +182,6 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
{
unsigned long ts_diff;
unsigned int iotype = iostat_ctx->type;
- unsigned long flags;
struct f2fs_sb_info *sbi = iostat_ctx->sbi;
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int idx;
@@ -206,12 +202,12 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
idx = WRITE_ASYNC_IO;
}
- spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
+ spin_lock_bh(&sbi->iostat_lat_lock);
io_lat->sum_lat[idx][iotype] += ts_diff;
io_lat->bio_cnt[idx][iotype]++;
if (ts_diff > io_lat->peak_lat[idx][iotype])
io_lat->peak_lat[idx][iotype] = ts_diff;
- spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
+ spin_unlock_bh(&sbi->iostat_lat_lock);
}
void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 556fcd8457f3..50b2874e758c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -8,7 +8,7 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/mpage.h>
-#include <linux/backing-dev.h>
+#include <linux/sched/mm.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/swap.h>
@@ -430,6 +430,10 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *new, *e;
+ /* Let's mitigate lock contention of nat_tree_lock during checkpoint */
+ if (rwsem_is_locked(&sbi->cp_global_sem))
+ return;
+
new = __alloc_nat_entry(sbi, nid, false);
if (!new)
return;
@@ -539,7 +543,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
}
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
- struct node_info *ni)
+ struct node_info *ni, bool checkpoint_context)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -572,9 +576,10 @@ retry:
* nat_tree_lock. Therefore, we should retry, if we failed to grab here
* while not bothering checkpoint.
*/
- if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+ if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
down_read(&curseg->journal_rwsem);
- } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+ } else if (rwsem_is_contended(&nm_i->nat_tree_lock) ||
+ !down_read_trylock(&curseg->journal_rwsem)) {
up_read(&nm_i->nat_tree_lock);
goto retry;
}
@@ -887,7 +892,7 @@ static int truncate_node(struct dnode_of_data *dn)
int err;
pgoff_t index;
- err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
if (err)
return err;
@@ -1286,7 +1291,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
goto fail;
#ifdef CONFIG_F2FS_CHECK_FS
- err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false);
if (err) {
dec_valid_node_count(sbi, dn->inode, !ofs);
goto fail;
@@ -1348,7 +1353,7 @@ static int read_node_page(struct page *page, int op_flags)
return LOCKED_PAGE;
}
- err = f2fs_get_node_info(sbi, page->index, &ni);
+ err = f2fs_get_node_info(sbi, page->index, &ni, false);
if (err)
return err;
@@ -1600,7 +1605,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
- if (f2fs_get_node_info(sbi, nid, &ni))
+ if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
goto redirty_out;
if (wbc->for_reclaim) {
@@ -2701,7 +2706,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
goto recover_xnid;
/* 1: invalidate the previous xattr nid */
- err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+ err = f2fs_get_node_info(sbi, prev_xnid, &ni, false);
if (err)
return err;
@@ -2741,7 +2746,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
struct page *ipage;
int err;
- err = f2fs_get_node_info(sbi, ino, &old_ni);
+ err = f2fs_get_node_info(sbi, ino, &old_ni, false);
if (err)
return err;
@@ -2750,7 +2755,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
retry:
ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
if (!ipage) {
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 6a1b4668d933..9683c80ff8c2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -8,6 +8,7 @@
#include <asm/unaligned.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"
@@ -587,7 +588,7 @@ retry_dn:
err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry_dn;
}
goto out;
@@ -595,7 +596,7 @@ retry_dn:
f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
- err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (err)
goto err;
@@ -670,8 +671,7 @@ retry_prev:
err = check_index_in_prev_nodes(sbi, dest, &dn);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry_prev;
}
goto err;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index df9ed75f0b7a..1dabc8244083 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -9,6 +9,7 @@
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/sched/mm.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
@@ -245,16 +246,14 @@ retry:
LOOKUP_NODE);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
- cond_resched();
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
err = -EAGAIN;
goto next;
}
- err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (err) {
f2fs_put_dnode(&dn);
return err;
@@ -424,9 +423,7 @@ retry:
err = f2fs_do_write_data_page(&fio);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
- cond_resched();
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
unlock_page(page);
@@ -2558,8 +2555,8 @@ find_other_zone:
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
if (secno >= MAIN_SECS(sbi)) {
if (dir == ALLOC_RIGHT) {
- secno = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
+ secno = find_first_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi));
f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
} else {
go_left = 1;
@@ -2574,8 +2571,8 @@ find_other_zone:
left_start--;
continue;
}
- left_start = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
+ left_start = find_first_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi));
f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
break;
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 46fde9f3f28e..0291cd55cf09 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -538,7 +538,8 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
{
- return SM_I(sbi)->reserved_segments;
+ return SM_I(sbi)->reserved_segments +
+ SM_I(sbi)->additional_reserved_segments;
}
static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 040b6d02e1d8..76e6a3df9aba 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -8,9 +8,9 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/sched/mm.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
-#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
@@ -59,6 +59,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_WRITE_IO] = "write IO error",
[FAULT_SLAB_ALLOC] = "slab alloc",
[FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -260,29 +261,22 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
static const struct f2fs_sb_encodings {
__u16 magic;
char *name;
- char *version;
+ unsigned int version;
} f2fs_sb_encoding_map[] = {
- {F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
+ {F2FS_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
};
-static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
- const struct f2fs_sb_encodings **encoding,
- __u16 *flags)
+static const struct f2fs_sb_encodings *
+f2fs_sb_read_encoding(const struct f2fs_super_block *sb)
{
__u16 magic = le16_to_cpu(sb->s_encoding);
int i;
for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
if (magic == f2fs_sb_encoding_map[i].magic)
- break;
-
- if (i >= ARRAY_SIZE(f2fs_sb_encoding_map))
- return -EINVAL;
-
- *encoding = &f2fs_sb_encoding_map[i];
- *flags = le16_to_cpu(sb->s_encoding_flags);
+ return &f2fs_sb_encoding_map[i];
- return 0;
+ return NULL;
}
struct kmem_cache *f2fs_cf_name_slab;
@@ -328,6 +322,46 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).s_resgid));
}
+static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
+{
+ unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
+ unsigned int avg_vblocks;
+ unsigned int wanted_reserved_segments;
+ block_t avail_user_block_count;
+
+ if (!F2FS_IO_ALIGNED(sbi))
+ return 0;
+
+ /* average valid block count in section in worst case */
+ avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
+
+ /*
+ * we need enough free space when migrating one section in worst case
+ */
+ wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
+ reserved_segments(sbi);
+ wanted_reserved_segments -= reserved_segments(sbi);
+
+ avail_user_block_count = sbi->user_block_count -
+ sbi->current_reserved_blocks -
+ F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (wanted_reserved_segments * sbi->blocks_per_seg >
+ avail_user_block_count) {
+ f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
+ wanted_reserved_segments,
+ avail_user_block_count >> sbi->log_blocks_per_seg);
+ return -ENOSPC;
+ }
+
+ SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
+
+ f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
+ wanted_reserved_segments);
+
+ return 0;
+}
+
static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
{
if (!F2FS_OPTION(sbi).unusable_cap_perc)
@@ -2415,8 +2449,7 @@ repeat:
page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
if (IS_ERR(page)) {
if (PTR_ERR(page) == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto repeat;
}
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
@@ -3548,6 +3581,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
+ spin_lock_init(&sbi->gc_urgent_high_lock);
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -3875,25 +3909,32 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
struct unicode_map *encoding;
__u16 encoding_flags;
- if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
- &encoding_flags)) {
+ encoding_info = f2fs_sb_read_encoding(sbi->raw_super);
+ if (!encoding_info) {
f2fs_err(sbi,
"Encoding requested by superblock is unknown");
return -EINVAL;
}
+ encoding_flags = le16_to_cpu(sbi->raw_super->s_encoding_flags);
encoding = utf8_load(encoding_info->version);
if (IS_ERR(encoding)) {
f2fs_err(sbi,
- "can't mount with superblock charset: %s-%s "
+ "can't mount with superblock charset: %s-%u.%u.%u "
"not supported by the kernel. flags: 0x%x.",
- encoding_info->name, encoding_info->version,
+ encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
encoding_flags);
return PTR_ERR(encoding);
}
f2fs_info(sbi, "Using encoding defined by superblock: "
- "%s-%s with flags 0x%hx", encoding_info->name,
- encoding_info->version?:"\b", encoding_flags);
+ "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
sbi->sb->s_encoding = encoding;
sbi->sb->s_encoding_flags = encoding_flags;
@@ -4180,6 +4221,10 @@ try_onemore:
goto free_nm;
}
+ err = adjust_reserved_segment(sbi);
+ if (err)
+ goto free_nm;
+
/* For write statistics */
sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 7d289249cd7e..df406c16b2eb 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -118,6 +118,15 @@ static ssize_t sb_status_show(struct f2fs_attr *a,
return sprintf(buf, "%lx\n", sbi->s_flag);
}
+static ssize_t pending_discard_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ if (!SM_I(sbi)->dcc_info)
+ return -EINVAL;
+ return sprintf(buf, "%llu\n", (unsigned long long)atomic_read(
+ &SM_I(sbi)->dcc_info->discard_cmd_cnt));
+}
+
static ssize_t features_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -196,8 +205,7 @@ static ssize_t encoding_show(struct f2fs_attr *a,
struct super_block *sb = sbi->sb;
if (f2fs_sb_has_casefold(sbi))
- return sysfs_emit(buf, "%s (%d.%d.%d)\n",
- sb->s_encoding->charset,
+ return sysfs_emit(buf, "UTF-8 (%d.%d.%d)\n",
(sb->s_encoding->version >> 16) & 0xff,
(sb->s_encoding->version >> 8) & 0xff,
sb->s_encoding->version & 0xff);
@@ -415,7 +423,9 @@ out:
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
if (t > (unsigned long)(sbi->user_block_count -
- F2FS_OPTION(sbi).root_reserved_blocks)) {
+ F2FS_OPTION(sbi).root_reserved_blocks -
+ sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments)) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
@@ -478,6 +488,15 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) {
+ spin_lock(&sbi->gc_urgent_high_lock);
+ sbi->gc_urgent_high_limited = t != 0;
+ sbi->gc_urgent_high_remaining = t;
+ spin_unlock(&sbi->gc_urgent_high_lock);
+
+ return count;
+ }
+
#ifdef CONFIG_F2FS_IOSTAT
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
@@ -733,6 +752,7 @@ F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
#endif
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent_high_remaining, gc_urgent_high_remaining);
F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio);
F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(free_segments);
@@ -744,6 +764,7 @@ F2FS_GENERAL_RO_ATTR(unusable);
F2FS_GENERAL_RO_ATTR(encoding);
F2FS_GENERAL_RO_ATTR(mounted_time_sec);
F2FS_GENERAL_RO_ATTR(main_blkaddr);
+F2FS_GENERAL_RO_ATTR(pending_discard);
#ifdef CONFIG_F2FS_STAT_FS
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count);
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count);
@@ -812,6 +833,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(main_blkaddr),
ATTR_LIST(max_small_discards),
ATTR_LIST(discard_granularity),
+ ATTR_LIST(pending_discard),
ATTR_LIST(batched_trim_sections),
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
@@ -844,6 +866,7 @@ static struct attribute *f2fs_attrs[] = {
#endif
ATTR_LIST(data_io_flag),
ATTR_LIST(node_io_flag),
+ ATTR_LIST(gc_urgent_high_remaining),
ATTR_LIST(ckpt_thread_ioprio),
ATTR_LIST(dirty_segments),
ATTR_LIST(free_segments),
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index e348f33bcb2b..8e5cd9c916ff 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -226,15 +226,18 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index)
}
static struct f2fs_xattr_entry *__find_xattr(void *base_addr,
- void *last_base_addr, int index,
- size_t len, const char *name)
+ void *last_base_addr, void **last_addr,
+ int index, size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
- (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr)
+ (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) {
+ if (last_addr)
+ *last_addr = entry;
return NULL;
+ }
if (entry->e_name_index != index)
continue;
@@ -254,19 +257,9 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
unsigned int inline_size = inline_xattr_size(inode);
void *max_addr = base_addr + inline_size;
- list_for_each_xattr(entry, base_addr) {
- if ((void *)entry + sizeof(__u32) > max_addr ||
- (void *)XATTR_NEXT_ENTRY(entry) > max_addr) {
- *last_addr = entry;
- return NULL;
- }
- if (entry->e_name_index != index)
- continue;
- if (entry->e_name_len != len)
- continue;
- if (!memcmp(entry->e_name, name, len))
- break;
- }
+ entry = __find_xattr(base_addr, max_addr, last_addr, index, len, name);
+ if (!entry)
+ return NULL;
/* inline xattr header or entry across max inline xattr size */
if (IS_XATTR_LAST_ENTRY(entry) &&
@@ -368,7 +361,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
else
cur_addr = txattr_addr;
- *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
+ *xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name);
if (!*xe) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
inode->i_ino);
@@ -659,7 +652,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
last_base_addr = (void *)base_addr + XATTR_SIZE(inode);
/* find entry with wanted name. */
- here = __find_xattr(base_addr, last_base_addr, index, len, name);
+ here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name);
if (!here) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
inode->i_ino);
@@ -684,8 +677,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
}
last = here;
- while (!IS_XATTR_LAST_ENTRY(last))
+ while (!IS_XATTR_LAST_ENTRY(last)) {
+ if ((void *)(last) + sizeof(__u32) > last_base_addr ||
+ (void *)XATTR_NEXT_ENTRY(last) > last_base_addr) {
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has invalid last xattr entry, entry_size: %zu",
+ inode->i_ino, ENTRY_SIZE(last));
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ error = -EFSCORRUPTED;
+ goto exit;
+ }
last = XATTR_NEXT_ENTRY(last);
+ }
newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 13855ba49cd9..a5a309fcc7fa 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -175,9 +175,10 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static int fat_file_release(struct inode *inode, struct file *filp)
{
if ((filp->f_mode & FMODE_WRITE) &&
- MSDOS_SB(inode->i_sb)->options.flush) {
+ MSDOS_SB(inode->i_sb)->options.flush) {
fat_flush_inodes(inode->i_sb, inode, NULL);
- congestion_wait(BLK_RW_ASYNC, HZ/10);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ io_schedule_timeout(HZ/10);
}
return 0;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 45437f8e1003..57edef16dce4 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -33,7 +33,7 @@
#include "internal.h"
/* sysctl tunables... */
-struct files_stat_struct files_stat = {
+static struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
@@ -75,22 +75,53 @@ unsigned long get_max_files(void)
}
EXPORT_SYMBOL_GPL(get_max_files);
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+
/*
* Handle nr_files sysctl
*/
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_files(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_nr_files(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
-#else
-int proc_nr_files(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+
+static struct ctl_table fs_stat_sysctls[] = {
+ {
+ .procname = "file-nr",
+ .data = &files_stat,
+ .maxlen = sizeof(files_stat),
+ .mode = 0444,
+ .proc_handler = proc_nr_files,
+ },
+ {
+ .procname = "file-max",
+ .data = &files_stat.max_files,
+ .maxlen = sizeof(files_stat.max_files),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = SYSCTL_LONG_ZERO,
+ .extra2 = SYSCTL_LONG_MAX,
+ },
+ {
+ .procname = "nr_open",
+ .data = &sysctl_nr_open,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &sysctl_nr_open_min,
+ .extra2 = &sysctl_nr_open_max,
+ },
+ { }
+};
+
+static int __init init_fs_stat_sysctls(void)
{
- return -ENOSYS;
+ register_sysctl_init("fs", fs_stat_sysctls);
+ return 0;
}
+fs_initcall(init_fs_stat_sysctls);
#endif
static struct file *__alloc_file(int flags, const struct cred *cred)
diff --git a/fs/fs_context.c b/fs/fs_context.c
index b7e43a780a62..24ce12f0db32 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -548,7 +548,7 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
param->key);
}
- if (len > PAGE_SIZE - 2 - size)
+ if (size + len + 2 > PAGE_SIZE)
return invalf(fc, "VFS: Legacy: Cumulative options too large");
if (strchr(param->key, ',') ||
(param->type == fs_value_is_string &&
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index a57c6cbee858..f2aa7dbad766 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -142,12 +142,12 @@ static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
unsigned int collidee_debug_id)
{
wait_var_event_timeout(&candidate->flags,
- fscache_is_acquire_pending(candidate), 20 * HZ);
+ !fscache_is_acquire_pending(candidate), 20 * HZ);
if (!fscache_is_acquire_pending(candidate)) {
pr_notice("Potential volume collision new=%08x old=%08x",
candidate->debug_id, collidee_debug_id);
fscache_stat(&fscache_n_volumes_collision);
- wait_var_event(&candidate->flags, fscache_is_acquire_pending(candidate));
+ wait_var_event(&candidate->flags, !fscache_is_acquire_pending(candidate));
}
}
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 74e627109b79..9d737904d07c 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -891,7 +891,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
return 0;
out_vqs:
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
kfree(fs->vqs);
@@ -923,7 +923,7 @@ static void virtio_fs_remove(struct virtio_device *vdev)
list_del_init(&fs->list);
virtio_fs_stop_all_queues(fs);
virtio_fs_drain_all_queues_locked(fs);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
vdev->priv = NULL;
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 456e87aec7fd..68b4240c6191 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -260,8 +260,10 @@ struct hfsplus_cat_folder {
__be32 access_date;
__be32 backup_date;
struct hfsplus_perm permissions;
- struct DInfo user_info;
- struct DXInfo finder_info;
+ struct_group_attr(info, __packed,
+ struct DInfo user_info;
+ struct DXInfo finder_info;
+ );
__be32 text_encoding;
__be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */
} __packed;
@@ -294,8 +296,10 @@ struct hfsplus_cat_file {
__be32 access_date;
__be32 backup_date;
struct hfsplus_perm permissions;
- struct FInfo user_info;
- struct FXInfo finder_info;
+ struct_group_attr(info, __packed,
+ struct FInfo user_info;
+ struct FXInfo finder_info;
+ );
__be32 text_encoding;
u32 reserved2;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index e2855ceefd39..49891b12c415 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -296,7 +296,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
sizeof(hfsplus_cat_entry));
if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) {
if (size == folder_finderinfo_len) {
- memcpy(&entry.folder.user_info, value,
+ memcpy(&entry.folder.info, value,
folder_finderinfo_len);
hfs_bnode_write(cat_fd.bnode, &entry,
cat_fd.entryoffset,
@@ -309,7 +309,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
}
} else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) {
if (size == file_finderinfo_len) {
- memcpy(&entry.file.user_info, value,
+ memcpy(&entry.file.info, value,
file_finderinfo_len);
hfs_bnode_write(cat_fd.bnode, &entry,
cat_fd.entryoffset,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 49d2e686be74..a7c6c7498be0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -409,10 +409,11 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
struct vm_area_struct *vma;
/*
- * end == 0 indicates that the entire range after
- * start should be unmapped.
+ * end == 0 indicates that the entire range after start should be
+ * unmapped. Note, end is exclusive, whereas the interval tree takes
+ * an inclusive "last".
*/
- vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
+ vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) {
unsigned long v_offset;
unsigned long v_end;
diff --git a/fs/inode.c b/fs/inode.c
index 6b80a51129d5..63324df6fa27 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -67,11 +67,6 @@ const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);
-/*
- * Statistics gathering..
- */
-struct inodes_stat_t inodes_stat;
-
static DEFINE_PER_CPU(unsigned long, nr_inodes);
static DEFINE_PER_CPU(unsigned long, nr_unused);
@@ -106,13 +101,43 @@ long get_nr_dirty_inodes(void)
* Handle nr_inode sysctl
*/
#ifdef CONFIG_SYSCTL
-int proc_nr_inodes(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+/*
+ * Statistics gathering..
+ */
+static struct inodes_stat_t inodes_stat;
+
+static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
inodes_stat.nr_unused = get_nr_inodes_unused();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
+
+static struct ctl_table inodes_sysctls[] = {
+ {
+ .procname = "inode-nr",
+ .data = &inodes_stat,
+ .maxlen = 2*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ {
+ .procname = "inode-state",
+ .data = &inodes_stat,
+ .maxlen = 7*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ { }
+};
+
+static int __init init_fs_inode_sysctls(void)
+{
+ register_sysctl_init("fs", inodes_sysctls);
+ return 0;
+}
+early_initcall(init_fs_inode_sysctls);
#endif
static int no_open(struct inode *inode, struct file *file)
@@ -526,6 +551,55 @@ void __remove_inode_hash(struct inode *inode)
}
EXPORT_SYMBOL(__remove_inode_hash);
+void dump_mapping(const struct address_space *mapping)
+{
+ struct inode *host;
+ const struct address_space_operations *a_ops;
+ struct hlist_node *dentry_first;
+ struct dentry *dentry_ptr;
+ struct dentry dentry;
+ unsigned long ino;
+
+ /*
+ * If mapping is an invalid pointer, we don't want to crash
+ * accessing it, so probe everything depending on it carefully.
+ */
+ if (get_kernel_nofault(host, &mapping->host) ||
+ get_kernel_nofault(a_ops, &mapping->a_ops)) {
+ pr_warn("invalid mapping:%px\n", mapping);
+ return;
+ }
+
+ if (!host) {
+ pr_warn("aops:%ps\n", a_ops);
+ return;
+ }
+
+ if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
+ get_kernel_nofault(ino, &host->i_ino)) {
+ pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
+ return;
+ }
+
+ if (!dentry_first) {
+ pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
+ return;
+ }
+
+ dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
+ if (get_kernel_nofault(dentry, dentry_ptr)) {
+ pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
+ a_ops, ino, dentry_ptr);
+ return;
+ }
+
+ /*
+ * if dentry is corrupted, the %pd handler may still crash,
+ * but it's unlikely that we reach here with a corrupt mapping
+ */
+ pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
+}
+
void clear_inode(struct inode *inode)
{
/*
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5c4f582d6549..bb7f161bb19c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -48,7 +48,8 @@ struct io_worker {
struct io_wqe *wqe;
struct io_wq_work *cur_work;
- spinlock_t lock;
+ struct io_wq_work *next_work;
+ raw_spinlock_t lock;
struct completion ref_done;
@@ -405,8 +406,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
* Worker will start processing some work. Move it to the busy list, if
* it's currently on the freelist
*/
-static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
- struct io_wq_work *work)
+static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
__must_hold(wqe->lock)
{
if (worker->flags & IO_WORKER_F_FREE) {
@@ -529,9 +529,10 @@ static void io_assign_current_work(struct io_worker *worker,
cond_resched();
}
- spin_lock(&worker->lock);
+ raw_spin_lock(&worker->lock);
worker->cur_work = work;
- spin_unlock(&worker->lock);
+ worker->next_work = NULL;
+ raw_spin_unlock(&worker->lock);
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -546,7 +547,7 @@ static void io_worker_handle_work(struct io_worker *worker)
do {
struct io_wq_work *work;
-get_next:
+
/*
* If we got some work, mark us as busy. If we didn't, but
* the list isn't empty, it means we stalled on hashed work.
@@ -555,9 +556,20 @@ get_next:
* clear the stalled flag.
*/
work = io_get_next_work(acct, worker);
- if (work)
- __io_worker_busy(wqe, worker, work);
-
+ if (work) {
+ __io_worker_busy(wqe, worker);
+
+ /*
+ * Make sure cancelation can find this, even before
+ * it becomes the active work. That avoids a window
+ * where the work has been removed from our general
+ * work list, but isn't yet discoverable as the
+ * current work item for this worker.
+ */
+ raw_spin_lock(&worker->lock);
+ worker->next_work = work;
+ raw_spin_unlock(&worker->lock);
+ }
raw_spin_unlock(&wqe->lock);
if (!work)
break;
@@ -594,11 +606,6 @@ get_next:
spin_unlock_irq(&wq->hash->wait.lock);
if (wq_has_sleeper(&wq->hash->wait))
wake_up(&wq->hash->wait);
- raw_spin_lock(&wqe->lock);
- /* skip unnecessary unlock-lock wqe->lock */
- if (!work)
- goto get_next;
- raw_spin_unlock(&wqe->lock);
}
} while (work);
@@ -670,7 +677,7 @@ loop:
*/
void io_wq_worker_running(struct task_struct *tsk)
{
- struct io_worker *worker = tsk->pf_io_worker;
+ struct io_worker *worker = tsk->worker_private;
if (!worker)
return;
@@ -688,7 +695,7 @@ void io_wq_worker_running(struct task_struct *tsk)
*/
void io_wq_worker_sleeping(struct task_struct *tsk)
{
- struct io_worker *worker = tsk->pf_io_worker;
+ struct io_worker *worker = tsk->worker_private;
if (!worker)
return;
@@ -707,7 +714,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
struct task_struct *tsk)
{
- tsk->pf_io_worker = worker;
+ tsk->worker_private = worker;
worker->task = tsk;
set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
tsk->flags |= PF_NO_SETAFFINITY;
@@ -815,7 +822,7 @@ fail:
refcount_set(&worker->ref, 1);
worker->wqe = wqe;
- spin_lock_init(&worker->lock);
+ raw_spin_lock_init(&worker->lock);
init_completion(&worker->ref_done);
if (index == IO_WQ_ACCT_BOUND)
@@ -973,6 +980,19 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}
+static bool __io_wq_worker_cancel(struct io_worker *worker,
+ struct io_cb_cancel_data *match,
+ struct io_wq_work *work)
+{
+ if (work && match->fn(work, match->data)) {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ set_notify_signal(worker->task);
+ return true;
+ }
+
+ return false;
+}
+
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
@@ -981,13 +1001,11 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
* Hold the lock to avoid ->cur_work going out of scope, caller
* may dereference the passed in work.
*/
- spin_lock(&worker->lock);
- if (worker->cur_work &&
- match->fn(worker->cur_work, match->data)) {
- set_notify_signal(worker->task);
+ raw_spin_lock(&worker->lock);
+ if (__io_wq_worker_cancel(worker, match, worker->cur_work) ||
+ __io_wq_worker_cancel(worker, match, worker->next_work))
match->nr_running++;
- }
- spin_unlock(&worker->lock);
+ raw_spin_unlock(&worker->lock);
return match->nr_running && !match->cancel_all;
}
@@ -1039,17 +1057,16 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
{
int i;
retry:
- raw_spin_lock(&wqe->lock);
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
if (io_acct_cancel_pending_work(wqe, acct, match)) {
+ raw_spin_lock(&wqe->lock);
if (match->cancel_all)
goto retry;
- return;
+ break;
}
}
- raw_spin_unlock(&wqe->lock);
}
static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1074,25 +1091,27 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
* First check pending list, if we're lucky we can just remove it
* from there. CANCEL_OK means that the work is returned as-new,
* no completion will be posted for it.
- */
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
-
- io_wqe_cancel_pending_work(wqe, &match);
- if (match.nr_pending && !match.cancel_all)
- return IO_WQ_CANCEL_OK;
- }
-
- /*
- * Now check if a free (going busy) or busy worker has the work
+ *
+ * Then check if a free (going busy) or busy worker has the work
* currently running. If we find it there, we'll return CANCEL_RUNNING
* as an indication that we attempt to signal cancellation. The
* completion will run normally in this case.
+ *
+ * Do both of these while holding the wqe->lock, to ensure that
+ * we'll find a work item regardless of state.
*/
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
+ raw_spin_lock(&wqe->lock);
+ io_wqe_cancel_pending_work(wqe, &match);
+ if (match.nr_pending && !match.cancel_all) {
+ raw_spin_unlock(&wqe->lock);
+ return IO_WQ_CANCEL_OK;
+ }
+
io_wqe_cancel_running_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
if (match.nr_running && !match.cancel_all)
return IO_WQ_CANCEL_RUNNING;
}
@@ -1263,7 +1282,9 @@ static void io_wq_destroy(struct io_wq *wq)
.fn = io_wq_work_match_all,
.cancel_all = true,
};
+ raw_spin_lock(&wqe->lock);
io_wqe_cancel_pending_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
free_cpumask_var(wqe->cpu_mask);
kfree(wqe);
}
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 3709b7c5ec98..dbecd27656c7 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -222,6 +222,6 @@ static inline void io_wq_worker_running(struct task_struct *tsk)
static inline bool io_wq_current_is_worker(void)
{
return in_task() && (current->flags & PF_IO_WORKER) &&
- current->pf_io_worker;
+ current->worker_private;
}
#endif
diff --git a/fs/io_uring.c b/fs/io_uring.c
index de9c9de90655..e54c4127422e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1192,12 +1192,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req)
return atomic_dec_and_test(&req->refs);
}
-static inline void req_ref_put(struct io_kiocb *req)
-{
- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
- WARN_ON_ONCE(req_ref_put_and_test(req));
-}
-
static inline void req_ref_get(struct io_kiocb *req)
{
WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
@@ -5468,12 +5462,14 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
{
- struct wait_queue_head *head = poll->head;
+ struct wait_queue_head *head = smp_load_acquire(&poll->head);
- spin_lock_irq(&head->lock);
- list_del_init(&poll->wait.entry);
- poll->head = NULL;
- spin_unlock_irq(&head->lock);
+ if (head) {
+ spin_lock_irq(&head->lock);
+ list_del_init(&poll->wait.entry);
+ poll->head = NULL;
+ spin_unlock_irq(&head->lock);
+ }
}
static void io_poll_remove_entries(struct io_kiocb *req)
@@ -5481,10 +5477,26 @@ static void io_poll_remove_entries(struct io_kiocb *req)
struct io_poll_iocb *poll = io_poll_get_single(req);
struct io_poll_iocb *poll_double = io_poll_get_double(req);
- if (poll->head)
- io_poll_remove_entry(poll);
- if (poll_double && poll_double->head)
+ /*
+ * While we hold the waitqueue lock and the waitqueue is nonempty,
+ * wake_up_pollfree() will wait for us. However, taking the waitqueue
+ * lock in the first place can race with the waitqueue being freed.
+ *
+ * We solve this as eventpoll does: by taking advantage of the fact that
+ * all users of wake_up_pollfree() will RCU-delay the actual free. If
+ * we enter rcu_read_lock() and see that the pointer to the queue is
+ * non-NULL, we can then lock it without the memory being freed out from
+ * under us.
+ *
+ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
+ * case the caller deletes the entry from the queue, leaving it empty.
+ * In that case, only RCU prevents the queue memory from being freed.
+ */
+ rcu_read_lock();
+ io_poll_remove_entry(poll);
+ if (poll_double)
io_poll_remove_entry(poll_double);
+ rcu_read_unlock();
}
/*
@@ -5624,6 +5636,30 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
wait);
__poll_t mask = key_to_poll(key);
+ if (unlikely(mask & POLLFREE)) {
+ io_poll_mark_cancelled(req);
+ /* we have to kick tw in case it's not already */
+ io_poll_execute(req, 0);
+
+ /*
+ * If the waitqueue is being freed early but someone is already
+ * holds ownership over it, we have to tear down the request as
+ * best we can. That means immediately removing the request from
+ * its waitqueue and preventing all further accesses to the
+ * waitqueue via the request.
+ */
+ list_del_init(&poll->wait.entry);
+
+ /*
+ * Careful: this *must* be the last step, since as soon
+ * as req->head is NULL'ed out, the request can be
+ * completed and freed, since aio_poll_complete_work()
+ * will no longer need to take the waitqueue lock.
+ */
+ smp_store_release(&poll->head, NULL);
+ return 1;
+ }
+
/* for instances that support it check for an event match first */
if (mask && !(mask & poll->events))
return 0;
@@ -6350,16 +6386,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- if (ret != -ENOENT)
- return ret;
+ /*
+ * Fall-through even for -EALREADY, as we may have poll armed
+ * that need unarming.
+ */
+ if (!ret)
+ return 0;
spin_lock(&ctx->completion_lock);
+ ret = io_poll_cancel(ctx, sqe_addr, false);
+ if (ret != -ENOENT)
+ goto out;
+
spin_lock_irq(&ctx->timeout_lock);
ret = io_timeout_cancel(ctx, sqe_addr);
spin_unlock_irq(&ctx->timeout_lock);
- if (ret != -ENOENT)
- goto out;
- ret = io_poll_cancel(ctx, sqe_addr, false);
out:
spin_unlock(&ctx->completion_lock);
return ret;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 504e69578112..1ed097e94af2 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -430,7 +430,7 @@ static int ioctl_file_dedupe_range(struct file *file,
goto out;
}
- size = offsetof(struct file_dedupe_range __user, info[count]);
+ size = offsetof(struct file_dedupe_range, info[count]);
if (size > PAGE_SIZE) {
ret = -ENOMEM;
goto out;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0b86a4365b66..f13d548e4a7f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1212,7 +1212,7 @@ static const struct seq_operations jbd2_seq_info_ops = {
static int jbd2_seq_info_open(struct inode *inode, struct file *file)
{
- journal_t *journal = PDE_DATA(inode);
+ journal_t *journal = pde_data(inode);
struct jbd2_stats_proc_session *s;
int rc, size;
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 2b4d5013dc5d..6da92ecaf66d 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -161,5 +161,5 @@ static int jffs2_garbage_collect_thread(void *_c)
spin_lock(&c->erase_completion_lock);
c->gc_task = NULL;
spin_unlock(&c->erase_completion_lock);
- complete_and_exit(&c->gc_thread_exit, 0);
+ kthread_complete_and_exit(&c->gc_thread_exit, 0);
}
diff --git a/fs/ksmbd/asn1.c b/fs/ksmbd/asn1.c
index b014f4638610..c03eba090368 100644
--- a/fs/ksmbd/asn1.c
+++ b/fs/ksmbd/asn1.c
@@ -21,101 +21,11 @@
#include "ksmbd_spnego_negtokeninit.asn1.h"
#include "ksmbd_spnego_negtokentarg.asn1.h"
-#define SPNEGO_OID_LEN 7
#define NTLMSSP_OID_LEN 10
-#define KRB5_OID_LEN 7
-#define KRB5U2U_OID_LEN 8
-#define MSKRB5_OID_LEN 7
-static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
-static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
-static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
-static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
-static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
static char NTLMSSP_OID_STR[NTLMSSP_OID_LEN] = { 0x2b, 0x06, 0x01, 0x04, 0x01,
0x82, 0x37, 0x02, 0x02, 0x0a };
-static bool
-asn1_subid_decode(const unsigned char **begin, const unsigned char *end,
- unsigned long *subid)
-{
- const unsigned char *ptr = *begin;
- unsigned char ch;
-
- *subid = 0;
-
- do {
- if (ptr >= end)
- return false;
-
- ch = *ptr++;
- *subid <<= 7;
- *subid |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
-
- *begin = ptr;
- return true;
-}
-
-static bool asn1_oid_decode(const unsigned char *value, size_t vlen,
- unsigned long **oid, size_t *oidlen)
-{
- const unsigned char *iptr = value, *end = value + vlen;
- unsigned long *optr;
- unsigned long subid;
-
- vlen += 1;
- if (vlen < 2 || vlen > UINT_MAX / sizeof(unsigned long))
- goto fail_nullify;
-
- *oid = kmalloc(vlen * sizeof(unsigned long), GFP_KERNEL);
- if (!*oid)
- return false;
-
- optr = *oid;
-
- if (!asn1_subid_decode(&iptr, end, &subid))
- goto fail;
-
- if (subid < 40) {
- optr[0] = 0;
- optr[1] = subid;
- } else if (subid < 80) {
- optr[0] = 1;
- optr[1] = subid - 40;
- } else {
- optr[0] = 2;
- optr[1] = subid - 80;
- }
-
- *oidlen = 2;
- optr += 2;
-
- while (iptr < end) {
- if (++(*oidlen) > vlen)
- goto fail;
-
- if (!asn1_subid_decode(&iptr, end, optr++))
- goto fail;
- }
- return true;
-
-fail:
- kfree(*oid);
-fail_nullify:
- *oid = NULL;
- return false;
-}
-
-static bool oid_eq(unsigned long *oid1, unsigned int oid1len,
- unsigned long *oid2, unsigned int oid2len)
-{
- if (oid1len != oid2len)
- return false;
-
- return memcmp(oid1, oid2, oid1len) == 0;
-}
-
int
ksmbd_decode_negTokenInit(unsigned char *security_blob, int length,
struct ksmbd_conn *conn)
@@ -252,26 +162,18 @@ int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
int ksmbd_gssapi_this_mech(void *context, size_t hdrlen, unsigned char tag,
const void *value, size_t vlen)
{
- unsigned long *oid;
- size_t oidlen;
- int err = 0;
-
- if (!asn1_oid_decode(value, vlen, &oid, &oidlen)) {
- err = -EBADMSG;
- goto out;
- }
+ enum OID oid;
- if (!oid_eq(oid, oidlen, SPNEGO_OID, SPNEGO_OID_LEN))
- err = -EBADMSG;
- kfree(oid);
-out:
- if (err) {
+ oid = look_up_OID(value, vlen);
+ if (oid != OID_spnego) {
char buf[50];
sprint_oid(value, vlen, buf, sizeof(buf));
ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ return -EBADMSG;
}
- return err;
+
+ return 0;
}
int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
@@ -279,37 +181,31 @@ int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
size_t vlen)
{
struct ksmbd_conn *conn = context;
- unsigned long *oid;
- size_t oidlen;
+ enum OID oid;
int mech_type;
- char buf[50];
- if (!asn1_oid_decode(value, vlen, &oid, &oidlen))
- goto fail;
-
- if (oid_eq(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN))
+ oid = look_up_OID(value, vlen);
+ if (oid == OID_ntlmssp) {
mech_type = KSMBD_AUTH_NTLMSSP;
- else if (oid_eq(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN))
+ } else if (oid == OID_mskrb5) {
mech_type = KSMBD_AUTH_MSKRB5;
- else if (oid_eq(oid, oidlen, KRB5_OID, KRB5_OID_LEN))
+ } else if (oid == OID_krb5) {
mech_type = KSMBD_AUTH_KRB5;
- else if (oid_eq(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN))
+ } else if (oid == OID_krb5u2u) {
mech_type = KSMBD_AUTH_KRB5U2U;
- else
- goto fail;
+ } else {
+ char buf[50];
+
+ sprint_oid(value, vlen, buf, sizeof(buf));
+ ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ return -EBADMSG;
+ }
conn->auth_mechs |= mech_type;
if (conn->preferred_auth_mech == 0)
conn->preferred_auth_mech = mech_type;
- kfree(oid);
return 0;
-
-fail:
- kfree(oid);
- sprint_oid(value, vlen, buf, sizeof(buf));
- ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
- return -EBADMSG;
}
int ksmbd_neg_token_init_mech_token(void *context, size_t hdrlen,
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
index 3503b1c48cb4..dc3d061edda9 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/ksmbd/auth.c
@@ -215,7 +215,7 @@ out:
* Return: 0 on success, error number on error
*/
int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
- int blen, char *domain_name)
+ int blen, char *domain_name, char *cryptkey)
{
char ntlmv2_hash[CIFS_ENCPWD_SIZE];
char ntlmv2_rsp[CIFS_HMAC_MD5_HASH_SIZE];
@@ -256,7 +256,7 @@ int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
goto out;
}
- memcpy(construct, sess->ntlmssp.cryptkey, CIFS_CRYPTO_KEY_SIZE);
+ memcpy(construct, cryptkey, CIFS_CRYPTO_KEY_SIZE);
memcpy(construct + CIFS_CRYPTO_KEY_SIZE, &ntlmv2->blob_signature, blen);
rc = crypto_shash_update(CRYPTO_HMACMD5(ctx), construct, len);
@@ -295,7 +295,8 @@ out:
* Return: 0 on success, error number on error
*/
int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
- int blob_len, struct ksmbd_session *sess)
+ int blob_len, struct ksmbd_conn *conn,
+ struct ksmbd_session *sess)
{
char *domain_name;
unsigned int nt_off, dn_off;
@@ -324,7 +325,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
/* TODO : use domain name that imported from configuration file */
domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off,
- dn_len, true, sess->conn->local_nls);
+ dn_len, true, conn->local_nls);
if (IS_ERR(domain_name))
return PTR_ERR(domain_name);
@@ -333,7 +334,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
domain_name);
ret = ksmbd_auth_ntlmv2(sess, (struct ntlmv2_resp *)((char *)authblob + nt_off),
nt_len - CIFS_ENCPWD_SIZE,
- domain_name);
+ domain_name, conn->ntlmssp.cryptkey);
kfree(domain_name);
return ret;
}
@@ -347,7 +348,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
*
*/
int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
- int blob_len, struct ksmbd_session *sess)
+ int blob_len, struct ksmbd_conn *conn)
{
if (blob_len < sizeof(struct negotiate_message)) {
ksmbd_debug(AUTH, "negotiate blob len %d too small\n",
@@ -361,7 +362,7 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
return -EINVAL;
}
- sess->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
+ conn->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
return 0;
}
@@ -375,14 +376,14 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
*/
unsigned int
ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
- struct ksmbd_session *sess)
+ struct ksmbd_conn *conn)
{
struct target_info *tinfo;
wchar_t *name;
__u8 *target_name;
unsigned int flags, blob_off, blob_len, type, target_info_len = 0;
int len, uni_len, conv_len;
- int cflags = sess->ntlmssp.client_flags;
+ int cflags = conn->ntlmssp.client_flags;
memcpy(chgblob->Signature, NTLMSSP_SIGNATURE, 8);
chgblob->MessageType = NtLmChallenge;
@@ -403,7 +404,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
if (cflags & NTLMSSP_REQUEST_TARGET)
flags |= NTLMSSP_REQUEST_TARGET;
- if (sess->conn->use_spnego &&
+ if (conn->use_spnego &&
(cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
@@ -414,7 +415,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
return -ENOMEM;
conv_len = smb_strtoUTF16((__le16 *)name, ksmbd_netbios_name(), len,
- sess->conn->local_nls);
+ conn->local_nls);
if (conv_len < 0 || conv_len > len) {
kfree(name);
return -EINVAL;
@@ -430,8 +431,8 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
chgblob->TargetName.BufferOffset = cpu_to_le32(blob_off);
/* Initialize random conn challenge */
- get_random_bytes(sess->ntlmssp.cryptkey, sizeof(__u64));
- memcpy(chgblob->Challenge, sess->ntlmssp.cryptkey,
+ get_random_bytes(conn->ntlmssp.cryptkey, sizeof(__u64));
+ memcpy(chgblob->Challenge, conn->ntlmssp.cryptkey,
CIFS_CRYPTO_KEY_SIZE);
/* Add Target Information to security buffer */
diff --git a/fs/ksmbd/auth.h b/fs/ksmbd/auth.h
index 9c2d4badd05d..95629651cf26 100644
--- a/fs/ksmbd/auth.h
+++ b/fs/ksmbd/auth.h
@@ -38,16 +38,16 @@ struct kvec;
int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
unsigned int nvec, int enc);
void ksmbd_copy_gss_neg_header(void *buf);
-int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf);
int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
- int blen, char *domain_name);
+ int blen, char *domain_name, char *cryptkey);
int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
- int blob_len, struct ksmbd_session *sess);
+ int blob_len, struct ksmbd_conn *conn,
+ struct ksmbd_session *sess);
int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
- int blob_len, struct ksmbd_session *sess);
+ int blob_len, struct ksmbd_conn *conn);
unsigned int
ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
- struct ksmbd_session *sess);
+ struct ksmbd_conn *conn);
int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
int in_len, char *out_blob, int *out_len);
int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index 83a94d0bb480..208d2cff7bd3 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -62,6 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
atomic_set(&conn->req_running, 0);
atomic_set(&conn->r_count, 0);
conn->total_credits = 1;
+ conn->outstanding_credits = 1;
init_waitqueue_head(&conn->req_running_q);
INIT_LIST_HEAD(&conn->conns_list);
@@ -386,17 +387,24 @@ out:
static void stop_sessions(void)
{
struct ksmbd_conn *conn;
+ struct ksmbd_transport *t;
again:
read_lock(&conn_list_lock);
list_for_each_entry(conn, &conn_list, conns_list) {
struct task_struct *task;
- task = conn->transport->handler;
+ t = conn->transport;
+ task = t->handler;
if (task)
ksmbd_debug(CONN, "Stop session handler %s/%d\n",
task->comm, task_pid_nr(task));
conn->status = KSMBD_SESS_EXITING;
+ if (t->ops->shutdown) {
+ read_unlock(&conn_list_lock);
+ t->ops->shutdown(t);
+ read_lock(&conn_list_lock);
+ }
}
read_unlock(&conn_list_lock);
diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
index e5403c587a58..7a59aacb5daa 100644
--- a/fs/ksmbd/connection.h
+++ b/fs/ksmbd/connection.h
@@ -61,8 +61,8 @@ struct ksmbd_conn {
atomic_t req_running;
/* References which are made for this Server object*/
atomic_t r_count;
- unsigned short total_credits;
- unsigned short max_credits;
+ unsigned int total_credits;
+ unsigned int outstanding_credits;
spinlock_t credits_lock;
wait_queue_head_t req_running_q;
/* Lock to protect requests list*/
@@ -72,12 +72,7 @@ struct ksmbd_conn {
int connection_type;
struct ksmbd_stats stats;
char ClientGUID[SMB2_CLIENT_GUID_SIZE];
- union {
- /* pending trans request table */
- struct trans_state *recent_trans;
- /* Used by ntlmssp */
- char *ntlmssp_cryptkey;
- };
+ struct ntlmssp_auth ntlmssp;
spinlock_t llist_lock;
struct list_head lock_list;
@@ -122,6 +117,7 @@ struct ksmbd_conn_ops {
struct ksmbd_transport_ops {
int (*prepare)(struct ksmbd_transport *t);
void (*disconnect)(struct ksmbd_transport *t);
+ void (*shutdown)(struct ksmbd_transport *t);
int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size);
int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
int size, bool need_invalidate_rkey,
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
index c6718a05d347..71bfb7de4472 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -103,6 +103,8 @@ struct ksmbd_startup_request {
* we set the SPARSE_FILES bit (0x40).
*/
__u32 sub_auth[3]; /* Subauth value for Security ID */
+ __u32 smb2_max_credits; /* MAX credits */
+ __u32 reserved[128]; /* Reserved room */
__u32 ifc_list_sz; /* interfaces list size */
__s8 ____payload[];
};
@@ -113,7 +115,7 @@ struct ksmbd_startup_request {
* IPC request to shutdown ksmbd server.
*/
struct ksmbd_shutdown_request {
- __s32 reserved;
+ __s32 reserved[16];
};
/*
@@ -122,6 +124,7 @@ struct ksmbd_shutdown_request {
struct ksmbd_login_request {
__u32 handle;
__s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -135,6 +138,7 @@ struct ksmbd_login_response {
__u16 status;
__u16 hash_sz; /* hash size */
__s8 hash[KSMBD_REQ_MAX_HASH_SZ]; /* password hash */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -143,6 +147,7 @@ struct ksmbd_login_response {
struct ksmbd_share_config_request {
__u32 handle;
__s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -157,6 +162,7 @@ struct ksmbd_share_config_response {
__u16 force_directory_mode;
__u16 force_uid;
__u16 force_gid;
+ __u32 reserved[128]; /* Reserved room */
__u32 veto_list_sz;
__s8 ____payload[];
};
@@ -187,6 +193,7 @@ struct ksmbd_tree_connect_request {
__s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ];
__s8 share[KSMBD_REQ_MAX_SHARE_NAME];
__s8 peer_addr[64];
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -196,6 +203,7 @@ struct ksmbd_tree_connect_response {
__u32 handle;
__u16 status;
__u16 connection_flags;
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -204,6 +212,7 @@ struct ksmbd_tree_connect_response {
struct ksmbd_tree_disconnect_request {
__u64 session_id; /* session id */
__u64 connect_id; /* tree connection id */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -212,6 +221,7 @@ struct ksmbd_tree_disconnect_request {
struct ksmbd_logout_request {
__s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
__u32 account_flags;
+ __u32 reserved[16]; /* Reserved room */
};
/*
diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/ksmbd/mgmt/user_config.c
index 1019d3677d55..279d00feff21 100644
--- a/fs/ksmbd/mgmt/user_config.c
+++ b/fs/ksmbd/mgmt/user_config.c
@@ -67,3 +67,13 @@ int ksmbd_anonymous_user(struct ksmbd_user *user)
return 1;
return 0;
}
+
+bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2)
+{
+ if (strcmp(u1->name, u2->name))
+ return false;
+ if (memcmp(u1->passkey, u2->passkey, u1->passkey_sz))
+ return false;
+
+ return true;
+}
diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h
index aff80b029579..6a44109617f1 100644
--- a/fs/ksmbd/mgmt/user_config.h
+++ b/fs/ksmbd/mgmt/user_config.h
@@ -64,4 +64,5 @@ struct ksmbd_user *ksmbd_login_user(const char *account);
struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp);
void ksmbd_free_user(struct ksmbd_user *user);
int ksmbd_anonymous_user(struct ksmbd_user *user);
+bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2);
#endif /* __USER_CONFIG_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/ksmbd/mgmt/user_session.h
index 82289c3cbd2b..e241f16a3851 100644
--- a/fs/ksmbd/mgmt/user_session.h
+++ b/fs/ksmbd/mgmt/user_session.h
@@ -45,7 +45,6 @@ struct ksmbd_session {
int state;
__u8 *Preauth_HashValue;
- struct ntlmssp_auth ntlmssp;
char sess_key[CIFS_KEY_SIZE];
struct hlist_node hlist;
diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
index 50d0b1022289..4a9460153b59 100644
--- a/fs/ksmbd/smb2misc.c
+++ b/fs/ksmbd/smb2misc.c
@@ -289,7 +289,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len;
unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge);
void *__hdr = hdr;
- int ret;
+ int ret = 0;
switch (hdr->Command) {
case SMB2_QUERY_INFO:
@@ -326,21 +326,27 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n",
credit_charge, calc_credit_num);
return 1;
- } else if (credit_charge > conn->max_credits) {
+ } else if (credit_charge > conn->vals->max_credits) {
ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge);
return 1;
}
spin_lock(&conn->credits_lock);
- if (credit_charge <= conn->total_credits) {
- conn->total_credits -= credit_charge;
- ret = 0;
- } else {
+ if (credit_charge > conn->total_credits) {
ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
credit_charge, conn->total_credits);
ret = 1;
}
+
+ if ((u64)conn->outstanding_credits + credit_charge > conn->vals->max_credits) {
+ ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given : %u, pending : %u\n",
+ credit_charge, conn->outstanding_credits);
+ ret = 1;
+ } else
+ conn->outstanding_credits += credit_charge;
+
spin_unlock(&conn->credits_lock);
+
return ret;
}
diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
index 02a44d28bdaf..ab23da2120b9 100644
--- a/fs/ksmbd/smb2ops.c
+++ b/fs/ksmbd/smb2ops.c
@@ -19,6 +19,7 @@ static struct smb_version_values smb21_server_values = {
.max_read_size = SMB21_DEFAULT_IOSIZE,
.max_write_size = SMB21_DEFAULT_IOSIZE,
.max_trans_size = SMB21_DEFAULT_IOSIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -44,6 +45,7 @@ static struct smb_version_values smb30_server_values = {
.max_read_size = SMB3_DEFAULT_IOSIZE,
.max_write_size = SMB3_DEFAULT_IOSIZE,
.max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -70,6 +72,7 @@ static struct smb_version_values smb302_server_values = {
.max_read_size = SMB3_DEFAULT_IOSIZE,
.max_write_size = SMB3_DEFAULT_IOSIZE,
.max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -96,6 +99,7 @@ static struct smb_version_values smb311_server_values = {
.max_read_size = SMB3_DEFAULT_IOSIZE,
.max_write_size = SMB3_DEFAULT_IOSIZE,
.max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -197,7 +201,6 @@ void init_smb2_1_server(struct ksmbd_conn *conn)
conn->ops = &smb2_0_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -215,7 +218,6 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
conn->ops = &smb3_0_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -240,7 +242,6 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
conn->ops = &smb3_0_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -265,7 +266,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
conn->ops = &smb3_11_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -304,3 +304,11 @@ void init_smb2_max_trans_size(unsigned int sz)
smb302_server_values.max_trans_size = sz;
smb311_server_values.max_trans_size = sz;
}
+
+void init_smb2_max_credits(unsigned int sz)
+{
+ smb21_server_values.max_credits = sz;
+ smb30_server_values.max_credits = sz;
+ smb302_server_values.max_credits = sz;
+ smb311_server_values.max_credits = sz;
+}
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index b8b3a4c28b74..1866c81c5c99 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -299,16 +299,15 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
struct smb2_hdr *hdr = ksmbd_resp_buf_next(work);
struct ksmbd_conn *conn = work->conn;
- unsigned short credits_requested;
+ unsigned short credits_requested, aux_max;
unsigned short credit_charge, credits_granted = 0;
- unsigned short aux_max, aux_credits;
if (work->send_no_response)
return 0;
hdr->CreditCharge = req_hdr->CreditCharge;
- if (conn->total_credits > conn->max_credits) {
+ if (conn->total_credits > conn->vals->max_credits) {
hdr->CreditRequest = 0;
pr_err("Total credits overflow: %d\n", conn->total_credits);
return -EINVAL;
@@ -316,6 +315,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
credit_charge = max_t(unsigned short,
le16_to_cpu(req_hdr->CreditCharge), 1);
+ if (credit_charge > conn->total_credits) {
+ ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
+ credit_charge, conn->total_credits);
+ return -EINVAL;
+ }
+
+ conn->total_credits -= credit_charge;
+ conn->outstanding_credits -= credit_charge;
credits_requested = max_t(unsigned short,
le16_to_cpu(req_hdr->CreditRequest), 1);
@@ -325,16 +332,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
* TODO: Need to adjuct CreditRequest value according to
* current cpu load
*/
- aux_credits = credits_requested - 1;
if (hdr->Command == SMB2_NEGOTIATE)
- aux_max = 0;
+ aux_max = 1;
else
- aux_max = conn->max_credits - credit_charge;
- aux_credits = min_t(unsigned short, aux_credits, aux_max);
- credits_granted = credit_charge + aux_credits;
+ aux_max = conn->vals->max_credits - credit_charge;
+ credits_granted = min_t(unsigned short, credits_requested, aux_max);
- if (conn->max_credits - conn->total_credits < credits_granted)
- credits_granted = conn->max_credits -
+ if (conn->vals->max_credits - conn->total_credits < credits_granted)
+ credits_granted = conn->vals->max_credits -
conn->total_credits;
conn->total_credits += credits_granted;
@@ -610,16 +615,14 @@ static void destroy_previous_session(struct ksmbd_user *user, u64 id)
/**
* smb2_get_name() - get filename string from on the wire smb format
- * @share: ksmbd_share_config pointer
* @src: source buffer
* @maxlen: maxlen of source string
- * @nls_table: nls_table pointer
+ * @local_nls: nls_table pointer
*
* Return: matching converted filename on success, otherwise error ptr
*/
static char *
-smb2_get_name(struct ksmbd_share_config *share, const char *src,
- const int maxlen, struct nls_table *local_nls)
+smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls)
{
char *name;
@@ -1303,7 +1306,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
int sz, rc;
ksmbd_debug(SMB, "negotiate phase\n");
- rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->sess);
+ rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->conn);
if (rc)
return rc;
@@ -1313,7 +1316,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
memset(chgblob, 0, sizeof(struct challenge_message));
if (!work->conn->use_spnego) {
- sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
if (sz < 0)
return -ENOMEM;
@@ -1329,7 +1332,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
return -ENOMEM;
chgblob = (struct challenge_message *)neg_blob;
- sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
if (sz < 0) {
rc = -ENOMEM;
goto out;
@@ -1450,60 +1453,62 @@ static int ntlm_authenticate(struct ksmbd_work *work)
ksmbd_free_user(user);
return 0;
}
- ksmbd_free_user(sess->user);
- }
- sess->user = user;
- if (user_guest(sess->user)) {
- if (conn->sign) {
- ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n");
+ if (!ksmbd_compare_user(sess->user, user)) {
+ ksmbd_free_user(user);
return -EPERM;
}
+ ksmbd_free_user(user);
+ } else {
+ sess->user = user;
+ }
+ if (user_guest(sess->user)) {
rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE;
} else {
struct authenticate_message *authblob;
authblob = user_authblob(conn, req);
sz = le16_to_cpu(req->SecurityBufferLength);
- rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, sess);
+ rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, conn, sess);
if (rc) {
set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD);
ksmbd_debug(SMB, "authentication failed\n");
return -EPERM;
}
+ }
- /*
- * If session state is SMB2_SESSION_VALID, We can assume
- * that it is reauthentication. And the user/password
- * has been verified, so return it here.
- */
- if (sess->state == SMB2_SESSION_VALID) {
- if (conn->binding)
- goto binding_session;
- return 0;
- }
+ /*
+ * If session state is SMB2_SESSION_VALID, We can assume
+ * that it is reauthentication. And the user/password
+ * has been verified, so return it here.
+ */
+ if (sess->state == SMB2_SESSION_VALID) {
+ if (conn->binding)
+ goto binding_session;
+ return 0;
+ }
- if ((conn->sign || server_conf.enforced_signing) ||
- (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
- sess->sign = true;
+ if ((rsp->SessionFlags != SMB2_SESSION_FLAG_IS_GUEST_LE &&
+ (conn->sign || server_conf.enforced_signing)) ||
+ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+ sess->sign = true;
- if (smb3_encryption_negotiated(conn) &&
- !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
- rc = conn->ops->generate_encryptionkey(sess);
- if (rc) {
- ksmbd_debug(SMB,
- "SMB3 encryption key generation failed\n");
- return -EINVAL;
- }
- sess->enc = true;
- rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
- /*
- * signing is disable if encryption is enable
- * on this session
- */
- sess->sign = false;
+ if (smb3_encryption_negotiated(conn) &&
+ !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+ rc = conn->ops->generate_encryptionkey(sess);
+ if (rc) {
+ ksmbd_debug(SMB,
+ "SMB3 encryption key generation failed\n");
+ return -EINVAL;
}
+ sess->enc = true;
+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+ /*
+ * signing is disable if encryption is enable
+ * on this session
+ */
+ sess->sign = false;
}
binding_session:
@@ -2057,9 +2062,6 @@ int smb2_session_logoff(struct ksmbd_work *work)
ksmbd_debug(SMB, "request\n");
- /* Got a valid session, set connection state */
- WARN_ON(sess->conn != conn);
-
/* setting CifsExiting here may race with start_tcp_sess */
ksmbd_conn_set_need_reconnect(work);
ksmbd_close_session_fds(work);
@@ -2530,8 +2532,7 @@ int smb2_open(struct ksmbd_work *work)
goto err_out1;
}
- name = smb2_get_name(share,
- req->Buffer,
+ name = smb2_get_name(req->Buffer,
le16_to_cpu(req->NameLength),
work->conn->local_nls);
if (IS_ERR(name)) {
@@ -3392,7 +3393,6 @@ static int dentry_name(struct ksmbd_dir_info *d_info, int info_level)
* @conn: connection instance
* @info_level: smb information level
* @d_info: structure included variables for query dir
- * @user_ns: user namespace
* @ksmbd_kstat: ksmbd wrapper of dirent stat information
*
* if directory has many entries, find first can't read it fully.
@@ -4018,6 +4018,7 @@ err_out2:
* buffer_check_err() - helper function to check buffer errors
* @reqOutputBufferLength: max buffer length expected in command response
* @rsp: query info response buffer contains output buffer length
+ * @rsp_org: base response buffer pointer in case of chained response
* @infoclass_size: query info class response buffer size
*
* Return: 0 on success, otherwise error
@@ -5398,8 +5399,7 @@ static int smb2_rename(struct ksmbd_work *work,
goto out;
}
- new_name = smb2_get_name(share,
- file_info->FileName,
+ new_name = smb2_get_name(file_info->FileName,
le32_to_cpu(file_info->FileNameLength),
local_nls);
if (IS_ERR(new_name)) {
@@ -5510,8 +5510,7 @@ static int smb2_create_link(struct ksmbd_work *work,
if (!pathname)
return -ENOMEM;
- link_name = smb2_get_name(share,
- file_info->FileName,
+ link_name = smb2_get_name(file_info->FileName,
le32_to_cpu(file_info->FileNameLength),
local_nls);
if (IS_ERR(link_name) || S_ISDIR(file_inode(filp)->i_mode)) {
@@ -5849,7 +5848,7 @@ static int set_file_mode_info(struct ksmbd_file *fp,
* smb2_set_info_file() - handler for smb2 set info command
* @work: smb work containing set info command buffer
* @fp: ksmbd_file pointer
- * @info_class: smb2 set info class
+ * @req: request buffer pointer
* @share: ksmbd_share_config pointer
*
* Return: 0 on success, otherwise error
@@ -6121,25 +6120,33 @@ out:
return err;
}
-static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
- struct smb2_read_req *req, void *data_buf,
- size_t length)
+static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
+ struct smb2_buffer_desc_v1 *desc,
+ __le32 Channel,
+ __le16 ChannelInfoOffset,
+ __le16 ChannelInfoLength)
{
- struct smb2_buffer_desc_v1 *desc =
- (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
- int err;
-
if (work->conn->dialect == SMB30_PROT_ID &&
- req->Channel != SMB2_CHANNEL_RDMA_V1)
+ Channel != SMB2_CHANNEL_RDMA_V1)
return -EINVAL;
- if (req->ReadChannelInfoOffset == 0 ||
- le16_to_cpu(req->ReadChannelInfoLength) < sizeof(*desc))
+ if (ChannelInfoOffset == 0 ||
+ le16_to_cpu(ChannelInfoLength) < sizeof(*desc))
return -EINVAL;
work->need_invalidate_rkey =
- (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+ (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
work->remote_key = le32_to_cpu(desc->token);
+ return 0;
+}
+
+static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
+ struct smb2_read_req *req, void *data_buf,
+ size_t length)
+{
+ struct smb2_buffer_desc_v1 *desc =
+ (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+ int err;
err = ksmbd_conn_rdma_write(work->conn, data_buf, length,
le32_to_cpu(desc->token),
@@ -6162,7 +6169,7 @@ int smb2_read(struct ksmbd_work *work)
struct ksmbd_conn *conn = work->conn;
struct smb2_read_req *req;
struct smb2_read_rsp *rsp;
- struct ksmbd_file *fp;
+ struct ksmbd_file *fp = NULL;
loff_t offset;
size_t length, mincount;
ssize_t nbytes = 0, remain_bytes = 0;
@@ -6176,6 +6183,18 @@ int smb2_read(struct ksmbd_work *work)
return smb2_read_pipe(work);
}
+ if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
+ req->Channel == SMB2_CHANNEL_RDMA_V1) {
+ err = smb2_set_remote_key_for_rdma(work,
+ (struct smb2_buffer_desc_v1 *)
+ &req->Buffer[0],
+ req->Channel,
+ req->ReadChannelInfoOffset,
+ req->ReadChannelInfoLength);
+ if (err)
+ goto out;
+ }
+
fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
le64_to_cpu(req->PersistentFileId));
if (!fp) {
@@ -6361,21 +6380,6 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
- if (work->conn->dialect == SMB30_PROT_ID &&
- req->Channel != SMB2_CHANNEL_RDMA_V1)
- return -EINVAL;
-
- if (req->Length != 0 || req->DataOffset != 0)
- return -EINVAL;
-
- if (req->WriteChannelInfoOffset == 0 ||
- le16_to_cpu(req->WriteChannelInfoLength) < sizeof(*desc))
- return -EINVAL;
-
- work->need_invalidate_rkey =
- (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
- work->remote_key = le32_to_cpu(desc->token);
-
data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
if (!data_buf)
return -ENOMEM;
@@ -6422,6 +6426,20 @@ int smb2_write(struct ksmbd_work *work)
return smb2_write_pipe(work);
}
+ if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
+ req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
+ if (req->Length != 0 || req->DataOffset != 0)
+ return -EINVAL;
+ err = smb2_set_remote_key_for_rdma(work,
+ (struct smb2_buffer_desc_v1 *)
+ &req->Buffer[0],
+ req->Channel,
+ req->WriteChannelInfoOffset,
+ req->WriteChannelInfoLength);
+ if (err)
+ goto out;
+ }
+
if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
ksmbd_debug(SMB, "User does not have write permission\n");
err = -EACCES;
@@ -7243,15 +7261,10 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
struct sockaddr_storage_rsp *sockaddr_storage;
unsigned int flags;
unsigned long long speed;
- struct sockaddr_in6 *csin6 = (struct sockaddr_in6 *)&conn->peer_addr;
rtnl_lock();
for_each_netdev(&init_net, netdev) {
- if (out_buf_len <
- nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
- rtnl_unlock();
- return -ENOSPC;
- }
+ bool ipv4_set = false;
if (netdev->type == ARPHRD_LOOPBACK)
continue;
@@ -7259,12 +7272,20 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
flags = dev_get_flags(netdev);
if (!(flags & IFF_RUNNING))
continue;
+ipv6_retry:
+ if (out_buf_len <
+ nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
+ rtnl_unlock();
+ return -ENOSPC;
+ }
nii_rsp = (struct network_interface_info_ioctl_rsp *)
&rsp->Buffer[nbytes];
nii_rsp->IfIndex = cpu_to_le32(netdev->ifindex);
nii_rsp->Capability = 0;
+ if (netdev->real_num_tx_queues > 1)
+ nii_rsp->Capability |= cpu_to_le32(RSS_CAPABLE);
if (ksmbd_rdma_capable_netdev(netdev))
nii_rsp->Capability |= cpu_to_le32(RDMA_CAPABLE);
@@ -7289,8 +7310,7 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
nii_rsp->SockAddr_Storage;
memset(sockaddr_storage, 0, 128);
- if (conn->peer_addr.ss_family == PF_INET ||
- ipv6_addr_v4mapped(&csin6->sin6_addr)) {
+ if (!ipv4_set) {
struct in_device *idev;
sockaddr_storage->Family = cpu_to_le16(INTERNETWORK);
@@ -7301,6 +7321,9 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
continue;
sockaddr_storage->addr4.IPv4address =
idev_ipv4_address(idev);
+ nbytes += sizeof(struct network_interface_info_ioctl_rsp);
+ ipv4_set = true;
+ goto ipv6_retry;
} else {
struct inet6_dev *idev6;
struct inet6_ifaddr *ifa;
@@ -7322,9 +7345,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
break;
}
sockaddr_storage->addr6.ScopeId = 0;
+ nbytes += sizeof(struct network_interface_info_ioctl_rsp);
}
-
- nbytes += sizeof(struct network_interface_info_ioctl_rsp);
}
rtnl_unlock();
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index 4a3e4339d4c4..725b800c29c8 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -980,6 +980,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn);
void init_smb2_max_read_size(unsigned int sz);
void init_smb2_max_write_size(unsigned int sz);
void init_smb2_max_trans_size(unsigned int sz);
+void init_smb2_max_credits(unsigned int sz);
bool is_smb2_neg_cmd(struct ksmbd_work *work);
bool is_smb2_rsp(struct ksmbd_work *work);
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
index 50590842b651..e1369b4345a9 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/ksmbd/smb_common.h
@@ -365,6 +365,7 @@ struct smb_version_values {
__u32 max_read_size;
__u32 max_write_size;
__u32 max_trans_size;
+ __u32 max_credits;
__u32 large_lock_type;
__u32 exclusive_lock_type;
__u32 shared_lock_type;
diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
index 1acf1892a466..3ad6881e0f7e 100644
--- a/fs/ksmbd/transport_ipc.c
+++ b/fs/ksmbd/transport_ipc.c
@@ -301,6 +301,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
init_smb2_max_write_size(req->smb2_max_write);
if (req->smb2_max_trans)
init_smb2_max_trans_size(req->smb2_max_trans);
+ if (req->smb2_max_credits)
+ init_smb2_max_credits(req->smb2_max_credits);
ret = ksmbd_set_netbios_name(req->netbios_name);
ret |= ksmbd_set_server_string(req->server_string);
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index 7e57cbb0bb35..3c1ec1ac0b27 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -34,7 +34,8 @@
#include "smbstatus.h"
#include "transport_rdma.h"
-#define SMB_DIRECT_PORT 5445
+#define SMB_DIRECT_PORT_IWARP 5445
+#define SMB_DIRECT_PORT_INFINIBAND 445
#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
@@ -60,6 +61,10 @@
* as defined in [MS-SMBD] 3.1.1.1
* Those may change after a SMB_DIRECT negotiation
*/
+
+/* Set 445 port to SMB Direct port by default */
+static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
+
/* The local peer's maximum number of credits to grant to the peer */
static int smb_direct_receive_credit_max = 255;
@@ -75,10 +80,18 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
/* The maximum single-message size which can be received */
static int smb_direct_max_receive_size = 8192;
-static int smb_direct_max_read_write_size = 1024 * 1024;
+static int smb_direct_max_read_write_size = 1048512;
static int smb_direct_max_outstanding_rw_ops = 8;
+static LIST_HEAD(smb_direct_device_list);
+static DEFINE_RWLOCK(smb_direct_device_lock);
+
+struct smb_direct_device {
+ struct ib_device *ib_dev;
+ struct list_head list;
+};
+
static struct smb_direct_listener {
struct rdma_cm_id *cm_id;
} smb_direct_listener;
@@ -415,6 +428,7 @@ static void free_transport(struct smb_direct_transport *t)
if (t->qp) {
ib_drain_qp(t->qp);
+ ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
ib_destroy_qp(t->qp);
}
@@ -555,6 +569,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
t->negotiation_requested = true;
t->full_packet_received = true;
+ enqueue_reassembly(t, recvmsg, 0);
wake_up_interruptible(&t->wait_status);
break;
case SMB_DIRECT_MSG_DATA_TRANSFER: {
@@ -1438,6 +1453,15 @@ static void smb_direct_disconnect(struct ksmbd_transport *t)
free_transport(st);
}
+static void smb_direct_shutdown(struct ksmbd_transport *t)
+{
+ struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+
+ ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
+
+ smb_direct_disconnect_rdma_work(&st->disconnect_work);
+}
+
static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -1581,19 +1605,13 @@ static int smb_direct_accept_client(struct smb_direct_transport *t)
pr_err("error at rdma_accept: %d\n", ret);
return ret;
}
-
- wait_event_interruptible(t->wait_status,
- t->status != SMB_DIRECT_CS_NEW);
- if (t->status != SMB_DIRECT_CS_CONNECTED)
- return -ENOTCONN;
return 0;
}
-static int smb_direct_negotiate(struct smb_direct_transport *t)
+static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
{
int ret;
struct smb_direct_recvmsg *recvmsg;
- struct smb_direct_negotiate_req *req;
recvmsg = get_free_recvmsg(t);
if (!recvmsg)
@@ -1603,44 +1621,20 @@ static int smb_direct_negotiate(struct smb_direct_transport *t)
ret = smb_direct_post_recv(t, recvmsg);
if (ret) {
pr_err("Can't post recv: %d\n", ret);
- goto out;
+ goto out_err;
}
t->negotiation_requested = false;
ret = smb_direct_accept_client(t);
if (ret) {
pr_err("Can't accept client\n");
- goto out;
+ goto out_err;
}
smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
-
- ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
- ret = wait_event_interruptible_timeout(t->wait_status,
- t->negotiation_requested ||
- t->status == SMB_DIRECT_CS_DISCONNECTED,
- SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
- if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
- ret = ret < 0 ? ret : -ETIMEDOUT;
- goto out;
- }
-
- ret = smb_direct_check_recvmsg(recvmsg);
- if (ret == -ECONNABORTED)
- goto out;
-
- req = (struct smb_direct_negotiate_req *)recvmsg->packet;
- t->max_recv_size = min_t(int, t->max_recv_size,
- le32_to_cpu(req->preferred_send_size));
- t->max_send_size = min_t(int, t->max_send_size,
- le32_to_cpu(req->max_receive_size));
- t->max_fragmented_send_size =
- le32_to_cpu(req->max_fragmented_size);
-
- ret = smb_direct_send_negotiate_response(t, ret);
-out:
- if (recvmsg)
- put_recvmsg(t, recvmsg);
+ return 0;
+out_err:
+ put_recvmsg(t, recvmsg);
return ret;
}
@@ -1724,7 +1718,9 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
cap->max_inline_data = 0;
- cap->max_rdma_ctxs = 0;
+ cap->max_rdma_ctxs =
+ rdma_rw_mr_factor(device, t->cm_id->port_num, max_pages) *
+ smb_direct_max_outstanding_rw_ops;
return 0;
}
@@ -1806,6 +1802,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
{
int ret;
struct ib_qp_init_attr qp_attr;
+ int pages_per_rw;
t->pd = ib_alloc_pd(t->cm_id->device, 0);
if (IS_ERR(t->pd)) {
@@ -1853,6 +1850,23 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
t->qp = t->cm_id->qp;
t->cm_id->event_handler = smb_direct_cm_handler;
+ pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
+ if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
+ int pages_per_mr, mr_count;
+
+ pages_per_mr = min_t(int, pages_per_rw,
+ t->cm_id->device->attrs.max_fast_reg_page_list_len);
+ mr_count = DIV_ROUND_UP(pages_per_rw, pages_per_mr) *
+ atomic_read(&t->rw_avail_ops);
+ ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, mr_count,
+ IB_MR_TYPE_MEM_REG, pages_per_mr, 0);
+ if (ret) {
+ pr_err("failed to init mr pool count %d pages %d\n",
+ mr_count, pages_per_mr);
+ goto err;
+ }
+ }
+
return 0;
err:
if (t->qp) {
@@ -1877,6 +1891,49 @@ err:
static int smb_direct_prepare(struct ksmbd_transport *t)
{
struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_negotiate_req *req;
+ int ret;
+
+ ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
+ ret = wait_event_interruptible_timeout(st->wait_status,
+ st->negotiation_requested ||
+ st->status == SMB_DIRECT_CS_DISCONNECTED,
+ SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
+ if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
+ return ret < 0 ? ret : -ETIMEDOUT;
+
+ recvmsg = get_first_reassembly(st);
+ if (!recvmsg)
+ return -ECONNABORTED;
+
+ ret = smb_direct_check_recvmsg(recvmsg);
+ if (ret == -ECONNABORTED)
+ goto out;
+
+ req = (struct smb_direct_negotiate_req *)recvmsg->packet;
+ st->max_recv_size = min_t(int, st->max_recv_size,
+ le32_to_cpu(req->preferred_send_size));
+ st->max_send_size = min_t(int, st->max_send_size,
+ le32_to_cpu(req->max_receive_size));
+ st->max_fragmented_send_size =
+ le32_to_cpu(req->max_fragmented_size);
+ st->max_fragmented_recv_size =
+ (st->recv_credit_max * st->max_recv_size) / 2;
+
+ ret = smb_direct_send_negotiate_response(st, ret);
+out:
+ spin_lock_irq(&st->reassembly_queue_lock);
+ st->reassembly_queue_length--;
+ list_del(&recvmsg->list);
+ spin_unlock_irq(&st->reassembly_queue_lock);
+ put_recvmsg(st, recvmsg);
+
+ return ret;
+}
+
+static int smb_direct_connect(struct smb_direct_transport *st)
+{
int ret;
struct ib_qp_cap qp_cap;
@@ -1898,13 +1955,11 @@ static int smb_direct_prepare(struct ksmbd_transport *t)
return ret;
}
- ret = smb_direct_negotiate(st);
+ ret = smb_direct_prepare_negotiation(st);
if (ret) {
pr_err("Can't negotiate: %d\n", ret);
return ret;
}
-
- st->status = SMB_DIRECT_CS_CONNECTED;
return 0;
}
@@ -1920,6 +1975,7 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
struct smb_direct_transport *t;
+ int ret;
if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
ksmbd_debug(RDMA,
@@ -1932,18 +1988,23 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
if (!t)
return -ENOMEM;
+ ret = smb_direct_connect(t);
+ if (ret)
+ goto out_err;
+
KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
KSMBD_TRANS(t)->conn, "ksmbd:r%u",
- SMB_DIRECT_PORT);
+ smb_direct_port);
if (IS_ERR(KSMBD_TRANS(t)->handler)) {
- int ret = PTR_ERR(KSMBD_TRANS(t)->handler);
-
+ ret = PTR_ERR(KSMBD_TRANS(t)->handler);
pr_err("Can't start thread\n");
- free_transport(t);
- return ret;
+ goto out_err;
}
return 0;
+out_err:
+ free_transport(t);
+ return ret;
}
static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
@@ -2007,12 +2068,65 @@ err:
return ret;
}
+static int smb_direct_ib_client_add(struct ib_device *ib_dev)
+{
+ struct smb_direct_device *smb_dev;
+
+ /* Set 5445 port if device type is iWARP(No IB) */
+ if (ib_dev->node_type != RDMA_NODE_IB_CA)
+ smb_direct_port = SMB_DIRECT_PORT_IWARP;
+
+ if (!ib_dev->ops.get_netdev ||
+ !rdma_frwr_is_supported(&ib_dev->attrs))
+ return 0;
+
+ smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
+ if (!smb_dev)
+ return -ENOMEM;
+ smb_dev->ib_dev = ib_dev;
+
+ write_lock(&smb_direct_device_lock);
+ list_add(&smb_dev->list, &smb_direct_device_list);
+ write_unlock(&smb_direct_device_lock);
+
+ ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
+ return 0;
+}
+
+static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
+ void *client_data)
+{
+ struct smb_direct_device *smb_dev, *tmp;
+
+ write_lock(&smb_direct_device_lock);
+ list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
+ if (smb_dev->ib_dev == ib_dev) {
+ list_del(&smb_dev->list);
+ kfree(smb_dev);
+ break;
+ }
+ }
+ write_unlock(&smb_direct_device_lock);
+}
+
+static struct ib_client smb_direct_ib_client = {
+ .name = "ksmbd_smb_direct_ib",
+ .add = smb_direct_ib_client_add,
+ .remove = smb_direct_ib_client_remove,
+};
+
int ksmbd_rdma_init(void)
{
int ret;
smb_direct_listener.cm_id = NULL;
+ ret = ib_register_client(&smb_direct_ib_client);
+ if (ret) {
+ pr_err("failed to ib_register_client\n");
+ return ret;
+ }
+
/* When a client is running out of send credits, the credits are
* granted by the server's sending a packet using this queue.
* This avoids the situation that a clients cannot send packets
@@ -2023,7 +2137,7 @@ int ksmbd_rdma_init(void)
if (!smb_direct_wq)
return -ENOMEM;
- ret = smb_direct_listen(SMB_DIRECT_PORT);
+ ret = smb_direct_listen(smb_direct_port);
if (ret) {
destroy_workqueue(smb_direct_wq);
smb_direct_wq = NULL;
@@ -2036,36 +2150,67 @@ int ksmbd_rdma_init(void)
return 0;
}
-int ksmbd_rdma_destroy(void)
+void ksmbd_rdma_destroy(void)
{
- if (smb_direct_listener.cm_id)
- rdma_destroy_id(smb_direct_listener.cm_id);
+ if (!smb_direct_listener.cm_id)
+ return;
+
+ ib_unregister_client(&smb_direct_ib_client);
+ rdma_destroy_id(smb_direct_listener.cm_id);
+
smb_direct_listener.cm_id = NULL;
if (smb_direct_wq) {
destroy_workqueue(smb_direct_wq);
smb_direct_wq = NULL;
}
- return 0;
}
bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
- struct ib_device *ibdev;
+ struct smb_direct_device *smb_dev;
+ int i;
bool rdma_capable = false;
- ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
- if (ibdev) {
- if (rdma_frwr_is_supported(&ibdev->attrs))
- rdma_capable = true;
- ib_device_put(ibdev);
+ read_lock(&smb_direct_device_lock);
+ list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
+ for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
+ struct net_device *ndev;
+
+ ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
+ i + 1);
+ if (!ndev)
+ continue;
+
+ if (ndev == netdev) {
+ dev_put(ndev);
+ rdma_capable = true;
+ goto out;
+ }
+ dev_put(ndev);
+ }
+ }
+out:
+ read_unlock(&smb_direct_device_lock);
+
+ if (rdma_capable == false) {
+ struct ib_device *ibdev;
+
+ ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
+ if (ibdev) {
+ if (rdma_frwr_is_supported(&ibdev->attrs))
+ rdma_capable = true;
+ ib_device_put(ibdev);
+ }
}
+
return rdma_capable;
}
static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
.prepare = smb_direct_prepare,
.disconnect = smb_direct_disconnect,
+ .shutdown = smb_direct_shutdown,
.writev = smb_direct_writev,
.read = smb_direct_read,
.rdma_read = smb_direct_rdma_read,
diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h
index 0fa8adc0776f..5567d93a6f96 100644
--- a/fs/ksmbd/transport_rdma.h
+++ b/fs/ksmbd/transport_rdma.h
@@ -7,8 +7,6 @@
#ifndef __KSMBD_TRANSPORT_RDMA_H__
#define __KSMBD_TRANSPORT_RDMA_H__
-#define SMB_DIRECT_PORT 5445
-
/* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */
struct smb_direct_negotiate_req {
__le16 min_version;
@@ -52,7 +50,7 @@ struct smb_direct_data_transfer {
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
int ksmbd_rdma_init(void);
-int ksmbd_rdma_destroy(void);
+void ksmbd_rdma_destroy(void);
bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
#else
static inline int ksmbd_rdma_init(void) { return 0; }
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
index c14320e03b69..82a1429bbe12 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/ksmbd/transport_tcp.c
@@ -404,7 +404,7 @@ static int create_socket(struct interface *iface)
&ksmbd_socket);
if (ret) {
pr_err("Can't create socket for ipv4: %d\n", ret);
- goto out_error;
+ goto out_clear;
}
sin.sin_family = PF_INET;
@@ -462,6 +462,7 @@ static int create_socket(struct interface *iface)
out_error:
tcp_destroy_socket(ksmbd_socket);
+out_clear:
iface->ksmbd_socket = NULL;
return ret;
}
diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h
index 448576fbe4b7..36239ce31afd 100644
--- a/fs/ksmbd/vfs_cache.h
+++ b/fs/ksmbd/vfs_cache.h
@@ -96,16 +96,6 @@ struct ksmbd_file {
int durable_timeout;
- /* for SMB1 */
- int pid;
-
- /* conflict lock fail count for SMB1 */
- unsigned int cflock_cnt;
- /* last lock failure start offset for SMB1 */
- unsigned long long llock_fstart;
-
- int dirent_offset;
-
/* if ls is happening on directory, below is valid*/
struct ksmbd_readdir_data readdir_data;
int dot_dotdot[2];
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b220e1b91726..0475c5a5d061 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -54,13 +54,9 @@ EXPORT_SYMBOL_GPL(nlmsvc_ops);
static DEFINE_MUTEX(nlmsvc_mutex);
static unsigned int nlmsvc_users;
-static struct task_struct *nlmsvc_task;
-static struct svc_rqst *nlmsvc_rqst;
+static struct svc_serv *nlmsvc_serv;
unsigned long nlmsvc_timeout;
-static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
-static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
-
unsigned int lockd_net_id;
/*
@@ -184,7 +180,12 @@ lockd(void *vrqstp)
nlm_shutdown_hosts();
cancel_delayed_work_sync(&ln->grace_period_end);
locks_end_grace(&ln->lockd_manager);
- return 0;
+
+ dprintk("lockd_down: service stopped\n");
+
+ svc_exit_thread(rqstp);
+
+ module_put_and_kthread_exit(0);
}
static int create_lockd_listener(struct svc_serv *serv, const char *name,
@@ -290,8 +291,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
__func__, net->ns.inum);
}
} else {
- pr_err("%s: no users! task=%p, net=%x\n",
- __func__, nlmsvc_task, net->ns.inum);
+ pr_err("%s: no users! net=%x\n",
+ __func__, net->ns.inum);
BUG();
}
}
@@ -302,20 +303,16 @@ static int lockd_inetaddr_event(struct notifier_block *this,
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct sockaddr_in sin;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nlm_ntf_refcnt))
+ if (event != NETDEV_DOWN)
goto out;
- if (nlmsvc_rqst) {
+ if (nlmsvc_serv) {
dprintk("lockd_inetaddr_event: removed %pI4\n",
&ifa->ifa_local);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ifa->ifa_local;
- svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
- (struct sockaddr *)&sin);
+ svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin);
}
- atomic_dec(&nlm_ntf_refcnt);
- wake_up(&nlm_ntf_wq);
out:
return NOTIFY_DONE;
@@ -332,21 +329,17 @@ static int lockd_inet6addr_event(struct notifier_block *this,
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
struct sockaddr_in6 sin6;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nlm_ntf_refcnt))
+ if (event != NETDEV_DOWN)
goto out;
- if (nlmsvc_rqst) {
+ if (nlmsvc_serv) {
dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
- svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
- (struct sockaddr *)&sin6);
+ svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6);
}
- atomic_dec(&nlm_ntf_refcnt);
- wake_up(&nlm_ntf_wq);
out:
return NOTIFY_DONE;
@@ -357,86 +350,22 @@ static struct notifier_block lockd_inet6addr_notifier = {
};
#endif
-static void lockd_unregister_notifiers(void)
-{
- unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
-#if IS_ENABLED(CONFIG_IPV6)
- unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
-#endif
- wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0);
-}
-
-static void lockd_svc_exit_thread(void)
-{
- atomic_dec(&nlm_ntf_refcnt);
- lockd_unregister_notifiers();
- svc_exit_thread(nlmsvc_rqst);
-}
-
-static int lockd_start_svc(struct svc_serv *serv)
-{
- int error;
-
- if (nlmsvc_rqst)
- return 0;
-
- /*
- * Create the kernel thread and wait for it to start.
- */
- nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
- if (IS_ERR(nlmsvc_rqst)) {
- error = PTR_ERR(nlmsvc_rqst);
- printk(KERN_WARNING
- "lockd_up: svc_rqst allocation failed, error=%d\n",
- error);
- lockd_unregister_notifiers();
- goto out_rqst;
- }
-
- atomic_inc(&nlm_ntf_refcnt);
- svc_sock_update_bufs(serv);
- serv->sv_maxconn = nlm_max_connections;
-
- nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name);
- if (IS_ERR(nlmsvc_task)) {
- error = PTR_ERR(nlmsvc_task);
- printk(KERN_WARNING
- "lockd_up: kthread_run failed, error=%d\n", error);
- goto out_task;
- }
- nlmsvc_rqst->rq_task = nlmsvc_task;
- wake_up_process(nlmsvc_task);
-
- dprintk("lockd_up: service started\n");
- return 0;
-
-out_task:
- lockd_svc_exit_thread();
- nlmsvc_task = NULL;
-out_rqst:
- nlmsvc_rqst = NULL;
- return error;
-}
-
static const struct svc_serv_ops lockd_sv_ops = {
.svo_shutdown = svc_rpcb_cleanup,
+ .svo_function = lockd,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
+ .svo_module = THIS_MODULE,
};
-static struct svc_serv *lockd_create_svc(void)
+static int lockd_get(void)
{
struct svc_serv *serv;
+ int error;
- /*
- * Check whether we're already up and running.
- */
- if (nlmsvc_rqst) {
- /*
- * Note: increase service usage, because later in case of error
- * svc_destroy() will be called.
- */
- svc_get(nlmsvc_rqst->rq_server);
- return nlmsvc_rqst->rq_server;
+ if (nlmsvc_serv) {
+ svc_get(nlmsvc_serv);
+ nlmsvc_users++;
+ return 0;
}
/*
@@ -454,14 +383,41 @@ static struct svc_serv *lockd_create_svc(void)
serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
+
+ serv->sv_maxconn = nlm_max_connections;
+ error = svc_set_num_threads(serv, NULL, 1);
+ /* The thread now holds the only reference */
+ svc_put(serv);
+ if (error < 0)
+ return error;
+
+ nlmsvc_serv = serv;
register_inetaddr_notifier(&lockd_inetaddr_notifier);
#if IS_ENABLED(CONFIG_IPV6)
register_inet6addr_notifier(&lockd_inet6addr_notifier);
#endif
dprintk("lockd_up: service created\n");
- return serv;
+ nlmsvc_users++;
+ return 0;
+}
+
+static void lockd_put(void)
+{
+ if (WARN(nlmsvc_users <= 0, "lockd_down: no users!\n"))
+ return;
+ if (--nlmsvc_users)
+ return;
+
+ unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+ unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
+#endif
+
+ svc_set_num_threads(nlmsvc_serv, NULL, 0);
+ nlmsvc_serv = NULL;
+ dprintk("lockd_down: service destroyed\n");
}
/*
@@ -469,36 +425,21 @@ static struct svc_serv *lockd_create_svc(void)
*/
int lockd_up(struct net *net, const struct cred *cred)
{
- struct svc_serv *serv;
int error;
mutex_lock(&nlmsvc_mutex);
- serv = lockd_create_svc();
- if (IS_ERR(serv)) {
- error = PTR_ERR(serv);
- goto err_create;
- }
+ error = lockd_get();
+ if (error)
+ goto err;
- error = lockd_up_net(serv, net, cred);
+ error = lockd_up_net(nlmsvc_serv, net, cred);
if (error < 0) {
- lockd_unregister_notifiers();
- goto err_put;
+ lockd_put();
+ goto err;
}
- error = lockd_start_svc(serv);
- if (error < 0) {
- lockd_down_net(serv, net);
- goto err_put;
- }
- nlmsvc_users++;
- /*
- * Note: svc_serv structures have an initial use count of 1,
- * so we exit through here on both success and failure.
- */
-err_put:
- svc_destroy(serv);
-err_create:
+err:
mutex_unlock(&nlmsvc_mutex);
return error;
}
@@ -511,27 +452,8 @@ void
lockd_down(struct net *net)
{
mutex_lock(&nlmsvc_mutex);
- lockd_down_net(nlmsvc_rqst->rq_server, net);
- if (nlmsvc_users) {
- if (--nlmsvc_users)
- goto out;
- } else {
- printk(KERN_ERR "lockd_down: no users! task=%p\n",
- nlmsvc_task);
- BUG();
- }
-
- if (!nlmsvc_task) {
- printk(KERN_ERR "lockd_down: no lockd running.\n");
- BUG();
- }
- kthread_stop(nlmsvc_task);
- dprintk("lockd_down: service stopped\n");
- lockd_svc_exit_thread();
- dprintk("lockd_down: service destroyed\n");
- nlmsvc_task = NULL;
- nlmsvc_rqst = NULL;
-out:
+ lockd_down_net(nlmsvc_serv, net);
+ lockd_put();
mutex_unlock(&nlmsvc_mutex);
}
EXPORT_SYMBOL_GPL(lockd_down);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e9b85d8fd5fe..cb3658ab9b7a 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -470,8 +470,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_host *host, struct nlm_lock *lock, int wait,
struct nlm_cookie *cookie, int reclaim)
{
- struct nlm_block *block = NULL;
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct inode *inode = nlmsvc_file_inode(file);
+#endif
+ struct nlm_block *block = NULL;
int error;
int mode;
int async_block = 0;
@@ -484,7 +486,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
(long long)lock->fl.fl_end,
wait);
- if (inode->i_sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS) {
+ if (nlmsvc_file_file(file)->f_op->lock) {
async_block = wait;
wait = 0;
}
diff --git a/fs/locks.c b/fs/locks.c
index 0fca9d680978..8c6df10cd9ed 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -62,6 +62,7 @@
#include <linux/pid_namespace.h>
#include <linux/hashtable.h>
#include <linux/percpu.h>
+#include <linux/sysctl.h>
#define CREATE_TRACE_POINTS
#include <trace/events/filelock.h>
@@ -88,8 +89,37 @@ static int target_leasetype(struct file_lock *fl)
return fl->fl_type;
}
-int leases_enable = 1;
-int lease_break_time = 45;
+static int leases_enable = 1;
+static int lease_break_time = 45;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table locks_sysctls[] = {
+ {
+ .procname = "leases-enable",
+ .data = &leases_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_MMU
+ {
+ .procname = "lease-break-time",
+ .data = &lease_break_time,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif /* CONFIG_MMU */
+ {}
+};
+
+static int __init init_fs_locks_sysctls(void)
+{
+ register_sysctl_init("fs", locks_sysctls);
+ return 0;
+}
+early_initcall(init_fs_locks_sysctls);
+#endif /* CONFIG_SYSCTL */
/*
* The global file_lock_list is only used for displaying /proc/locks, so we
diff --git a/fs/mpage.c b/fs/mpage.c
index 334e7d09aa65..87f5cfef6caa 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -29,7 +29,6 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
-#include <linux/cleancache.h>
#include "internal.h"
/*
@@ -284,12 +283,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
SetPageMappedToDisk(page);
}
- if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
- cleancache_get_page(page) == 0) {
- SetPageUptodate(page);
- goto confused;
- }
-
/*
* This page will go to BIO. Do we need to send this BIO off first?
*/
diff --git a/fs/namei.c b/fs/namei.c
index d81f04f8d818..b867a92c078e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1020,10 +1020,60 @@ static inline void put_link(struct nameidata *nd)
path_put(&last->link);
}
-int sysctl_protected_symlinks __read_mostly = 0;
-int sysctl_protected_hardlinks __read_mostly = 0;
-int sysctl_protected_fifos __read_mostly;
-int sysctl_protected_regular __read_mostly;
+static int sysctl_protected_symlinks __read_mostly;
+static int sysctl_protected_hardlinks __read_mostly;
+static int sysctl_protected_fifos __read_mostly;
+static int sysctl_protected_regular __read_mostly;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table namei_sysctls[] = {
+ {
+ .procname = "protected_symlinks",
+ .data = &sysctl_protected_symlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "protected_hardlinks",
+ .data = &sysctl_protected_hardlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "protected_fifos",
+ .data = &sysctl_protected_fifos,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "protected_regular",
+ .data = &sysctl_protected_regular,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ { }
+};
+
+static int __init init_fs_namei_sysctls(void)
+{
+ register_sysctl_init("fs", namei_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_namei_sysctls);
+
+#endif /* CONFIG_SYSCTL */
/**
* may_follow_link - Check symlink following for unsafe situations
diff --git a/fs/namespace.c b/fs/namespace.c
index dc31ad6b370f..40b994a29e90 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -37,7 +37,7 @@
#include "internal.h"
/* Maximum number of mounts in a mount namespace */
-unsigned int sysctl_mount_max __read_mostly = 100000;
+static unsigned int sysctl_mount_max __read_mostly = 100000;
static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
@@ -4620,3 +4620,25 @@ const struct proc_ns_operations mntns_operations = {
.install = mntns_install,
.owner = mntns_owner,
};
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table fs_namespace_sysctls[] = {
+ {
+ .procname = "mount-max",
+ .data = &sysctl_mount_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ { }
+};
+
+static int __init init_fs_namespace_sysctls(void)
+{
+ register_sysctl_init("fs", fs_namespace_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_namespace_sysctls);
+
+#endif /* CONFIG_SYSCTL */
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 6169659857b3..501da990c259 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -55,7 +55,8 @@ static struct netfs_read_request *netfs_alloc_read_request(
INIT_WORK(&rreq->work, netfs_rreq_work);
refcount_set(&rreq->usage, 1);
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
- ops->init_rreq(rreq, file);
+ if (ops->init_rreq)
+ ops->init_rreq(rreq, file);
netfs_stat(&netfs_n_rh_rreq);
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 86d856de1389..054cc1255fac 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -93,7 +93,7 @@ nfs4_callback_svc(void *vrqstp)
svc_process(rqstp);
}
svc_exit_thread(rqstp);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
@@ -137,7 +137,7 @@ nfs41_callback_svc(void *vrqstp)
}
}
svc_exit_thread(rqstp);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
@@ -169,12 +169,12 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS)
nrservs = NFS4_MIN_NR_CALLBACK_THREADS;
- if (serv->sv_nrthreads-1 == nrservs)
+ if (serv->sv_nrthreads == nrservs)
return 0;
- ret = serv->sv_ops->svo_setup(serv, NULL, nrservs);
+ ret = svc_set_num_threads(serv, NULL, nrservs);
if (ret) {
- serv->sv_ops->svo_setup(serv, NULL, 0);
+ svc_set_num_threads(serv, NULL, 0);
return ret;
}
dprintk("nfs_callback_up: service started\n");
@@ -235,14 +235,12 @@ err_bind:
static const struct svc_serv_ops nfs40_cb_sv_ops = {
.svo_function = nfs4_callback_svc,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads_sync,
.svo_module = THIS_MODULE,
};
#if defined(CONFIG_NFS_V4_1)
static const struct svc_serv_ops nfs41_cb_sv_ops = {
.svo_function = nfs41_callback_svc,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads_sync,
.svo_module = THIS_MODULE,
};
@@ -266,14 +264,8 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
/*
* Check whether we're already up and running.
*/
- if (cb_info->serv) {
- /*
- * Note: increase service usage, because later in case of error
- * svc_destroy() will be called.
- */
- svc_get(cb_info->serv);
- return cb_info->serv;
- }
+ if (cb_info->serv)
+ return svc_get(cb_info->serv);
switch (minorversion) {
case 0:
@@ -294,7 +286,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
cb_info->users);
- serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
+ serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
if (!serv) {
printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
return ERR_PTR(-ENOMEM);
@@ -335,16 +327,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
goto err_start;
cb_info->users++;
- /*
- * svc_create creates the svc_serv with sv_nrthreads == 1, and then
- * svc_prepare_thread increments that. So we need to call svc_destroy
- * on both success and failure so that the refcount is 1 when the
- * thread exits.
- */
err_net:
if (!cb_info->users)
cb_info->serv = NULL;
- svc_destroy(serv);
+ svc_put(serv);
err_create:
mutex_unlock(&nfs_callback_mutex);
return ret;
@@ -369,8 +355,8 @@ void nfs_callback_down(int minorversion, struct net *net)
cb_info->users--;
if (cb_info->users == 0) {
svc_get(serv);
- serv->sv_ops->svo_setup(serv, NULL, 0);
- svc_destroy(serv);
+ svc_set_num_threads(serv, NULL, 0);
+ svc_put(serv);
dprintk("nfs_callback_down: service destroyed\n");
cb_info->serv = NULL;
}
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 171c424cb6d5..01596f2d0a1e 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -158,5 +158,5 @@ const struct export_operations nfs_export_ops = {
.fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
- EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS,
+ EXPORT_OP_NOATOMIC_ATTR,
};
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f63dfa01001c..d88b779f9dd0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2693,6 +2693,6 @@ static int nfs4_run_state_manager(void *ptr)
allow_signal(SIGKILL);
nfs4_state_manager(clp);
nfs_put_client(clp);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index fdf89fcf1a0c..8bc807c5fea4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -44,12 +44,9 @@ struct nfsd_fcache_bucket {
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
struct nfsd_fcache_disposal {
- struct list_head list;
struct work_struct work;
- struct net *net;
spinlock_t lock;
struct list_head freeme;
- struct rcu_head rcu;
};
static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
@@ -62,8 +59,6 @@ static long nfsd_file_lru_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
static atomic_long_t nfsd_filecache_count;
static struct delayed_work nfsd_filecache_laundrette;
-static DEFINE_SPINLOCK(laundrette_lock);
-static LIST_HEAD(laundrettes);
static void nfsd_file_gc(void);
@@ -194,7 +189,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
}
nf->nf_mark = NULL;
- init_rwsem(&nf->nf_rwsem);
trace_nfsd_file_alloc(nf);
}
return nf;
@@ -249,7 +243,7 @@ nfsd_file_do_unhash(struct nfsd_file *nf)
trace_nfsd_file_unhash(nf);
if (nfsd_file_check_write_error(nf))
- nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
hlist_del_rcu(&nf->nf_node);
atomic_long_dec(&nfsd_filecache_count);
@@ -367,19 +361,13 @@ nfsd_file_list_remove_disposal(struct list_head *dst,
static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
- struct nfsd_fcache_disposal *l;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
- rcu_read_lock();
- list_for_each_entry_rcu(l, &laundrettes, list) {
- if (l->net == net) {
- spin_lock(&l->lock);
- list_splice_tail_init(files, &l->freeme);
- spin_unlock(&l->lock);
- queue_work(nfsd_filecache_wq, &l->work);
- break;
- }
- }
- rcu_read_unlock();
+ spin_lock(&l->lock);
+ list_splice_tail_init(files, &l->freeme);
+ spin_unlock(&l->lock);
+ queue_work(nfsd_filecache_wq, &l->work);
}
static void
@@ -755,7 +743,7 @@ nfsd_file_cache_purge(struct net *net)
}
static struct nfsd_fcache_disposal *
-nfsd_alloc_fcache_disposal(struct net *net)
+nfsd_alloc_fcache_disposal(void)
{
struct nfsd_fcache_disposal *l;
@@ -763,7 +751,6 @@ nfsd_alloc_fcache_disposal(struct net *net)
if (!l)
return NULL;
INIT_WORK(&l->work, nfsd_file_delayed_close);
- l->net = net;
spin_lock_init(&l->lock);
INIT_LIST_HEAD(&l->freeme);
return l;
@@ -772,61 +759,27 @@ nfsd_alloc_fcache_disposal(struct net *net)
static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
- rcu_assign_pointer(l->net, NULL);
cancel_work_sync(&l->work);
nfsd_file_dispose_list(&l->freeme);
- kfree_rcu(l, rcu);
-}
-
-static void
-nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
-{
- spin_lock(&laundrette_lock);
- list_add_tail_rcu(&l->list, &laundrettes);
- spin_unlock(&laundrette_lock);
-}
-
-static void
-nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
-{
- spin_lock(&laundrette_lock);
- list_del_rcu(&l->list);
- spin_unlock(&laundrette_lock);
-}
-
-static int
-nfsd_alloc_fcache_disposal_net(struct net *net)
-{
- struct nfsd_fcache_disposal *l;
-
- l = nfsd_alloc_fcache_disposal(net);
- if (!l)
- return -ENOMEM;
- nfsd_add_fcache_disposal(l);
- return 0;
+ kfree(l);
}
static void
nfsd_free_fcache_disposal_net(struct net *net)
{
- struct nfsd_fcache_disposal *l;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
- rcu_read_lock();
- list_for_each_entry_rcu(l, &laundrettes, list) {
- if (l->net != net)
- continue;
- nfsd_del_fcache_disposal(l);
- rcu_read_unlock();
- nfsd_free_fcache_disposal(l);
- return;
- }
- rcu_read_unlock();
+ nfsd_free_fcache_disposal(l);
}
int
nfsd_file_cache_start_net(struct net *net)
{
- return nfsd_alloc_fcache_disposal_net(net);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nn->fcache_disposal = nfsd_alloc_fcache_disposal();
+ return nn->fcache_disposal ? 0 : -ENOMEM;
}
void
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 7872df5a0fe3..435ceab27897 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -46,7 +46,6 @@ struct nfsd_file {
refcount_t nf_ref;
unsigned char nf_may;
struct nfsd_file_mark *nf_mark;
- struct rw_semaphore nf_rwsem;
};
int nfsd_file_cache_init(void);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 935c1028c217..1b1a962a1804 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -11,6 +11,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/percpu_counter.h>
+#include <linux/siphash.h>
/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4
@@ -108,9 +109,8 @@ struct nfsd_net {
bool nfsd_net_up;
bool lockd_up;
- /* Time of server startup */
- struct timespec64 nfssvc_boot;
- seqlock_t boot_lock;
+ seqlock_t writeverf_lock;
+ unsigned char writeverf[8];
/*
* Max number of connections this nfsd container will allow. Defaults
@@ -123,12 +123,13 @@ struct nfsd_net {
u32 clverifier_counter;
struct svc_serv *nfsd_serv;
-
- wait_queue_head_t ntf_wq;
- atomic_t ntf_refcnt;
-
- /* Allow umount to wait for nfsd state cleanup */
- struct completion nfsd_shutdown_complete;
+ /* When a listening socket is added to nfsd, keep_active is set
+ * and this justifies a reference on nfsd_serv. This stops
+ * nfsd_serv from being freed. When the number of threads is
+ * set, keep_active is cleared and the reference is dropped. So
+ * when the last thread exits, the service will be destroyed.
+ */
+ int keep_active;
/*
* clientid and stateid data for construction of net unique COPY
@@ -184,6 +185,10 @@ struct nfsd_net {
/* utsname taken from the process that starts the server */
char nfsd_name[UNX_MAXNODENAME+1];
+
+ struct nfsd_fcache_disposal *fcache_disposal;
+
+ siphash_key_t siphash_key;
};
/* Simple check to find out if a given net was properly initialized */
@@ -193,6 +198,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn);
extern unsigned int nfsd_net_id;
-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
-void nfsd_reset_boot_verifier(struct nfsd_net *nn);
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_write_verifier(struct nfsd_net *nn);
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 15dac36ca852..8ef53f6726ec 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -202,15 +202,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
nvecs = svc_fill_write_vector(rqstp, &argp->payload);
- if (!nvecs) {
- resp->status = nfserr_io;
- goto out;
- }
+
resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, nvecs, &cnt,
resp->committed, resp->verf);
resp->count = cnt;
-out:
return rpc_success;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index c3ac1b6aa3aa..7c45ba4db61b 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -487,71 +487,6 @@ neither:
return true;
}
-static bool fs_supports_change_attribute(struct super_block *sb)
-{
- return sb->s_flags & SB_I_VERSION || sb->s_export_op->fetch_iversion;
-}
-
-/*
- * Fill in the pre_op attr for the wcc data
- */
-void fill_pre_wcc(struct svc_fh *fhp)
-{
- struct inode *inode;
- struct kstat stat;
- bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
-
- if (fhp->fh_no_wcc || fhp->fh_pre_saved)
- return;
- inode = d_inode(fhp->fh_dentry);
- if (fs_supports_change_attribute(inode->i_sb) || !v4) {
- __be32 err = fh_getattr(fhp, &stat);
-
- if (err) {
- /* Grab the times from inode anyway */
- stat.mtime = inode->i_mtime;
- stat.ctime = inode->i_ctime;
- stat.size = inode->i_size;
- }
- fhp->fh_pre_mtime = stat.mtime;
- fhp->fh_pre_ctime = stat.ctime;
- fhp->fh_pre_size = stat.size;
- }
- if (v4)
- fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
-
- fhp->fh_pre_saved = true;
-}
-
-/*
- * Fill in the post_op attr for the wcc data
- */
-void fill_post_wcc(struct svc_fh *fhp)
-{
- bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
- struct inode *inode = d_inode(fhp->fh_dentry);
-
- if (fhp->fh_no_wcc)
- return;
-
- if (fhp->fh_post_saved)
- printk("nfsd: inode locked twice during operation.\n");
-
- fhp->fh_post_saved = true;
-
- if (fs_supports_change_attribute(inode->i_sb) || !v4) {
- __be32 err = fh_getattr(fhp, &fhp->fh_post_attr);
-
- if (err) {
- fhp->fh_post_saved = false;
- fhp->fh_post_attr.ctime = inode->i_ctime;
- }
- }
- if (v4)
- fhp->fh_post_change =
- nfsd4_change_attribute(&fhp->fh_post_attr, inode);
-}
-
/*
* XDR decode functions
*/
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a36261f89bdf..ed1ee25647be 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -598,7 +598,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
- nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
+ nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id));
}
static __be32
@@ -1101,7 +1101,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+ status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos,
dst, clone->cl_dst_pos, clone->cl_count,
EX_ISSYNC(cstate->current_fh.fh_export));
@@ -1510,11 +1510,14 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
{
+ struct file *dst = copy->nf_dst->nf_file;
+ struct file *src = copy->nf_src->nf_file;
+ errseq_t since;
ssize_t bytes_copied = 0;
u64 bytes_total = copy->cp_count;
u64 src_pos = copy->cp_src_pos;
u64 dst_pos = copy->cp_dst_pos;
- __be32 status;
+ int status;
/* See RFC 7862 p.67: */
if (bytes_total == 0)
@@ -1522,9 +1525,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
do {
if (kthread_should_stop())
break;
- bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
- src_pos, copy->nf_dst->nf_file, dst_pos,
- bytes_total);
+ bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
+ bytes_total);
if (bytes_copied <= 0)
break;
bytes_total -= bytes_copied;
@@ -1534,11 +1536,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
} while (bytes_total > 0 && !copy->cp_synchronous);
/* for a non-zero asynchronous copy do a commit of data */
if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) {
- down_write(&copy->nf_dst->nf_rwsem);
- status = vfs_fsync_range(copy->nf_dst->nf_file,
- copy->cp_dst_pos,
+ since = READ_ONCE(dst->f_wb_err);
+ status = vfs_fsync_range(dst, copy->cp_dst_pos,
copy->cp_res.wr_bytes_written, 0);
- up_write(&copy->nf_dst->nf_rwsem);
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
if (!status)
copy->committed = true;
}
@@ -2528,7 +2530,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
goto encode_op;
}
- fh_clear_wcc(current_fh);
+ fh_clear_pre_post_attrs(current_fh);
/* If op is non-idempotent */
if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1956d377d1a6..72900b89cf84 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -246,6 +246,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
if (fh_match(fh, &cur->nbl_fh)) {
list_del_init(&cur->nbl_list);
+ WARN_ON(list_empty(&cur->nbl_lru));
list_del_init(&cur->nbl_lru);
found = cur;
break;
@@ -271,6 +272,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
INIT_LIST_HEAD(&nbl->nbl_lru);
fh_copy_shallow(&nbl->nbl_fh, fh);
locks_init_lock(&nbl->nbl_lock);
+ kref_init(&nbl->nbl_kref);
nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
&nfsd4_cb_notify_lock_ops,
NFSPROC4_CLNT_CB_NOTIFY_LOCK);
@@ -280,11 +282,20 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
}
static void
+free_nbl(struct kref *kref)
+{
+ struct nfsd4_blocked_lock *nbl;
+
+ nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref);
+ kfree(nbl);
+}
+
+static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
locks_delete_block(&nbl->nbl_lock);
locks_release_private(&nbl->nbl_lock);
- kfree(nbl);
+ kref_put(&nbl->nbl_kref, free_nbl);
}
static void
@@ -302,6 +313,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
struct nfsd4_blocked_lock,
nbl_list);
list_del_init(&nbl->nbl_list);
+ WARN_ON(list_empty(&nbl->nbl_lru));
list_move(&nbl->nbl_lru, &reaplist);
}
spin_unlock(&nn->blocked_locks_lock);
@@ -360,11 +372,13 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
* st_{access,deny}_bmap field of the stateid, in order to track not
* only what share bits are currently in force, but also what
* combinations of share bits previous opens have used. This allows us
- * to enforce the recommendation of rfc 3530 14.2.19 that the server
- * return an error if the client attempt to downgrade to a combination
- * of share bits not explicable by closing some of its previous opens.
+ * to enforce the recommendation in
+ * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that
+ * the server return an error if the client attempt to downgrade to a
+ * combination of share bits not explicable by closing some of its
+ * previous opens.
*
- * XXX: This enforcement is actually incomplete, since we don't keep
+ * This enforcement is arguably incomplete, since we don't keep
* track of access/deny bit combinations; so, e.g., we allow:
*
* OPEN allow read, deny write
@@ -372,6 +386,10 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
* DOWNGRADE allow read, deny none
*
* which we should reject.
+ *
+ * But you could also argue that our current code is already overkill,
+ * since it only exists to return NFS4ERR_INVAL on incorrect client
+ * behavior.
*/
static unsigned int
bmap_to_share_mode(unsigned long bmap)
@@ -6040,7 +6058,11 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
*nfp = NULL;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
- status = check_special_stateids(net, fhp, stateid, flags);
+ if (cstid)
+ status = nfserr_bad_stateid;
+ else
+ status = check_special_stateids(net, fhp, stateid,
+ flags);
goto done;
}
@@ -6836,7 +6858,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
- struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
@@ -6858,7 +6879,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
- sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
@@ -6910,8 +6930,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate) &&
- !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
+ if (nfsd4_has_session(cstate))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
@@ -6923,8 +6942,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fl_type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate) &&
- !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
+ if (nfsd4_has_session(cstate))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
@@ -6945,6 +6963,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
+ /*
+ * Most filesystems with their own ->lock operations will block
+ * the nfsd thread waiting to acquire the lock. That leads to
+ * deadlocks (we don't want every nfsd thread tied up waiting
+ * for file locks), so don't attempt blocking lock notifications
+ * on those filesystems:
+ */
+ if (nf->nf_file->f_op->lock)
+ fl_flags &= ~FL_SLEEP;
+
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {
dprintk("NFSD: %s: unable to allocate block!\n", __func__);
@@ -6975,6 +7003,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
spin_lock(&nn->blocked_locks_lock);
list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
+ kref_get(&nbl->nbl_kref);
spin_unlock(&nn->blocked_locks_lock);
}
@@ -6987,6 +7016,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nn->somebody_reclaimed = true;
break;
case FILE_LOCK_DEFERRED:
+ kref_put(&nbl->nbl_kref, free_nbl);
nbl = NULL;
fallthrough;
case -EAGAIN: /* conflock holds conflicting lock */
@@ -7007,8 +7037,13 @@ out:
/* dequeue it if we queued it before */
if (fl_flags & FL_SLEEP) {
spin_lock(&nn->blocked_locks_lock);
- list_del_init(&nbl->nbl_list);
- list_del_init(&nbl->nbl_lru);
+ if (!list_empty(&nbl->nbl_list) &&
+ !list_empty(&nbl->nbl_lru)) {
+ list_del_init(&nbl->nbl_list);
+ list_del_init(&nbl->nbl_lru);
+ kref_put(&nbl->nbl_kref, free_nbl);
+ }
+ /* nbl can use one of lists to be linked to reaplist */
spin_unlock(&nn->blocked_locks_lock);
}
free_blocked_lock(nbl);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5a93a5db4fb0..899de438e529 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -277,21 +277,10 @@ nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf)
static __be32
nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
{
- u32 i, count;
- __be32 *p;
-
- if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
- return nfserr_bad_xdr;
- /* request sanity */
- if (count > 1000)
- return nfserr_bad_xdr;
- p = xdr_inline_decode(argp->xdr, count << 2);
- if (!p)
- return nfserr_bad_xdr;
- for (i = 0; i < bmlen; i++)
- bmval[i] = (i < count) ? be32_to_cpup(p++) : 0;
+ ssize_t status;
- return nfs_ok;
+ status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen);
+ return status == -EBADMSG ? nfserr_bad_xdr : nfs_ok;
}
static __be32
@@ -4804,8 +4793,8 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
return nfserr_resource;
*p++ = htonl(NFS4_CONTENT_HOLE);
- p = xdr_encode_hyper(p, read->rd_offset);
- p = xdr_encode_hyper(p, count);
+ p = xdr_encode_hyper(p, read->rd_offset);
+ p = xdr_encode_hyper(p, count);
*eof = (read->rd_offset + count) >= f_size;
*maxcount = min_t(unsigned long, count, *maxcount);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 6e0b6f3148dc..a4a69ab6ab28 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -87,7 +87,7 @@ nfsd_hashsize(unsigned int limit)
static u32
nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
{
- return hash_32(be32_to_cpu(xid), nn->maskbits);
+ return hash_32((__force u32)xid, nn->maskbits);
}
static struct svc_cacherep *
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 51a49e0cfe37..b9f27fbcd768 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -742,13 +742,12 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
return err;
err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
- if (err < 0) {
- nfsd_destroy(net);
- return err;
- }
- /* Decrease the count, but don't shut down the service */
- nn->nfsd_serv->sv_nrthreads--;
+ if (err >= 0 &&
+ !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+ svc_get(nn->nfsd_serv);
+
+ nfsd_put(net);
return err;
}
@@ -783,8 +782,10 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
if (err < 0 && err != -EAFNOSUPPORT)
goto out_close;
- /* Decrease the count, but don't shut down the service */
- nn->nfsd_serv->sv_nrthreads--;
+ if (!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+ svc_get(nn->nfsd_serv);
+
+ nfsd_put(net);
return 0;
out_close:
xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
@@ -793,10 +794,7 @@ out_close:
svc_xprt_put(xprt);
}
out_err:
- if (!list_empty(&nn->nfsd_serv->sv_permsocks))
- nn->nfsd_serv->sv_nrthreads--;
- else
- nfsd_destroy(net);
+ nfsd_put(net);
return err;
}
@@ -1485,9 +1483,8 @@ static __net_init int nfsd_init_net(struct net *net)
nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
- atomic_set(&nn->ntf_refcnt, 0);
- init_waitqueue_head(&nn->ntf_wq);
- seqlock_init(&nn->boot_lock);
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
return 0;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 498e5a489826..3e5008b475ff 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -97,7 +97,7 @@ int nfsd_pool_stats_open(struct inode *, struct file *);
int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_shutdown_threads(struct net *net);
-void nfsd_destroy(struct net *net);
+void nfsd_put(struct net *net);
bool i_am_nfsd(void);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index f3779fa72c89..145208bcb9bd 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -611,6 +611,70 @@ out_negative:
return nfserr_serverfault;
}
+#ifdef CONFIG_NFSD_V3
+
+/**
+ * fh_fill_pre_attrs - Fill in pre-op attributes
+ * @fhp: file handle to be updated
+ *
+ */
+void fh_fill_pre_attrs(struct svc_fh *fhp)
+{
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ struct inode *inode;
+ struct kstat stat;
+ __be32 err;
+
+ if (fhp->fh_no_wcc || fhp->fh_pre_saved)
+ return;
+
+ inode = d_inode(fhp->fh_dentry);
+ err = fh_getattr(fhp, &stat);
+ if (err) {
+ /* Grab the times from inode anyway */
+ stat.mtime = inode->i_mtime;
+ stat.ctime = inode->i_ctime;
+ stat.size = inode->i_size;
+ }
+ if (v4)
+ fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
+
+ fhp->fh_pre_mtime = stat.mtime;
+ fhp->fh_pre_ctime = stat.ctime;
+ fhp->fh_pre_size = stat.size;
+ fhp->fh_pre_saved = true;
+}
+
+/**
+ * fh_fill_post_attrs - Fill in post-op attributes
+ * @fhp: file handle to be updated
+ *
+ */
+void fh_fill_post_attrs(struct svc_fh *fhp)
+{
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ struct inode *inode = d_inode(fhp->fh_dentry);
+ __be32 err;
+
+ if (fhp->fh_no_wcc)
+ return;
+
+ if (fhp->fh_post_saved)
+ printk("nfsd: inode locked twice during operation.\n");
+
+ err = fh_getattr(fhp, &fhp->fh_post_attr);
+ if (err) {
+ fhp->fh_post_saved = false;
+ fhp->fh_post_attr.ctime = inode->i_ctime;
+ } else
+ fhp->fh_post_saved = true;
+ if (v4)
+ fhp->fh_post_change =
+ nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+}
+
+#endif /* CONFIG_NFSD_V3 */
+
/*
* Release a file handle.
*/
@@ -623,7 +687,7 @@ fh_put(struct svc_fh *fhp)
fh_unlock(fhp);
fhp->fh_dentry = NULL;
dput(dentry);
- fh_clear_wcc(fhp);
+ fh_clear_pre_post_attrs(fhp);
}
fh_drop_write(fhp);
if (exp) {
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index d11e4b6870d6..434930d8a946 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -284,12 +284,13 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
#endif
#ifdef CONFIG_NFSD_V3
-/*
- * The wcc data stored in current_fh should be cleared
- * between compound ops.
+
+/**
+ * fh_clear_pre_post_attrs - Reset pre/post attributes
+ * @fhp: file handle to be updated
+ *
*/
-static inline void
-fh_clear_wcc(struct svc_fh *fhp)
+static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
{
fhp->fh_post_saved = false;
fhp->fh_pre_saved = false;
@@ -323,13 +324,24 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat,
return time_to_chattr(&stat->ctime);
}
-extern void fill_pre_wcc(struct svc_fh *fhp);
-extern void fill_post_wcc(struct svc_fh *fhp);
-#else
-#define fh_clear_wcc(ignored)
-#define fill_pre_wcc(ignored)
-#define fill_post_wcc(notused)
-#endif /* CONFIG_NFSD_V3 */
+extern void fh_fill_pre_attrs(struct svc_fh *fhp);
+extern void fh_fill_post_attrs(struct svc_fh *fhp);
+
+#else /* !CONFIG_NFSD_V3 */
+
+static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
+{
+}
+
+static inline void fh_fill_pre_attrs(struct svc_fh *fhp)
+{
+}
+
+static inline void fh_fill_post_attrs(struct svc_fh *fhp)
+{
+}
+
+#endif /* !CONFIG_NFSD_V3 */
/*
@@ -355,7 +367,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
inode = d_inode(dentry);
inode_lock_nested(inode, subclass);
- fill_pre_wcc(fhp);
+ fh_fill_pre_attrs(fhp);
fhp->fh_locked = true;
}
@@ -372,7 +384,7 @@ static inline void
fh_unlock(struct svc_fh *fhp)
{
if (fhp->fh_locked) {
- fill_post_wcc(fhp);
+ fh_fill_post_attrs(fhp);
inode_unlock(d_inode(fhp->fh_dentry));
fhp->fh_locked = false;
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index de282f3273c5..18b8eb43a19b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -235,10 +235,6 @@ nfsd_proc_write(struct svc_rqst *rqstp)
argp->len, argp->offset);
nvecs = svc_fill_write_vector(rqstp, &argp->payload);
- if (!nvecs) {
- resp->status = nfserr_io;
- goto out;
- }
resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset, rqstp->rq_vec, nvecs,
@@ -247,7 +243,6 @@ nfsd_proc_write(struct svc_rqst *rqstp)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
return rpc_drop_reply;
-out:
return rpc_success;
}
@@ -850,6 +845,7 @@ nfserrno (int errno)
{ nfserr_io, -EIO },
{ nfserr_nxio, -ENXIO },
{ nfserr_fbig, -E2BIG },
+ { nfserr_stale, -EBADF },
{ nfserr_acces, -EACCES },
{ nfserr_exist, -EEXIST },
{ nfserr_xdev, -EXDEV },
@@ -878,6 +874,8 @@ nfserrno (int errno)
{ nfserr_toosmall, -ETOOSMALL },
{ nfserr_serverfault, -ESERVERFAULT },
{ nfserr_serverfault, -ENFILE },
+ { nfserr_io, -EREMOTEIO },
+ { nfserr_stale, -EOPENSTALE },
{ nfserr_io, -EUCLEAN },
{ nfserr_perm, -ENOKEY },
{ nfserr_no_grace, -ENOGRACE},
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 80431921e5d7..b8c682b62d29 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/fs_struct.h>
#include <linux/swap.h>
+#include <linux/siphash.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
@@ -55,18 +56,17 @@ static __be32 nfsd_init_request(struct svc_rqst *,
struct svc_process_info *);
/*
- * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
- * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
- * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
+ * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members
+ * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks.
*
* If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
- * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
- * of nfsd threads must exist and each must listed in ->sp_all_threads in each
- * entry of ->sv_pools[].
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless
+ * nn->keep_active is set). That number of nfsd threads must
+ * exist and each must be listed in ->sp_all_threads in some entry of
+ * ->sv_pools[].
*
- * Transitions of the thread count between zero and non-zero are of particular
- * interest since the svc_serv needs to be created and initialized at that
- * point, or freed.
+ * Each active thread holds a counted reference on nn->nfsd_serv, as does
+ * the nn->keep_active flag and various transient calls to svc_get().
*
* Finally, the nfsd_mutex also protects some of the global variables that are
* accessed when nfsd starts and that are settable via the write_* routines in
@@ -345,33 +345,57 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
}
-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+/**
+ * nfsd_copy_write_verifier - Atomically copy a write verifier
+ * @verf: buffer in which to receive the verifier cookie
+ * @nn: NFS net namespace
+ *
+ * This function provides a wait-free mechanism for copying the
+ * namespace's write verifier without tearing it.
+ */
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn)
{
int seq = 0;
do {
- read_seqbegin_or_lock(&nn->boot_lock, &seq);
- /*
- * This is opaque to client, so no need to byte-swap. Use
- * __force to keep sparse happy. y2038 time_t overflow is
- * irrelevant in this usage
- */
- verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
- verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
- } while (need_seqretry(&nn->boot_lock, seq));
- done_seqretry(&nn->boot_lock, seq);
+ read_seqbegin_or_lock(&nn->writeverf_lock, &seq);
+ memcpy(verf, nn->writeverf, sizeof(*verf));
+ } while (need_seqretry(&nn->writeverf_lock, seq));
+ done_seqretry(&nn->writeverf_lock, seq);
}
-static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn)
{
- ktime_get_real_ts64(&nn->nfssvc_boot);
+ struct timespec64 now;
+ u64 verf;
+
+ /*
+ * Because the time value is hashed, y2038 time_t overflow
+ * is irrelevant in this usage.
+ */
+ ktime_get_raw_ts64(&now);
+ verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key);
+ memcpy(nn->writeverf, &verf, sizeof(nn->writeverf));
}
-void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+/**
+ * nfsd_reset_write_verifier - Generate a new write verifier
+ * @nn: NFS net namespace
+ *
+ * This function updates the ->writeverf field of @nn. This field
+ * contains an opaque cookie that, according to Section 18.32.3 of
+ * RFC 8881, "the client can use to determine whether a server has
+ * changed instance state (e.g., server restart) between a call to
+ * WRITE and a subsequent call to either WRITE or COMMIT. This
+ * cookie MUST be unchanged during a single instance of the NFSv4.1
+ * server and MUST be unique between instances of the NFSv4.1
+ * server."
+ */
+void nfsd_reset_write_verifier(struct nfsd_net *nn)
{
- write_seqlock(&nn->boot_lock);
- nfsd_reset_boot_verifier_locked(nn);
- write_sequnlock(&nn->boot_lock);
+ write_seqlock(&nn->writeverf_lock);
+ nfsd_reset_write_verifier_locked(nn);
+ write_sequnlock(&nn->writeverf_lock);
}
static int nfsd_startup_net(struct net *net, const struct cred *cred)
@@ -435,6 +459,7 @@ static void nfsd_shutdown_net(struct net *net)
nfsd_shutdown_generic();
}
+static DEFINE_SPINLOCK(nfsd_notifier_lock);
static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -444,18 +469,17 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in sin;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nn->ntf_refcnt))
+ if (event != NETDEV_DOWN || !nn->nfsd_serv)
goto out;
+ spin_lock(&nfsd_notifier_lock);
if (nn->nfsd_serv) {
dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ifa->ifa_local;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
}
- atomic_dec(&nn->ntf_refcnt);
- wake_up(&nn->ntf_wq);
+ spin_unlock(&nfsd_notifier_lock);
out:
return NOTIFY_DONE;
@@ -475,10 +499,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in6 sin6;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nn->ntf_refcnt))
+ if (event != NETDEV_DOWN || !nn->nfsd_serv)
goto out;
+ spin_lock(&nfsd_notifier_lock);
if (nn->nfsd_serv) {
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
@@ -487,8 +511,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
- atomic_dec(&nn->ntf_refcnt);
- wake_up(&nn->ntf_wq);
+ spin_unlock(&nfsd_notifier_lock);
+
out:
return NOTIFY_DONE;
}
@@ -505,7 +529,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- atomic_dec(&nn->ntf_refcnt);
/* check if the notifier still has clients */
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -513,7 +536,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0);
/*
* write_ports can create the server without actually starting
@@ -594,20 +616,9 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
.svo_shutdown = nfsd_last_thread,
.svo_function = nfsd,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads,
.svo_module = THIS_MODULE,
};
-static void nfsd_complete_shutdown(struct net *net)
-{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- WARN_ON(!mutex_is_locked(&nfsd_mutex));
-
- nn->nfsd_serv = NULL;
- complete(&nn->nfsd_shutdown_complete);
-}
-
void nfsd_shutdown_threads(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -622,11 +633,9 @@ void nfsd_shutdown_threads(struct net *net)
svc_get(serv);
/* Kill outstanding nfsd threads */
- serv->sv_ops->svo_setup(serv, NULL, 0);
- nfsd_destroy(net);
+ svc_set_num_threads(serv, NULL, 0);
+ nfsd_put(net);
mutex_unlock(&nfsd_mutex);
- /* Wait for shutdown of nfsd_serv to complete */
- wait_for_completion(&nn->nfsd_shutdown_complete);
}
bool i_am_nfsd(void)
@@ -638,6 +647,7 @@ int nfsd_create_serv(struct net *net)
{
int error;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nn->nfsd_serv) {
@@ -647,19 +657,23 @@ int nfsd_create_serv(struct net *net)
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn);
- nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
- &nfsd_thread_sv_ops);
- if (nn->nfsd_serv == NULL)
+ serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+ &nfsd_thread_sv_ops);
+ if (serv == NULL)
return -ENOMEM;
- init_completion(&nn->nfsd_shutdown_complete);
- nn->nfsd_serv->sv_maxconn = nn->max_connections;
- error = svc_bind(nn->nfsd_serv, net);
+ serv->sv_maxconn = nn->max_connections;
+ error = svc_bind(serv, net);
if (error < 0) {
- svc_destroy(nn->nfsd_serv);
- nfsd_complete_shutdown(net);
+ /* NOT nfsd_put() as notifiers (see below) haven't
+ * been set up yet.
+ */
+ svc_put(serv);
return error;
}
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = serv;
+ spin_unlock(&nfsd_notifier_lock);
set_max_drc();
/* check if the notifier is already set */
@@ -669,8 +683,7 @@ int nfsd_create_serv(struct net *net)
register_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- atomic_inc(&nn->ntf_refcnt);
- nfsd_reset_boot_verifier(nn);
+ nfsd_reset_write_verifier(nn);
return 0;
}
@@ -697,16 +710,26 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
return 0;
}
-void nfsd_destroy(struct net *net)
+/* This is the callback for kref_put() below.
+ * There is no code here as the first thing to be done is
+ * call svc_shutdown_net(), but we cannot get the 'net' from
+ * the kref. So do all the work when kref_put returns true.
+ */
+static void nfsd_noop(struct kref *ref)
+{
+}
+
+void nfsd_put(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- int destroy = (nn->nfsd_serv->sv_nrthreads == 1);
- if (destroy)
+ if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
svc_shutdown_net(nn->nfsd_serv, net);
- svc_destroy(nn->nfsd_serv);
- if (destroy)
- nfsd_complete_shutdown(net);
+ svc_destroy(&nn->nfsd_serv->sv_refcnt);
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = NULL;
+ spin_unlock(&nfsd_notifier_lock);
+ }
}
int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
@@ -733,7 +756,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
if (tot > NFSD_MAXSERVS) {
/* total too large: scale down requested numbers */
for (i = 0; i < n && tot > 0; i++) {
- int new = nthreads[i] * NFSD_MAXSERVS / tot;
+ int new = nthreads[i] * NFSD_MAXSERVS / tot;
tot -= (nthreads[i] - new);
nthreads[i] = new;
}
@@ -753,12 +776,13 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
/* apply the new numbers */
svc_get(nn->nfsd_serv);
for (i = 0; i < n; i++) {
- err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
- &nn->nfsd_serv->sv_pools[i], nthreads[i]);
+ err = svc_set_num_threads(nn->nfsd_serv,
+ &nn->nfsd_serv->sv_pools[i],
+ nthreads[i]);
if (err)
break;
}
- nfsd_destroy(net);
+ nfsd_put(net);
return err;
}
@@ -795,21 +819,19 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
error = nfsd_startup_net(net, cred);
if (error)
- goto out_destroy;
- error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
- NULL, nrservs);
+ goto out_put;
+ error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
if (error)
goto out_shutdown;
- /* We are holding a reference to nn->nfsd_serv which
- * we don't want to count in the return value,
- * so subtract 1
- */
- error = nn->nfsd_serv->sv_nrthreads - 1;
+ error = nn->nfsd_serv->sv_nrthreads;
out_shutdown:
if (error < 0 && !nfsd_up_before)
nfsd_shutdown_net(net);
-out_destroy:
- nfsd_destroy(net); /* Release server */
+out_put:
+ /* Threads now hold service active */
+ if (xchg(&nn->keep_active, 0))
+ nfsd_put(net);
+ nfsd_put(net);
out:
mutex_unlock(&nfsd_mutex);
return error;
@@ -923,9 +945,6 @@ nfsd(void *vrqstp)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int err;
- /* Lock module and set up kernel thread */
- mutex_lock(&nfsd_mutex);
-
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
* umask as defined by the client instead of init's umask. */
@@ -945,8 +964,7 @@ nfsd(void *vrqstp)
allow_signal(SIGINT);
allow_signal(SIGQUIT);
- nfsdstats.th_cnt++;
- mutex_unlock(&nfsd_mutex);
+ atomic_inc(&nfsdstats.th_cnt);
set_freezable();
@@ -973,20 +991,36 @@ nfsd(void *vrqstp)
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);
- mutex_lock(&nfsd_mutex);
- nfsdstats.th_cnt --;
+ atomic_dec(&nfsdstats.th_cnt);
out:
- rqstp->rq_server = NULL;
+ /* Take an extra ref so that the svc_put in svc_exit_thread()
+ * doesn't call svc_destroy()
+ */
+ svc_get(nn->nfsd_serv);
/* Release the thread */
svc_exit_thread(rqstp);
- nfsd_destroy(net);
+ /* We need to drop a ref, but may not drop the last reference
+ * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that
+ * could deadlock with nfsd_shutdown_threads() waiting for us.
+ * So three options are:
+ * - drop a non-final reference,
+ * - get the mutex without waiting
+ * - sleep briefly andd try the above again
+ */
+ while (!svc_put_not_last(nn->nfsd_serv)) {
+ if (mutex_trylock(&nfsd_mutex)) {
+ nfsd_put(net);
+ mutex_unlock(&nfsd_mutex);
+ break;
+ }
+ msleep(20);
+ }
/* Release module */
- mutex_unlock(&nfsd_mutex);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
@@ -1096,7 +1130,6 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
mutex_unlock(&nfsd_mutex);
return -ENODEV;
}
- /* bump up the psudo refcount while traversing */
svc_get(nn->nfsd_serv);
ret = svc_pool_stats_open(nn->nfsd_serv, file);
mutex_unlock(&nfsd_mutex);
@@ -1109,8 +1142,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file)
struct net *net = inode->i_sb->s_fs_info;
mutex_lock(&nfsd_mutex);
- /* this function really, really should have been called svc_put() */
- nfsd_destroy(net);
+ nfsd_put(net);
mutex_unlock(&nfsd_mutex);
return ret;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e73bdbb1634a..95457cfd37fc 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -568,6 +568,10 @@ struct nfs4_ol_stateid {
struct list_head st_locks;
struct nfs4_stateowner *st_stateowner;
struct nfs4_clnt_odstate *st_clnt_odstate;
+/*
+ * These bitmasks use 3 separate bits for READ, ALLOW, and BOTH; see the
+ * comment above bmap_to_share_mode() for explanation:
+ */
unsigned char st_access_bmap;
unsigned char st_deny_bmap;
struct nfs4_ol_stateid *st_openstp;
@@ -629,6 +633,7 @@ struct nfsd4_blocked_lock {
struct file_lock nbl_lock;
struct knfsd_fh nbl_fh;
struct nfsd4_callback nbl_cb;
+ struct kref nbl_kref;
};
struct nfsd4_compound_state;
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 1d3b881e7382..a8c5a02a84f0 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -45,7 +45,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
/* thread usage: */
- seq_printf(seq, "th %u 0", nfsdstats.th_cnt);
+ seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
/* deprecated thread usage histogram stats */
for (i = 0; i < 10; i++)
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 51ecda852e23..9b43dc3d9991 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -29,11 +29,9 @@ enum {
struct nfsd_stats {
struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
- /* Protected by nfsd_mutex */
- unsigned int th_cnt; /* number of available threads */
+ atomic_t th_cnt; /* number of available threads */
};
-
extern struct nfsd_stats nfsdstats;
extern struct svc_stat nfsd_svcstats;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index f1e0d3c51bc2..c4cf56327843 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -47,7 +47,7 @@
rqstp->rq_xprt->xpt_remotelen); \
} while (0);
-TRACE_EVENT(nfsd_garbage_args_err,
+DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
TP_PROTO(
const struct svc_rqst *rqstp
),
@@ -69,27 +69,13 @@ TRACE_EVENT(nfsd_garbage_args_err,
)
);
-TRACE_EVENT(nfsd_cant_encode_err,
- TP_PROTO(
- const struct svc_rqst *rqstp
- ),
- TP_ARGS(rqstp),
- TP_STRUCT__entry(
- NFSD_TRACE_PROC_ARG_FIELDS
-
- __field(u32, vers)
- __field(u32, proc)
- ),
- TP_fast_assign(
- NFSD_TRACE_PROC_ARG_ASSIGNMENTS
+#define DEFINE_NFSD_XDR_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \
+ TP_PROTO(const struct svc_rqst *rqstp), \
+ TP_ARGS(rqstp))
- __entry->vers = rqstp->rq_vers;
- __entry->proc = rqstp->rq_proc;
- ),
- TP_printk("xid=0x%08x vers=%u proc=%u",
- __entry->xid, __entry->vers, __entry->proc
- )
-);
+DEFINE_NFSD_XDR_ERR_EVENT(garbage_args);
+DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
#define show_nfsd_may_flags(x) \
__print_flags(x, "|", \
@@ -413,6 +399,56 @@ TRACE_EVENT(nfsd_dirent,
)
)
+DECLARE_EVENT_CLASS(nfsd_copy_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *src_fhp,
+ loff_t src_offset,
+ struct svc_fh *dst_fhp,
+ loff_t dst_offset,
+ u64 count,
+ int status),
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, src_fh_hash)
+ __field(loff_t, src_offset)
+ __field(u32, dst_fh_hash)
+ __field(loff_t, dst_offset)
+ __field(u64, count)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle);
+ __entry->src_offset = src_offset;
+ __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle);
+ __entry->dst_offset = dst_offset;
+ __entry->count = count;
+ __entry->status = status;
+ ),
+ TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld "
+ "dst_fh_hash=0x%08x dst_offset=%lld "
+ "count=%llu status=%d",
+ __entry->xid, __entry->src_fh_hash, __entry->src_offset,
+ __entry->dst_fh_hash, __entry->dst_offset,
+ (unsigned long long)__entry->count,
+ __entry->status)
+)
+
+#define DEFINE_NFSD_COPY_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *src_fhp, \
+ loff_t src_offset, \
+ struct svc_fh *dst_fhp, \
+ loff_t dst_offset, \
+ u64 count, \
+ int status), \
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \
+ count, status))
+
+DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
+
#include "state.h"
#include "filecache.h"
#include "vfs.h"
@@ -538,6 +574,34 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
DEFINE_NET_EVENT(grace_start);
DEFINE_NET_EVENT(grace_complete);
+TRACE_EVENT(nfsd_writeverf_reset,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ const struct svc_rqst *rqstp,
+ int error
+ ),
+ TP_ARGS(nn, rqstp, error),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(u32, xid)
+ __field(int, error)
+ __array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->error = error;
+
+ /* avoid seqlock inside TP_fast_assign */
+ memcpy(__entry->verifier, nn->writeverf,
+ NFS4_VERIFIER_SIZE);
+ ),
+ TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s",
+ __entry->boot_time, __entry->xid, __entry->error,
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
+ )
+);
+
TRACE_EVENT(nfsd_clid_cred_mismatch,
TP_PROTO(
const struct nfs4_client *clp,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c99857689e2c..99c2b9dfbb10 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -40,6 +40,7 @@
#include "../internal.h"
#include "acl.h"
#include "idmap.h"
+#include "xdr4.h"
#endif /* CONFIG_NFSD_V4 */
#include "nfsd.h"
@@ -517,15 +518,23 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
- struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
+static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
+{
+ return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate;
+}
+
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync)
{
struct file *src = nf_src->nf_file;
struct file *dst = nf_dst->nf_file;
+ errseq_t since;
loff_t cloned;
__be32 ret = 0;
- down_write(&nf_dst->nf_rwsem);
+ since = READ_ONCE(dst->f_wb_err);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
if (cloned < 0) {
ret = nfserrno(cloned);
@@ -540,15 +549,25 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
+ if (!status)
status = commit_inode_metadata(file_inode(src));
if (status < 0) {
- nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
- nfsd_net_id));
+ struct nfsd_net *nn = net_generic(nf_dst->nf_net,
+ nfsd_net_id);
+
+ trace_nfsd_clone_file_range_err(rqstp,
+ &nfsd4_get_cstate(rqstp)->save_fh,
+ src_pos,
+ &nfsd4_get_cstate(rqstp)->current_fh,
+ dst_pos,
+ count, status);
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, status);
ret = nfserrno(status);
}
}
out_err:
- up_write(&nf_dst->nf_rwsem);
return ret;
}
@@ -777,6 +796,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int may_flags, struct file **filp)
{
__be32 err;
+ bool retried = false;
validate_process_creds();
/*
@@ -792,9 +812,16 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
*/
if (type == S_IFREG)
may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+retry:
err = fh_verify(rqstp, fhp, type, may_flags);
- if (!err)
+ if (!err) {
err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ if (err == nfserr_stale && !retried) {
+ retried = true;
+ fh_put(fhp);
+ goto retry;
+ }
+ }
validate_process_creds();
return err;
}
@@ -944,10 +971,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
unsigned long *cnt, int stable,
__be32 *verf)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file *file = nf->nf_file;
struct super_block *sb = file_inode(file)->i_sb;
struct svc_export *exp;
struct iov_iter iter;
+ errseq_t since;
__be32 nfserr;
int host_err;
int use_wgather;
@@ -985,36 +1014,28 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
flags |= RWF_SYNC;
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
- if (flags & RWF_SYNC) {
- down_write(&nf->nf_rwsem);
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- if (host_err < 0)
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
- up_write(&nf->nf_rwsem);
- } else {
- down_read(&nf->nf_rwsem);
- if (verf)
- nfsd_copy_boot_verifier(verf,
- net_generic(SVC_NET(rqstp),
- nfsd_net_id));
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- up_read(&nf->nf_rwsem);
- }
+ since = READ_ONCE(file->f_wb_err);
+ if (verf)
+ nfsd_copy_write_verifier(verf, nn);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0) {
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
goto out_nfserr;
}
*cnt = host_err;
nfsd_stats_io_write_add(exp, *cnt);
fsnotify_modify(file);
+ host_err = filemap_check_wb_err(file->f_mapping, since);
+ if (host_err < 0)
+ goto out_nfserr;
if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
- if (host_err < 0)
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
+ if (host_err < 0) {
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
+ }
}
out_nfserr:
@@ -1089,19 +1110,6 @@ out:
}
#ifdef CONFIG_NFSD_V3
-static int
-nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset,
- loff_t end)
-{
- struct address_space *mapping = nf->nf_file->f_mapping;
- int ret = filemap_fdatawrite_range(mapping, offset, end);
-
- if (ret)
- return ret;
- filemap_fdatawait_range_keep_errors(mapping, offset, end);
- return 0;
-}
-
/*
* Commit all pending writes to stable storage.
*
@@ -1115,6 +1123,7 @@ __be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long count, __be32 *verf)
{
+ struct nfsd_net *nn;
struct nfsd_file *nf;
loff_t end = LLONG_MAX;
__be32 err = nfserr_inval;
@@ -1131,29 +1140,28 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+ nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end);
+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
+ int err2;
- down_write(&nf->nf_rwsem);
- if (!err2)
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
switch (err2) {
case 0:
- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_copy_write_verifier(verf, nn);
+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
+ since);
break;
case -EINVAL:
err = nfserr_notsupp;
break;
default:
- err = nfserrno(err2);
- nfsd_reset_boot_verifier(net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, err2);
}
- up_write(&nf->nf_rwsem);
+ err = nfserrno(err2);
} else
- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_copy_write_verifier(verf, nn);
nfsd_file_put(nf);
out:
@@ -1747,8 +1755,8 @@ retry:
* so do it by hand */
trap = lock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = true;
- fill_pre_wcc(ffhp);
- fill_pre_wcc(tfhp);
+ fh_fill_pre_attrs(ffhp);
+ fh_fill_pre_attrs(tfhp);
odentry = lookup_one_len(fname, fdentry, flen);
host_err = PTR_ERR(odentry);
@@ -1808,8 +1816,8 @@ retry:
* were the same, so again we do it by hand.
*/
if (!close_cached) {
- fill_post_wcc(ffhp);
- fill_post_wcc(tfhp);
+ fh_fill_post_attrs(ffhp);
+ fh_fill_post_attrs(tfhp);
}
unlock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = false;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index b21b76e6b9a8..9f56dcb22ff7 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -57,7 +57,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
struct nfsd_file *nf_dst, u64 dst_pos,
u64 count, bool sync);
#endif /* CONFIG_NFSD_V4 */
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index bc3e2cd4117f..063dd16d75b5 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -195,12 +195,12 @@ void nilfs_page_bug(struct page *page)
*/
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
- struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
+ struct buffer_head *dbh, *dbufs, *sbh;
unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
BUG_ON(PageWriteback(dst));
- sbh = sbufs = page_buffers(src);
+ sbh = page_buffers(src);
if (!page_has_buffers(dst))
create_empty_buffers(dst, sbh->b_size, 0);
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index d5ebebb034ff..829dd4a61b66 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -19,7 +19,25 @@
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
-int dir_notify_enable __read_mostly = 1;
+static int dir_notify_enable __read_mostly = 1;
+#ifdef CONFIG_SYSCTL
+static struct ctl_table dnotify_sysctls[] = {
+ {
+ .procname = "dir-notify-enable",
+ .data = &dir_notify_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {}
+};
+static void __init dnotify_sysctl_init(void)
+{
+ register_sysctl_init("fs", dnotify_sysctls);
+}
+#else
+#define dnotify_sysctl_init() do { } while (0)
+#endif
static struct kmem_cache *dnotify_struct_cache __read_mostly;
static struct kmem_cache *dnotify_mark_cache __read_mostly;
@@ -386,6 +404,7 @@ static int __init dnotify_init(void)
dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
+ dnotify_sysctl_init();
return 0;
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 73a3e939c921..73b1615f9d96 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -59,7 +59,7 @@ static int fanotify_max_queued_events __read_mostly;
static long ft_zero = 0;
static long ft_int_max = INT_MAX;
-struct ctl_table fanotify_table[] = {
+static struct ctl_table fanotify_table[] = {
{
.procname = "max_user_groups",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
@@ -88,6 +88,13 @@ struct ctl_table fanotify_table[] = {
},
{ }
};
+
+static void __init fanotify_sysctls_init(void)
+{
+ register_sysctl("fs/fanotify", fanotify_table);
+}
+#else
+#define fanotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
/*
@@ -1743,6 +1750,7 @@ static int __init fanotify_user_setup(void)
init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
FANOTIFY_DEFAULT_MAX_GROUPS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks;
+ fanotify_sysctls_init();
return 0;
}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 29fca3284bb5..54583f62dc44 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -58,7 +58,7 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
static long it_zero = 0;
static long it_int_max = INT_MAX;
-struct ctl_table inotify_table[] = {
+static struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
@@ -87,6 +87,14 @@ struct ctl_table inotify_table[] = {
},
{ }
};
+
+static void __init inotify_sysctls_init(void)
+{
+ register_sysctl("fs/inotify", inotify_table);
+}
+
+#else
+#define inotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
@@ -849,6 +857,7 @@ static int __init inotify_user_setup(void)
inotify_max_queued_events = 16384;
init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;
+ inotify_sysctls_init();
return 0;
}
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index d563abc3e136..2911c04a33e0 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 8aaec7e0804e..fb825059d488 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -11,7 +11,6 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
-#include <linux/cleancache.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index bb247bc349e4..bf9357123bc5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2040,7 +2040,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
int i, idx;
struct ocfs2_extent_list *el, *left_el, *right_el;
struct ocfs2_extent_rec *left_rec, *right_rec;
- struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
+ struct buffer_head *root_bh;
/*
* Update the counts and position values within all the
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 68d11c295dd3..498da317580a 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1799,20 +1799,20 @@ try_again:
*/
ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
cluster_of_pages, mmap_page);
- if (ret && ret != -EAGAIN) {
- mlog_errno(ret);
- goto out_quota;
- }
+ if (ret) {
+ /*
+ * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
+ * the target page. In this case, we exit with no error and no target
+ * page. This will trigger the caller, page_mkwrite(), to re-try
+ * the operation.
+ */
+ if (type == OCFS2_WRITE_MMAP && ret == -EAGAIN) {
+ BUG_ON(wc->w_target_page);
+ ret = 0;
+ goto out_quota;
+ }
- /*
- * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
- * the target page. In this case, we exit with no error and no target
- * page. This will trigger the caller, page_mkwrite(), to re-try
- * the operation.
- */
- if (ret == -EAGAIN) {
- BUG_ON(wc->w_target_page);
- ret = 0;
+ mlog_errno(ret);
goto out_quota;
}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f89ffcbd585f..a17be1618bf7 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -379,7 +379,7 @@ static void o2hb_nego_timeout(struct work_struct *work)
o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
/* lowest node as master node to make negotiate decision. */
- master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0);
+ master_node = find_first_bit(live_node_bitmap, O2NM_MAX_NODES);
if (master_node == o2nm_this_node()) {
if (!test_bit(master_node, reg->hr_nego_node_bitmap)) {
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 810d32815593..563881ddbf00 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -120,7 +120,8 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
define_mask(KTHREAD),
};
-static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
+static struct attribute *mlog_default_attrs[MLOG_MAX_BITS] = {NULL, };
+ATTRIBUTE_GROUPS(mlog_default);
static ssize_t mlog_show(struct kobject *obj, struct attribute *attr,
char *buf)
@@ -144,8 +145,8 @@ static const struct sysfs_ops mlog_attr_ops = {
};
static struct kobj_type mlog_ktype = {
- .default_attrs = mlog_attr_ptrs,
- .sysfs_ops = &mlog_attr_ops,
+ .default_groups = mlog_default_groups,
+ .sysfs_ops = &mlog_attr_ops,
};
static struct kset mlog_kset = {
@@ -157,10 +158,10 @@ int mlog_sys_init(struct kset *o2cb_kset)
int i = 0;
while (mlog_attrs[i].attr.mode) {
- mlog_attr_ptrs[i] = &mlog_attrs[i].attr;
+ mlog_default_attrs[i] = &mlog_attrs[i].attr;
i++;
}
- mlog_attr_ptrs[i] = NULL;
+ mlog_default_attrs[i] = NULL;
kobject_set_name(&mlog_kset.kobj, "logmask");
mlog_kset.kobj.kset = o2cb_kset;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index bd8d534f11cb..f2cc1ff29e6d 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3343,7 +3343,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
struct ocfs2_dir_entry *de, *last_de = NULL;
char *de_buf, *limit;
unsigned long offset = 0;
- unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
+ unsigned int rec_len, new_rec_len, free_space;
/*
* This calculates how many free bytes we'd have in block zero, should
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 9f90fc9551e1..c4eccd499db8 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1045,7 +1045,7 @@ static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map)
int status, ret = 0, i;
char *p;
- if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
+ if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES)
goto bail;
qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL);
@@ -1217,7 +1217,7 @@ static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map)
struct o2nm_node *node;
int ret = 0, status, count, i;
- if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
+ if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES)
goto bail;
qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9b88219febb5..227da5b1b6ab 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -861,7 +861,7 @@ lookup:
* to see if there are any nodes that still need to be
* considered. these will not appear in the mle nodemap
* but they might own this lockres. wait on them. */
- bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES);
if (bit < O2NM_MAX_NODES) {
mlog(0, "%s: res %.*s, At least one node (%d) "
"to recover before lock mastery can begin\n",
@@ -912,7 +912,7 @@ redo_request:
dlm_wait_for_recovery(dlm);
spin_lock(&dlm->spinlock);
- bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES);
if (bit < O2NM_MAX_NODES) {
mlog(0, "%s: res %.*s, At least one node (%d) "
"to recover before lock mastery can begin\n",
@@ -1079,7 +1079,7 @@ recheck:
sleep = 1;
/* have all nodes responded? */
if (voting_done && !*blocked) {
- bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES);
if (dlm->node_num <= bit) {
/* my node number is lowest.
* now tell other nodes that I am
@@ -1234,8 +1234,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
} else {
mlog(ML_ERROR, "node down! %d\n", node);
if (blocked) {
- int lowest = find_next_bit(mle->maybe_map,
- O2NM_MAX_NODES, 0);
+ int lowest = find_first_bit(mle->maybe_map,
+ O2NM_MAX_NODES);
/* act like it was never there */
clear_bit(node, mle->maybe_map);
@@ -1795,7 +1795,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
"MLE for it! (%.*s)\n", assert->node_idx,
namelen, name);
} else {
- int bit = find_next_bit (mle->maybe_map, O2NM_MAX_NODES, 0);
+ int bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES);
if (bit >= O2NM_MAX_NODES) {
/* not necessarily an error, though less likely.
* could be master just re-asserting. */
@@ -2521,7 +2521,7 @@ static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm,
}
if (!nonlocal) {
- node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+ node_ref = find_first_bit(res->refmap, O2NM_MAX_NODES);
if (node_ref >= O2NM_MAX_NODES)
return 0;
}
@@ -3303,7 +3303,7 @@ static void dlm_clean_block_mle(struct dlm_ctxt *dlm,
BUG_ON(mle->type != DLM_MLE_BLOCK);
spin_lock(&mle->spinlock);
- bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES);
if (bit != dead_node) {
mlog(0, "mle found, but dead node %u would not have been "
"master\n", dead_node);
@@ -3542,7 +3542,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm)
spin_lock(&dlm->master_lock);
BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
- BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
+ BUG_ON((find_first_bit(dlm->domain_map, O2NM_MAX_NODES) < O2NM_MAX_NODES));
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
bucket = dlm_master_hash(dlm, i);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 5cd5f7511dac..52ad342fec3e 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -451,7 +451,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
int bit;
- bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES);
if (bit >= O2NM_MAX_NODES || bit < 0)
dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
else
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index c350bd4df770..eedf07ca23ca 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -92,7 +92,7 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
return 0;
/* Another node has this resource with this node as the master */
- bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(res->refmap, O2NM_MAX_NODES);
if (bit < O2NM_MAX_NODES)
return 0;
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index de56e6231af8..1ad7106741f8 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -94,6 +94,7 @@ static struct attribute *ocfs2_filecheck_attrs[] = {
&ocfs2_filecheck_attr_set.attr,
NULL
};
+ATTRIBUTE_GROUPS(ocfs2_filecheck);
static void ocfs2_filecheck_release(struct kobject *kobj)
{
@@ -138,7 +139,7 @@ static const struct sysfs_ops ocfs2_filecheck_ops = {
};
static struct kobj_type ocfs2_ktype_filecheck = {
- .default_attrs = ocfs2_filecheck_attrs,
+ .default_groups = ocfs2_filecheck_groups,
.sysfs_ops = &ocfs2_filecheck_ops,
.release = ocfs2_filecheck_release,
};
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index dbf9b9e97d74..1887a2708709 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1669,8 +1669,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
status = jbd2_journal_load(journal);
if (status < 0) {
mlog_errno(status);
- if (!igrab(inode))
- BUG();
+ BUG_ON(!igrab(inode));
jbd2_journal_destroy(journal);
goto done;
}
@@ -1699,8 +1698,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
if (status < 0)
mlog_errno(status);
- if (!igrab(inode))
- BUG();
+ BUG_ON(!igrab(inode));
jbd2_journal_destroy(journal);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 16f1bfc407f2..731558a6f27d 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -672,31 +672,8 @@ static struct ctl_table ocfs2_mod_table[] = {
{ }
};
-static struct ctl_table ocfs2_kern_table[] = {
- {
- .procname = "ocfs2",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_mod_table
- },
- { }
-};
-
-static struct ctl_table ocfs2_root_table[] = {
- {
- .procname = "fs",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_kern_table
- },
- { }
-};
-
static struct ctl_table_header *ocfs2_table_header;
-
/*
* Initialization
*/
@@ -705,7 +682,7 @@ static int __init ocfs2_stack_glue_init(void)
{
strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
- ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
+ ocfs2_table_header = register_sysctl("fs/ocfs2", ocfs2_mod_table);
if (!ocfs2_table_header) {
printk(KERN_ERR
"ocfs2 stack glue: unable to register sysctl\n");
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 1286b88b6fa1..2772dec9dcea 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -25,7 +25,6 @@
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/quotaops.h>
-#include <linux/cleancache.h>
#include <linux/signal.h>
#define CREATE_TRACE_POINTS
@@ -2283,7 +2282,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog_errno(status);
goto bail;
}
- cleancache_init_shared_fs(sb);
osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM);
if (!osb->ocfs2_wq) {
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 538e839590ef..b501dc07f922 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -176,7 +176,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
{
kfree(bufmap->page_array);
kfree(bufmap->desc_array);
- kfree(bufmap->buffer_index_array);
+ bitmap_free(bufmap->buffer_index_array);
kfree(bufmap);
}
@@ -226,8 +226,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
bufmap->desc_size = user_desc->size;
bufmap->desc_shift = ilog2(bufmap->desc_size);
- bufmap->buffer_index_array =
- kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL);
+ bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL);
if (!bufmap->buffer_index_array)
goto out_free_bufmap;
@@ -250,7 +249,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
out_free_desc_array:
kfree(bufmap->desc_array);
out_free_index_array:
- kfree(bufmap->buffer_index_array);
+ bitmap_free(bufmap->buffer_index_array);
out_free_bufmap:
kfree(bufmap);
out:
diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c
index 3627ea946402..de80b62553bb 100644
--- a/fs/orangefs/orangefs-sysfs.c
+++ b/fs/orangefs/orangefs-sysfs.c
@@ -894,10 +894,11 @@ static struct attribute *orangefs_default_attrs[] = {
&perf_time_interval_secs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(orangefs_default);
static struct kobj_type orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = orangefs_default_attrs,
+ .default_groups = orangefs_default_groups,
};
static struct orangefs_attribute acache_hard_limit_attribute =
@@ -931,10 +932,11 @@ static struct attribute *acache_orangefs_default_attrs[] = {
&acache_timeout_msecs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(acache_orangefs_default);
static struct kobj_type acache_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = acache_orangefs_default_attrs,
+ .default_groups = acache_orangefs_default_groups,
};
static struct orangefs_attribute capcache_hard_limit_attribute =
@@ -968,10 +970,11 @@ static struct attribute *capcache_orangefs_default_attrs[] = {
&capcache_timeout_secs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(capcache_orangefs_default);
static struct kobj_type capcache_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = capcache_orangefs_default_attrs,
+ .default_groups = capcache_orangefs_default_groups,
};
static struct orangefs_attribute ccache_hard_limit_attribute =
@@ -1005,10 +1008,11 @@ static struct attribute *ccache_orangefs_default_attrs[] = {
&ccache_timeout_secs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(ccache_orangefs_default);
static struct kobj_type ccache_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = ccache_orangefs_default_attrs,
+ .default_groups = ccache_orangefs_default_groups,
};
static struct orangefs_attribute ncache_hard_limit_attribute =
@@ -1042,10 +1046,11 @@ static struct attribute *ncache_orangefs_default_attrs[] = {
&ncache_timeout_msecs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(ncache_orangefs_default);
static struct kobj_type ncache_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = ncache_orangefs_default_attrs,
+ .default_groups = ncache_orangefs_default_groups,
};
static struct orangefs_attribute pc_acache_attribute =
@@ -1072,10 +1077,11 @@ static struct attribute *pc_orangefs_default_attrs[] = {
&pc_ncache_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(pc_orangefs_default);
static struct kobj_type pc_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = pc_orangefs_default_attrs,
+ .default_groups = pc_orangefs_default_groups,
};
static struct orangefs_attribute stats_reads_attribute =
@@ -1095,10 +1101,11 @@ static struct attribute *stats_orangefs_default_attrs[] = {
&stats_writes_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(stats_orangefs_default);
static struct kobj_type stats_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = stats_orangefs_default_attrs,
+ .default_groups = stats_orangefs_default_groups,
};
static struct kobject *orangefs_obj;
diff --git a/fs/pipe.c b/fs/pipe.c
index 6d4342bad9f1..cc28623a67b6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -25,6 +25,7 @@
#include <linux/fcntl.h>
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>
+#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
@@ -50,13 +51,13 @@
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
*/
-unsigned int pipe_max_size = 1048576;
+static unsigned int pipe_max_size = 1048576;
/* Maximum allocatable pages per user. Hard limit is unset by default, soft
* matches default values.
*/
-unsigned long pipe_user_pages_hard;
-unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+static unsigned long pipe_user_pages_hard;
+static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
/*
* We use head and tail indices that aren't masked off, except at the point of
@@ -1428,6 +1429,60 @@ static struct file_system_type pipe_fs_type = {
.kill_sb = kill_anon_super,
};
+#ifdef CONFIG_SYSCTL
+static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data)
+{
+ if (write) {
+ unsigned int val;
+
+ val = round_pipe_size(*lvalp);
+ if (val == 0)
+ return -EINVAL;
+
+ *valp = val;
+ } else {
+ unsigned int val = *valp;
+ *lvalp = (unsigned long) val;
+ }
+
+ return 0;
+}
+
+static int proc_dopipe_max_size(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ return do_proc_douintvec(table, write, buffer, lenp, ppos,
+ do_proc_dopipe_max_size_conv, NULL);
+}
+
+static struct ctl_table fs_pipe_sysctls[] = {
+ {
+ .procname = "pipe-max-size",
+ .data = &pipe_max_size,
+ .maxlen = sizeof(pipe_max_size),
+ .mode = 0644,
+ .proc_handler = proc_dopipe_max_size,
+ },
+ {
+ .procname = "pipe-user-pages-hard",
+ .data = &pipe_user_pages_hard,
+ .maxlen = sizeof(pipe_user_pages_hard),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "pipe-user-pages-soft",
+ .data = &pipe_user_pages_soft,
+ .maxlen = sizeof(pipe_user_pages_soft),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ { }
+};
+#endif
+
static int __init init_pipe_fs(void)
{
int err = register_filesystem(&pipe_fs_type);
@@ -1439,6 +1494,9 @@ static int __init init_pipe_fs(void)
unregister_filesystem(&pipe_fs_type);
}
}
+#ifdef CONFIG_SYSCTL
+ register_sysctl_init("fs", fs_pipe_sysctls);
+#endif
return err;
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ff869a66b34e..fd8b0c12b2cb 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -92,6 +92,7 @@
#include <linux/string_helpers.h>
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include "internal.h"
@@ -102,6 +103,8 @@ void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape)
if (p->flags & PF_WQ_WORKER)
wq_worker_comm(tcomm, sizeof(tcomm), p);
+ else if (p->flags & PF_KTHREAD)
+ get_kthread_comm(tcomm, sizeof(tcomm), p);
else
__get_task_comm(tcomm, sizeof(tcomm), p);
@@ -468,6 +471,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
u64 cgtime, gtime;
unsigned long rsslim = 0;
unsigned long flags;
+ int exit_code = task->exit_code;
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -531,6 +535,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
maj_flt += sig->maj_flt;
thread_group_cputime_adjusted(task, &utime, &stime);
gtime += sig->gtime;
+
+ if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
+ exit_code = sig->group_exit_code;
}
sid = task_session_nr_ns(task, ns);
@@ -630,7 +637,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_puts(m, " 0 0 0 0 0 0 0");
if (permitted)
- seq_put_decimal_ll(m, " ", task->exit_code);
+ seq_put_decimal_ll(m, " ", exit_code);
else
seq_puts(m, " 0");
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 13eda8de2998..d654ce7150fd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -670,10 +670,10 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
/************************************************************************/
/* permission checks */
-static int proc_fd_access_allowed(struct inode *inode)
+static bool proc_fd_access_allowed(struct inode *inode)
{
struct task_struct *task;
- int allowed = 0;
+ bool allowed = false;
/* Allow access to a task's file descriptors if it is us or we
* may use ptrace attach to the process and find out that
* information.
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5b78739e60e4..f2132407e133 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -791,12 +791,6 @@ void proc_remove(struct proc_dir_entry *de)
}
EXPORT_SYMBOL(proc_remove);
-void *PDE_DATA(const struct inode *inode)
-{
- return __PDE_DATA(inode);
-}
-EXPORT_SYMBOL(PDE_DATA);
-
/*
* Pull a user buffer into memory and pass it to the file's write handler if
* one is supplied. The ->write() method is permitted to modify the
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 599eb724ff2d..f84355c5a36d 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -650,6 +650,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
return NULL;
}
+ inode->i_private = de->data;
inode->i_ino = de->low_ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
PROC_I(inode)->pde = de;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 03415f3fb3a8..06a80f78433d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -115,11 +115,6 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
return PROC_I(inode)->pde;
}
-static inline void *__PDE_DATA(const struct inode *inode)
-{
- return PDE(inode)->data;
-}
-
static inline struct pid *proc_pid(const struct inode *inode)
{
return PROC_I(inode)->pid;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 39b823ab2564..e1cfeda397f3 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -138,7 +138,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data);
* @parent: The parent directory in which to create.
* @ops: The seq_file ops with which to read the file.
* @write: The write method with which to 'modify' the file.
- * @data: Data for retrieval by PDE_DATA().
+ * @data: Data for retrieval by pde_data().
*
* Create a network namespaced proc file in the @parent directory with the
* specified @name and @mode that allows reading of a file that displays a
@@ -153,7 +153,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data);
* modified by the @write function. @write should return 0 on success.
*
* The @data value is accessible from the @show and @write functions by calling
- * PDE_DATA() on the file inode. The network namespace must be accessed by
+ * pde_data() on the file inode. The network namespace must be accessed by
* calling seq_file_net() on the seq_file struct.
*/
struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode,
@@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_single);
* @parent: The parent directory in which to create.
* @show: The seqfile show method with which to read the file.
* @write: The write method with which to 'modify' the file.
- * @data: Data for retrieval by PDE_DATA().
+ * @data: Data for retrieval by pde_data().
*
* Create a network-namespaced proc file in the @parent directory with the
* specified @name and @mode that allows reading of a file that displays a
@@ -245,7 +245,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_single);
* modified by the @write function. @write should return 0 on success.
*
* The @data value is accessible from the @show and @write functions by calling
- * PDE_DATA() on the file inode. The network namespace must be accessed by
+ * pde_data() on the file inode. The network namespace must be accessed by
* calling seq_file_single_net() on the seq_file struct.
*/
struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mode,
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5d66faecd4ef..7d9cfc730bd4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
#include <linux/mount.h>
+#include <linux/kmemleak.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
@@ -25,15 +26,32 @@ static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
/* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, INT_MAX };
+const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX, 65535 };
EXPORT_SYMBOL(sysctl_vals);
+const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
+EXPORT_SYMBOL_GPL(sysctl_long_vals);
+
/* Support for permanently empty directories */
struct ctl_table sysctl_mount_point[] = {
{ }
};
+/**
+ * register_sysctl_mount_point() - registers a sysctl mount point
+ * @path: path for the mount point
+ *
+ * Used to create a permanently empty directory to serve as mount point.
+ * There are some subtle but important permission checks this allows in the
+ * case of unprivileged mounts.
+ */
+struct ctl_table_header *register_sysctl_mount_point(const char *path)
+{
+ return register_sysctl(path, sysctl_mount_point);
+}
+EXPORT_SYMBOL(register_sysctl_mount_point);
+
static bool is_empty_dir(struct ctl_table_header *head)
{
return head->ctl_table[0].child == sysctl_mount_point;
@@ -163,7 +181,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
else {
pr_err("sysctl duplicate entry: ");
sysctl_print_dir(head->parent);
- pr_cont("/%s\n", entry->procname);
+ pr_cont("%s\n", entry->procname);
return -EEXIST;
}
}
@@ -1020,8 +1038,8 @@ failed:
if (IS_ERR(subdir)) {
pr_err("sysctl could not get directory: ");
sysctl_print_dir(dir);
- pr_cont("/%*.*s %ld\n",
- namelen, namelen, name, PTR_ERR(subdir));
+ pr_cont("%*.*s %ld\n", namelen, namelen, name,
+ PTR_ERR(subdir));
}
drop_sysctl_table(&dir->header);
if (new)
@@ -1053,7 +1071,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
struct ctl_dir *dir;
int ret;
- ret = 0;
spin_lock(&sysctl_lock);
root = (*pentry)->data;
set = lookup_header_set(root);
@@ -1384,6 +1401,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab
}
EXPORT_SYMBOL(register_sysctl);
+/**
+ * __register_sysctl_init() - register sysctl table to path
+ * @path: path name for sysctl base
+ * @table: This is the sysctl table that needs to be registered to the path
+ * @table_name: The name of sysctl table, only used for log printing when
+ * registration fails
+ *
+ * The sysctl interface is used by userspace to query or modify at runtime
+ * a predefined value set on a variable. These variables however have default
+ * values pre-set. Code which depends on these variables will always work even
+ * if register_sysctl() fails. If register_sysctl() fails you'd just loose the
+ * ability to query or modify the sysctls dynamically at run time. Chances of
+ * register_sysctl() failing on init are extremely low, and so for both reasons
+ * this function does not return any error as it is used by initialization code.
+ *
+ * Context: Can only be called after your respective sysctl base path has been
+ * registered. So for instance, most base directories are registered early on
+ * init before init levels are processed through proc_sys_init() and
+ * sysctl_init_bases().
+ */
+void __init __register_sysctl_init(const char *path, struct ctl_table *table,
+ const char *table_name)
+{
+ struct ctl_table_header *hdr = register_sysctl(path, table);
+
+ if (unlikely(!hdr)) {
+ pr_err("failed when register_sysctl %s to %s\n", table_name, path);
+ return;
+ }
+ kmemleak_not_leak(hdr);
+}
+
static char *append_path(const char *path, char *pos, const char *name)
{
int namelen;
@@ -1597,6 +1646,15 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
}
EXPORT_SYMBOL(register_sysctl_table);
+int __register_sysctl_base(struct ctl_table *base_table)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_sysctl_table(base_table);
+ kmemleak_not_leak(hdr);
+ return 0;
+}
+
static void put_links(struct ctl_table_header *header)
{
struct ctl_table_set *root_set = &sysctl_table_root.default_set;
@@ -1626,7 +1684,7 @@ static void put_links(struct ctl_table_header *header)
else {
pr_err("sysctl link missing during unregister: ");
sysctl_print_dir(parent);
- pr_cont("/%s\n", name);
+ pr_cont("%s\n", name);
}
}
}
@@ -1710,7 +1768,7 @@ int __init proc_sys_init(void)
proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations;
proc_sys_root->nlink = 0;
- return sysctl_init();
+ return sysctl_init_bases();
}
struct sysctl_alias {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ad667dbc96f5..18f8c3acbb85 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/vmacache.h>
+#include <linux/mm_inline.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
@@ -308,6 +309,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
name = arch_vma_name(vma);
if (!name) {
+ const char *anon_name;
+
if (!mm) {
name = "[vdso]";
goto done;
@@ -319,8 +322,16 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
goto done;
}
- if (is_stack(vma))
+ if (is_stack(vma)) {
name = "[stack]";
+ goto done;
+ }
+
+ anon_name = vma_anon_name(vma);
+ if (anon_name) {
+ seq_pad(m, ' ');
+ seq_printf(m, "[anon:%s]", anon_name);
+ }
}
done:
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 509f85148fee..702754dd1daf 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -65,8 +65,6 @@ static size_t vmcoredd_orig_sz;
static DECLARE_RWSEM(vmcore_cb_rwsem);
/* List of registered vmcore callbacks. */
static LIST_HEAD(vmcore_cb_list);
-/* Whether we had a surprise unregistration of a callback. */
-static bool vmcore_cb_unstable;
/* Whether the vmcore has been opened once. */
static bool vmcore_opened;
@@ -94,10 +92,8 @@ void unregister_vmcore_cb(struct vmcore_cb *cb)
* very unusual (e.g., forced driver removal), but we cannot stop
* unregistering.
*/
- if (vmcore_opened) {
+ if (vmcore_opened)
pr_warn_once("Unexpected vmcore callback unregistration\n");
- vmcore_cb_unstable = true;
- }
up_write(&vmcore_cb_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
@@ -108,8 +104,6 @@ static bool pfn_is_ram(unsigned long pfn)
bool ret = true;
lockdep_assert_held_read(&vmcore_cb_rwsem);
- if (unlikely(vmcore_cb_unstable))
- return false;
list_for_each_entry(cb, &vmcore_cb_list, next) {
if (unlikely(!cb->pfn_is_ram))
@@ -581,7 +575,7 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
* looping over all pages without a reason.
*/
down_read(&vmcore_cb_rwsem);
- if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
+ if (!list_empty(&vmcore_cb_list))
ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
else
ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 65ce0e72e7b9..e20d1484c663 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -155,11 +155,12 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info,
int nonblock)
{
+ enum pid_type type;
ssize_t ret;
DECLARE_WAITQUEUE(wait, current);
spin_lock_irq(&current->sighand->siglock);
- ret = dequeue_signal(current, &ctx->sigmask, info);
+ ret = dequeue_signal(current, &ctx->sigmask, info, &type);
switch (ret) {
case 0:
if (!nonblock)
@@ -174,7 +175,7 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
add_wait_queue(&current->sighand->signalfd_wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- ret = dequeue_signal(current, &ctx->sigmask, info);
+ ret = dequeue_signal(current, &ctx->sigmask, info, &type);
if (ret != 0)
break;
if (signal_pending(current)) {
diff --git a/fs/smbfs_common/smb2pdu.h b/fs/smbfs_common/smb2pdu.h
index 7ccadcbe684b..38b8fc514860 100644
--- a/fs/smbfs_common/smb2pdu.h
+++ b/fs/smbfs_common/smb2pdu.h
@@ -449,7 +449,7 @@ struct smb2_netname_neg_context {
*/
/* Flags */
-#define SMB2_ACCEPT_TRANSFORM_LEVEL_SECURITY 0x00000001
+#define SMB2_ACCEPT_TRANSPORT_LEVEL_SECURITY 0x00000001
struct smb2_transport_capabilities_context {
__le16 ContextType; /* 6 */
diff --git a/fs/smbfs_common/smbfsctl.h b/fs/smbfs_common/smbfsctl.h
index 926f87cd6af0..d51939c43ad7 100644
--- a/fs/smbfs_common/smbfsctl.h
+++ b/fs/smbfs_common/smbfsctl.h
@@ -95,8 +95,10 @@
#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
#define FSCTL_GET_REFS_VOLUME_DATA 0x000902D8 /* See MS-FSCC 2.3.24 */
+#define FSCTL_SET_INTEGRITY_INFORMATION_EXT 0x00090380
#define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3
#define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b
+#define FSCTL_REFS_STREAM_SNAPSHOT_MANAGEMENT 0x00090440
#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF
#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index bb44ff4c5cc6..b1b556dbce12 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/magic.h>
#include <linux/xattr.h>
+#include <linux/backing-dev.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -112,6 +113,24 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem(
return decompressor;
}
+static int squashfs_bdi_init(struct super_block *sb)
+{
+ int err;
+ unsigned int major = MAJOR(sb->s_dev);
+ unsigned int minor = MINOR(sb->s_dev);
+
+ bdi_put(sb->s_bdi);
+ sb->s_bdi = &noop_backing_dev_info;
+
+ err = super_setup_bdi_name(sb, "squashfs_%u_%u", major, minor);
+ if (err)
+ return err;
+
+ sb->s_bdi->ra_pages = 0;
+ sb->s_bdi->io_pages = 0;
+
+ return 0;
+}
static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
@@ -127,6 +146,20 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
TRACE("Entered squashfs_fill_superblock\n");
+ /*
+ * squashfs provides 'backing_dev_info' in order to disable read-ahead. For
+ * squashfs, I/O is not deferred, it is done immediately in readpage,
+ * which means the user would always have to wait their own I/O. So the effect
+ * of readahead is very weak for squashfs. squashfs_bdi_init will set
+ * sb->s_bdi->ra_pages and sb->s_bdi->io_pages to 0 and close readahead for
+ * squashfs.
+ */
+ err = squashfs_bdi_init(sb);
+ if (err) {
+ errorf(fc, "squashfs init bdi failed");
+ return err;
+ }
+
sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL);
if (sb->s_fs_info == NULL) {
ERROR("Failed to allocate squashfs_sb_info\n");
diff --git a/fs/super.c b/fs/super.c
index 3bfc0f8fbd5b..7af820ba5ad5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -31,7 +31,6 @@
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
-#include <linux/cleancache.h>
#include <linux/fscrypt.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
@@ -260,7 +259,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_time_gran = 1000000000;
s->s_time_min = TIME64_MIN;
s->s_time_max = TIME64_MAX;
- s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS;
s->s_shrink.scan_objects = super_cache_scan;
@@ -330,7 +328,6 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
- cleancache_invalidate_fs(s);
unregister_shrinker(&s->s_shrink);
fs->kill_sb(s);
@@ -1423,8 +1420,8 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
}
EXPORT_SYMBOL(mount_nodev);
-static int reconfigure_single(struct super_block *s,
- int flags, void *data)
+int reconfigure_single(struct super_block *s,
+ int flags, void *data)
{
struct fs_context *fc;
int ret;
diff --git a/fs/sysctls.c b/fs/sysctls.c
new file mode 100644
index 000000000000..c701273c9432
--- /dev/null
+++ b/fs/sysctls.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * /proc/sys/fs shared sysctls
+ *
+ * These sysctls are shared between different filesystems.
+ */
+#include <linux/init.h>
+#include <linux/sysctl.h>
+
+static struct ctl_table fs_shared_sysctls[] = {
+ {
+ .procname = "overflowuid",
+ .data = &fs_overflowuid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
+ },
+ {
+ .procname = "overflowgid",
+ .data = &fs_overflowgid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
+ },
+ { }
+};
+
+DECLARE_SYSCTL_BASE(fs, fs_shared_sysctls);
+
+static int __init init_fs_sysctls(void)
+{
+ return register_sysctl_base(fs);
+}
+
+early_initcall(init_fs_sysctls);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 3616839c5c4b..bafc02bf8220 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -109,12 +109,12 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
* also the directory that is being deleted.
*/
inode_unlock(inode);
- inode_unlock(dentry->d_inode);
+ inode_unlock(d_inode(dentry));
ret = tracefs_ops.rmdir(name);
inode_lock_nested(inode, I_MUTEX_PARENT);
- inode_lock(dentry->d_inode);
+ inode_lock(d_inode(dentry));
kfree(name);
@@ -284,7 +284,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
static int tracefs_apply_options(struct super_block *sb)
{
struct tracefs_fs_info *fsi = sb->s_fs_info;
- struct inode *inode = sb->s_root->d_inode;
+ struct inode *inode = d_inode(sb->s_root);
struct tracefs_mount_opts *opts = &fsi->mount_opts;
inode->i_mode &= ~S_IALLUGO;
@@ -403,18 +403,18 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!parent)
parent = tracefs_mount->mnt_root;
- inode_lock(parent->d_inode);
- if (unlikely(IS_DEADDIR(parent->d_inode)))
+ inode_lock(d_inode(parent));
+ if (unlikely(IS_DEADDIR(d_inode(parent))))
dentry = ERR_PTR(-ENOENT);
else
dentry = lookup_one_len(name, parent, strlen(name));
- if (!IS_ERR(dentry) && dentry->d_inode) {
+ if (!IS_ERR(dentry) && d_inode(dentry)) {
dput(dentry);
dentry = ERR_PTR(-EEXIST);
}
if (IS_ERR(dentry)) {
- inode_unlock(parent->d_inode);
+ inode_unlock(d_inode(parent));
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
@@ -423,7 +423,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
static struct dentry *failed_creating(struct dentry *dentry)
{
- inode_unlock(dentry->d_parent->d_inode);
+ inode_unlock(d_inode(dentry->d_parent));
dput(dentry);
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
return NULL;
@@ -431,7 +431,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
static struct dentry *end_creating(struct dentry *dentry)
{
- inode_unlock(dentry->d_parent->d_inode);
+ inode_unlock(d_inode(dentry->d_parent));
return dentry;
}
@@ -489,7 +489,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
inode->i_uid = d_inode(dentry->d_parent)->i_uid;
inode->i_gid = d_inode(dentry->d_parent)->i_gid;
d_instantiate(dentry, inode);
- fsnotify_create(dentry->d_parent->d_inode, dentry);
+ fsnotify_create(d_inode(dentry->d_parent), dentry);
return end_creating(dentry);
}
@@ -516,8 +516,8 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
d_instantiate(dentry, inode);
- inc_nlink(dentry->d_parent->d_inode);
- fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ inc_nlink(d_inode(dentry->d_parent));
+ fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
return end_creating(dentry);
}
diff --git a/fs/unicode/.gitignore b/fs/unicode/.gitignore
index 361294571ab0..51cdf3fb4dd4 100644
--- a/fs/unicode/.gitignore
+++ b/fs/unicode/.gitignore
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
/mkutf8data
-/utf8data.h
+/utf8data.c
diff --git a/fs/unicode/Kconfig b/fs/unicode/Kconfig
index 2c27b9a5cd6c..610d7bc05d6e 100644
--- a/fs/unicode/Kconfig
+++ b/fs/unicode/Kconfig
@@ -8,7 +8,16 @@ config UNICODE
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
support.
+config UNICODE_UTF8_DATA
+ tristate "UTF-8 normalization and casefolding tables"
+ depends on UNICODE
+ default UNICODE
+ help
+ This contains a large table of case foldings, which can be loaded as
+ a separate module if you say M here. To be on the safe side stick
+ to the default of Y. Saying N here makes no sense, if you do not want
+ utf8 casefolding support, disable CONFIG_UNICODE instead.
+
config UNICODE_NORMALIZATION_SELFTEST
tristate "Test UTF-8 normalization support"
- depends on UNICODE
- default n
+ depends on UNICODE_UTF8_DATA
diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile
index b88aecc86550..2f9d9188852b 100644
--- a/fs/unicode/Makefile
+++ b/fs/unicode/Makefile
@@ -2,14 +2,15 @@
obj-$(CONFIG_UNICODE) += unicode.o
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
+obj-$(CONFIG_UNICODE_UTF8_DATA) += utf8data.o
unicode-y := utf8-norm.o utf8-core.o
-$(obj)/utf8-norm.o: $(obj)/utf8data.h
+$(obj)/utf8-data.o: $(obj)/utf8data.c
-# In the normal build, the checked-in utf8data.h is just shipped.
+# In the normal build, the checked-in utf8data.c is just shipped.
#
-# To generate utf8data.h from UCD, put *.txt files in this directory
+# To generate utf8data.c from UCD, put *.txt files in this directory
# and pass REGENERATE_UTF8DATA=1 from the command line.
ifdef REGENERATE_UTF8DATA
@@ -24,15 +25,15 @@ quiet_cmd_utf8data = GEN $@
-t $(srctree)/$(src)/NormalizationTest.txt \
-o $@
-$(obj)/utf8data.h: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
+$(obj)/utf8data.c: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
$(call if_changed,utf8data)
else
-$(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE
+$(obj)/utf8data.c: $(src)/utf8data.c_shipped FORCE
$(call if_changed,shipped)
endif
-targets += utf8data.h
+targets += utf8data.c
hostprogs += mkutf8data
diff --git a/fs/unicode/mkutf8data.c b/fs/unicode/mkutf8data.c
index ff2025ac5a32..bc1a7c8b5c8d 100644
--- a/fs/unicode/mkutf8data.c
+++ b/fs/unicode/mkutf8data.c
@@ -3287,12 +3287,10 @@ static void write_file(void)
open_fail(utf8_name, errno);
fprintf(file, "/* This file is generated code, do not edit. */\n");
- fprintf(file, "#ifndef __INCLUDED_FROM_UTF8NORM_C__\n");
- fprintf(file, "#error Only nls_utf8-norm.c should include this file.\n");
- fprintf(file, "#endif\n");
fprintf(file, "\n");
- fprintf(file, "static const unsigned int utf8vers = %#x;\n",
- unicode_maxage);
+ fprintf(file, "#include <linux/module.h>\n");
+ fprintf(file, "#include <linux/kernel.h>\n");
+ fprintf(file, "#include \"utf8n.h\"\n");
fprintf(file, "\n");
fprintf(file, "static const unsigned int utf8agetab[] = {\n");
for (i = 0; i != ages_count; i++)
@@ -3339,6 +3337,22 @@ static void write_file(void)
fprintf(file, "\n");
}
fprintf(file, "};\n");
+ fprintf(file, "\n");
+ fprintf(file, "struct utf8data_table utf8_data_table = {\n");
+ fprintf(file, "\t.utf8agetab = utf8agetab,\n");
+ fprintf(file, "\t.utf8agetab_size = ARRAY_SIZE(utf8agetab),\n");
+ fprintf(file, "\n");
+ fprintf(file, "\t.utf8nfdicfdata = utf8nfdicfdata,\n");
+ fprintf(file, "\t.utf8nfdicfdata_size = ARRAY_SIZE(utf8nfdicfdata),\n");
+ fprintf(file, "\n");
+ fprintf(file, "\t.utf8nfdidata = utf8nfdidata,\n");
+ fprintf(file, "\t.utf8nfdidata_size = ARRAY_SIZE(utf8nfdidata),\n");
+ fprintf(file, "\n");
+ fprintf(file, "\t.utf8data = utf8data,\n");
+ fprintf(file, "};\n");
+ fprintf(file, "EXPORT_SYMBOL_GPL(utf8_data_table);");
+ fprintf(file, "\n");
+ fprintf(file, "MODULE_LICENSE(\"GPL v2\");\n");
fclose(file);
}
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index dc25823bfed9..67aaadc3ab07 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -5,16 +5,13 @@
#include <linux/slab.h>
#include <linux/parser.h>
#include <linux/errno.h>
-#include <linux/unicode.h>
#include <linux/stringhash.h>
#include "utf8n.h"
int utf8_validate(const struct unicode_map *um, const struct qstr *str)
{
- const struct utf8data *data = utf8nfdi(um->version);
-
- if (utf8nlen(data, str->name, str->len) < 0)
+ if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0)
return -1;
return 0;
}
@@ -23,14 +20,13 @@ EXPORT_SYMBOL(utf8_validate);
int utf8_strncmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
- const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;
- if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+ if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0)
return -EINVAL;
- if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
+ if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0)
return -EINVAL;
do {
@@ -50,14 +46,13 @@ EXPORT_SYMBOL(utf8_strncmp);
int utf8_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
- const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;
- if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+ if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
return -EINVAL;
- if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
+ if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0)
return -EINVAL;
do {
@@ -81,12 +76,11 @@ int utf8_strncasecmp_folded(const struct unicode_map *um,
const struct qstr *cf,
const struct qstr *s1)
{
- const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1;
int c1, c2;
int i = 0;
- if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+ if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
return -EINVAL;
do {
@@ -105,11 +99,10 @@ EXPORT_SYMBOL(utf8_strncasecmp_folded);
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
- const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur;
size_t nlen = 0;
- if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+ if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
return -EINVAL;
for (nlen = 0; nlen < dlen; nlen++) {
@@ -128,12 +121,11 @@ EXPORT_SYMBOL(utf8_casefold);
int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
struct qstr *str)
{
- const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur;
int c;
unsigned long hash = init_name_hash(salt);
- if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+ if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
return -EINVAL;
while ((c = utf8byte(&cur))) {
@@ -149,11 +141,10 @@ EXPORT_SYMBOL(utf8_casefold_hash);
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
- const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur;
ssize_t nlen = 0;
- if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+ if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0)
return -EINVAL;
for (nlen = 0; nlen < dlen; nlen++) {
@@ -167,69 +158,59 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
}
return -EINVAL;
}
-
EXPORT_SYMBOL(utf8_normalize);
-static int utf8_parse_version(const char *version, unsigned int *maj,
- unsigned int *min, unsigned int *rev)
+static const struct utf8data *find_table_version(const struct utf8data *table,
+ size_t nr_entries, unsigned int version)
{
- substring_t args[3];
- char version_string[12];
- static const struct match_token token[] = {
- {1, "%d.%d.%d"},
- {0, NULL}
- };
-
- strncpy(version_string, version, sizeof(version_string));
-
- if (match_token(version_string, token, args) != 1)
- return -EINVAL;
-
- if (match_int(&args[0], maj) || match_int(&args[1], min) ||
- match_int(&args[2], rev))
- return -EINVAL;
+ size_t i = nr_entries - 1;
- return 0;
+ while (version < table[i].maxage)
+ i--;
+ if (version > table[i].maxage)
+ return NULL;
+ return &table[i];
}
-struct unicode_map *utf8_load(const char *version)
+struct unicode_map *utf8_load(unsigned int version)
{
- struct unicode_map *um = NULL;
- int unicode_version;
-
- if (version) {
- unsigned int maj, min, rev;
-
- if (utf8_parse_version(version, &maj, &min, &rev) < 0)
- return ERR_PTR(-EINVAL);
-
- if (!utf8version_is_supported(maj, min, rev))
- return ERR_PTR(-EINVAL);
-
- unicode_version = UNICODE_AGE(maj, min, rev);
- } else {
- unicode_version = utf8version_latest();
- printk(KERN_WARNING"UTF-8 version not specified. "
- "Assuming latest supported version (%d.%d.%d).",
- (unicode_version >> 16) & 0xff,
- (unicode_version >> 8) & 0xff,
- (unicode_version & 0xff));
- }
+ struct unicode_map *um;
um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
if (!um)
return ERR_PTR(-ENOMEM);
-
- um->charset = "UTF-8";
- um->version = unicode_version;
-
+ um->version = version;
+
+ um->tables = symbol_request(utf8_data_table);
+ if (!um->tables)
+ goto out_free_um;
+
+ if (!utf8version_is_supported(um, version))
+ goto out_symbol_put;
+ um->ntab[UTF8_NFDI] = find_table_version(um->tables->utf8nfdidata,
+ um->tables->utf8nfdidata_size, um->version);
+ if (!um->ntab[UTF8_NFDI])
+ goto out_symbol_put;
+ um->ntab[UTF8_NFDICF] = find_table_version(um->tables->utf8nfdicfdata,
+ um->tables->utf8nfdicfdata_size, um->version);
+ if (!um->ntab[UTF8_NFDICF])
+ goto out_symbol_put;
return um;
+
+out_symbol_put:
+ symbol_put(um->tables);
+out_free_um:
+ kfree(um);
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL(utf8_load);
void utf8_unload(struct unicode_map *um)
{
- kfree(um);
+ if (um) {
+ symbol_put(utf8_data_table);
+ kfree(um);
+ }
}
EXPORT_SYMBOL(utf8_unload);
diff --git a/fs/unicode/utf8-norm.c b/fs/unicode/utf8-norm.c
index 1d2d2e5b906a..768f8ab448b8 100644
--- a/fs/unicode/utf8-norm.c
+++ b/fs/unicode/utf8-norm.c
@@ -6,34 +6,17 @@
#include "utf8n.h"
-struct utf8data {
- unsigned int maxage;
- unsigned int offset;
-};
-
-#define __INCLUDED_FROM_UTF8NORM_C__
-#include "utf8data.h"
-#undef __INCLUDED_FROM_UTF8NORM_C__
-
-int utf8version_is_supported(u8 maj, u8 min, u8 rev)
+int utf8version_is_supported(const struct unicode_map *um, unsigned int version)
{
- int i = ARRAY_SIZE(utf8agetab) - 1;
- unsigned int sb_utf8version = UNICODE_AGE(maj, min, rev);
+ int i = um->tables->utf8agetab_size - 1;
- while (i >= 0 && utf8agetab[i] != 0) {
- if (sb_utf8version == utf8agetab[i])
+ while (i >= 0 && um->tables->utf8agetab[i] != 0) {
+ if (version == um->tables->utf8agetab[i])
return 1;
i--;
}
return 0;
}
-EXPORT_SYMBOL(utf8version_is_supported);
-
-int utf8version_latest(void)
-{
- return utf8vers;
-}
-EXPORT_SYMBOL(utf8version_latest);
/*
* UTF-8 valid ranges.
@@ -168,7 +151,7 @@ typedef const unsigned char utf8trie_t;
* underlying datatype: unsigned char.
*
* leaf[0]: The unicode version, stored as a generation number that is
- * an index into utf8agetab[]. With this we can filter code
+ * an index into ->utf8agetab[]. With this we can filter code
* points based on the unicode version in which they were
* defined. The CCC of a non-defined code point is 0.
* leaf[1]: Canonical Combining Class. During normalization, we need
@@ -316,21 +299,19 @@ utf8hangul(const char *str, unsigned char *hangul)
* is well-formed and corresponds to a known unicode code point. The
* shorthand for this will be "is valid UTF-8 unicode".
*/
-static utf8leaf_t *utf8nlookup(const struct utf8data *data,
- unsigned char *hangul, const char *s, size_t len)
+static utf8leaf_t *utf8nlookup(const struct unicode_map *um,
+ enum utf8_normalization n, unsigned char *hangul, const char *s,
+ size_t len)
{
- utf8trie_t *trie = NULL;
+ utf8trie_t *trie = um->tables->utf8data + um->ntab[n]->offset;
int offlen;
int offset;
int mask;
int node;
- if (!data)
- return NULL;
if (len == 0)
return NULL;
- trie = utf8data + data->offset;
node = 1;
while (node) {
offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
@@ -392,172 +373,29 @@ static utf8leaf_t *utf8nlookup(const struct utf8data *data,
*
* Forwards to utf8nlookup().
*/
-static utf8leaf_t *utf8lookup(const struct utf8data *data,
- unsigned char *hangul, const char *s)
+static utf8leaf_t *utf8lookup(const struct unicode_map *um,
+ enum utf8_normalization n, unsigned char *hangul, const char *s)
{
- return utf8nlookup(data, hangul, s, (size_t)-1);
-}
-
-/*
- * Maximum age of any character in s.
- * Return -1 if s is not valid UTF-8 unicode.
- * Return 0 if only non-assigned code points are used.
- */
-int utf8agemax(const struct utf8data *data, const char *s)
-{
- utf8leaf_t *leaf;
- int age = 0;
- int leaf_age;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
-
- while (*s) {
- leaf = utf8lookup(data, hangul, s);
- if (!leaf)
- return -1;
-
- leaf_age = utf8agetab[LEAF_GEN(leaf)];
- if (leaf_age <= data->maxage && leaf_age > age)
- age = leaf_age;
- s += utf8clen(s);
- }
- return age;
+ return utf8nlookup(um, n, hangul, s, (size_t)-1);
}
-EXPORT_SYMBOL(utf8agemax);
-
-/*
- * Minimum age of any character in s.
- * Return -1 if s is not valid UTF-8 unicode.
- * Return 0 if non-assigned code points are used.
- */
-int utf8agemin(const struct utf8data *data, const char *s)
-{
- utf8leaf_t *leaf;
- int age;
- int leaf_age;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
- age = data->maxage;
- while (*s) {
- leaf = utf8lookup(data, hangul, s);
- if (!leaf)
- return -1;
- leaf_age = utf8agetab[LEAF_GEN(leaf)];
- if (leaf_age <= data->maxage && leaf_age < age)
- age = leaf_age;
- s += utf8clen(s);
- }
- return age;
-}
-EXPORT_SYMBOL(utf8agemin);
-
-/*
- * Maximum age of any character in s, touch at most len bytes.
- * Return -1 if s is not valid UTF-8 unicode.
- */
-int utf8nagemax(const struct utf8data *data, const char *s, size_t len)
-{
- utf8leaf_t *leaf;
- int age = 0;
- int leaf_age;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
-
- while (len && *s) {
- leaf = utf8nlookup(data, hangul, s, len);
- if (!leaf)
- return -1;
- leaf_age = utf8agetab[LEAF_GEN(leaf)];
- if (leaf_age <= data->maxage && leaf_age > age)
- age = leaf_age;
- len -= utf8clen(s);
- s += utf8clen(s);
- }
- return age;
-}
-EXPORT_SYMBOL(utf8nagemax);
-
-/*
- * Maximum age of any character in s, touch at most len bytes.
- * Return -1 if s is not valid UTF-8 unicode.
- */
-int utf8nagemin(const struct utf8data *data, const char *s, size_t len)
-{
- utf8leaf_t *leaf;
- int leaf_age;
- int age;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
- age = data->maxage;
- while (len && *s) {
- leaf = utf8nlookup(data, hangul, s, len);
- if (!leaf)
- return -1;
- leaf_age = utf8agetab[LEAF_GEN(leaf)];
- if (leaf_age <= data->maxage && leaf_age < age)
- age = leaf_age;
- len -= utf8clen(s);
- s += utf8clen(s);
- }
- return age;
-}
-EXPORT_SYMBOL(utf8nagemin);
-
-/*
- * Length of the normalization of s.
- * Return -1 if s is not valid UTF-8 unicode.
- *
- * A string of Default_Ignorable_Code_Point has length 0.
- */
-ssize_t utf8len(const struct utf8data *data, const char *s)
-{
- utf8leaf_t *leaf;
- size_t ret = 0;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
- while (*s) {
- leaf = utf8lookup(data, hangul, s);
- if (!leaf)
- return -1;
- if (utf8agetab[LEAF_GEN(leaf)] > data->maxage)
- ret += utf8clen(s);
- else if (LEAF_CCC(leaf) == DECOMPOSE)
- ret += strlen(LEAF_STR(leaf));
- else
- ret += utf8clen(s);
- s += utf8clen(s);
- }
- return ret;
-}
-EXPORT_SYMBOL(utf8len);
/*
* Length of the normalization of s, touch at most len bytes.
* Return -1 if s is not valid UTF-8 unicode.
*/
-ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len)
+ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
+ const char *s, size_t len)
{
utf8leaf_t *leaf;
size_t ret = 0;
unsigned char hangul[UTF8HANGULLEAF];
- if (!data)
- return -1;
while (len && *s) {
- leaf = utf8nlookup(data, hangul, s, len);
+ leaf = utf8nlookup(um, n, hangul, s, len);
if (!leaf)
return -1;
- if (utf8agetab[LEAF_GEN(leaf)] > data->maxage)
+ if (um->tables->utf8agetab[LEAF_GEN(leaf)] >
+ um->ntab[n]->maxage)
ret += utf8clen(s);
else if (LEAF_CCC(leaf) == DECOMPOSE)
ret += strlen(LEAF_STR(leaf));
@@ -568,7 +406,6 @@ ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len)
}
return ret;
}
-EXPORT_SYMBOL(utf8nlen);
/*
* Set up an utf8cursor for use by utf8byte().
@@ -580,14 +417,13 @@ EXPORT_SYMBOL(utf8nlen);
*
* Returns -1 on error, 0 on success.
*/
-int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s, size_t len)
+int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
+ enum utf8_normalization n, const char *s, size_t len)
{
- if (!data)
- return -1;
if (!s)
return -1;
- u8c->data = data;
+ u8c->um = um;
+ u8c->n = n;
u8c->s = s;
u8c->p = NULL;
u8c->ss = NULL;
@@ -604,23 +440,6 @@ int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
return -1;
return 0;
}
-EXPORT_SYMBOL(utf8ncursor);
-
-/*
- * Set up an utf8cursor for use by utf8byte().
- *
- * u8c : pointer to cursor.
- * data : const struct utf8data to use for normalization.
- * s : NUL-terminated string.
- *
- * Returns -1 on error, 0 on success.
- */
-int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s)
-{
- return utf8ncursor(u8c, data, s, (unsigned int)-1);
-}
-EXPORT_SYMBOL(utf8cursor);
/*
* Get one byte from the normalized form of the string described by u8c.
@@ -678,9 +497,9 @@ int utf8byte(struct utf8cursor *u8c)
/* Look up the data for the current character. */
if (u8c->p) {
- leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
+ leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
} else {
- leaf = utf8nlookup(u8c->data, u8c->hangul,
+ leaf = utf8nlookup(u8c->um, u8c->n, u8c->hangul,
u8c->s, u8c->len);
}
@@ -690,7 +509,8 @@ int utf8byte(struct utf8cursor *u8c)
ccc = LEAF_CCC(leaf);
/* Characters that are too new have CCC 0. */
- if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) {
+ if (u8c->um->tables->utf8agetab[LEAF_GEN(leaf)] >
+ u8c->um->ntab[u8c->n]->maxage) {
ccc = STOPPER;
} else if (ccc == DECOMPOSE) {
u8c->len -= utf8clen(u8c->s);
@@ -704,7 +524,7 @@ int utf8byte(struct utf8cursor *u8c)
goto ccc_mismatch;
}
- leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
+ leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
if (!leaf)
return -1;
ccc = LEAF_CCC(leaf);
@@ -765,28 +585,10 @@ ccc_mismatch:
}
}
}
-EXPORT_SYMBOL(utf8byte);
-
-const struct utf8data *utf8nfdi(unsigned int maxage)
-{
- int i = ARRAY_SIZE(utf8nfdidata) - 1;
-
- while (maxage < utf8nfdidata[i].maxage)
- i--;
- if (maxage > utf8nfdidata[i].maxage)
- return NULL;
- return &utf8nfdidata[i];
-}
-EXPORT_SYMBOL(utf8nfdi);
-
-const struct utf8data *utf8nfdicf(unsigned int maxage)
-{
- int i = ARRAY_SIZE(utf8nfdicfdata) - 1;
- while (maxage < utf8nfdicfdata[i].maxage)
- i--;
- if (maxage > utf8nfdicfdata[i].maxage)
- return NULL;
- return &utf8nfdicfdata[i];
-}
-EXPORT_SYMBOL(utf8nfdicf);
+#ifdef CONFIG_UNICODE_NORMALIZATION_SELFTEST_MODULE
+EXPORT_SYMBOL_GPL(utf8version_is_supported);
+EXPORT_SYMBOL_GPL(utf8nlen);
+EXPORT_SYMBOL_GPL(utf8ncursor);
+EXPORT_SYMBOL_GPL(utf8byte);
+#endif
diff --git a/fs/unicode/utf8-selftest.c b/fs/unicode/utf8-selftest.c
index 6fe8af7edccb..eb2bbdd688d7 100644
--- a/fs/unicode/utf8-selftest.c
+++ b/fs/unicode/utf8-selftest.c
@@ -18,9 +18,7 @@ unsigned int failed_tests;
unsigned int total_tests;
/* Tests will be based on this version. */
-#define latest_maj 12
-#define latest_min 1
-#define latest_rev 0
+#define UTF8_LATEST UNICODE_AGE(12, 1, 0)
#define _test(cond, func, line, fmt, ...) do { \
total_tests++; \
@@ -160,18 +158,22 @@ static const struct {
}
};
-static void check_utf8_nfdi(void)
+static ssize_t utf8len(const struct unicode_map *um, enum utf8_normalization n,
+ const char *s)
+{
+ return utf8nlen(um, n, s, (size_t)-1);
+}
+
+static int utf8cursor(struct utf8cursor *u8c, const struct unicode_map *um,
+ enum utf8_normalization n, const char *s)
+{
+ return utf8ncursor(u8c, um, n, s, (unsigned int)-1);
+}
+
+static void check_utf8_nfdi(struct unicode_map *um)
{
int i;
struct utf8cursor u8c;
- const struct utf8data *data;
-
- data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
- if (!data) {
- pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
- __func__, latest_maj, latest_min, latest_rev);
- return;
- }
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
int len = strlen(nfdi_test_data[i].str);
@@ -179,10 +181,11 @@ static void check_utf8_nfdi(void)
int j = 0;
unsigned char c;
- test((utf8len(data, nfdi_test_data[i].str) == nlen));
- test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen));
+ test((utf8len(um, UTF8_NFDI, nfdi_test_data[i].str) == nlen));
+ test((utf8nlen(um, UTF8_NFDI, nfdi_test_data[i].str, len) ==
+ nlen));
- if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0)
+ if (utf8cursor(&u8c, um, UTF8_NFDI, nfdi_test_data[i].str) < 0)
pr_err("can't create cursor\n");
while ((c = utf8byte(&u8c)) > 0) {
@@ -196,18 +199,10 @@ static void check_utf8_nfdi(void)
}
}
-static void check_utf8_nfdicf(void)
+static void check_utf8_nfdicf(struct unicode_map *um)
{
int i;
struct utf8cursor u8c;
- const struct utf8data *data;
-
- data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
- if (!data) {
- pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
- __func__, latest_maj, latest_min, latest_rev);
- return;
- }
for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
int len = strlen(nfdicf_test_data[i].str);
@@ -215,10 +210,13 @@ static void check_utf8_nfdicf(void)
int j = 0;
unsigned char c;
- test((utf8len(data, nfdicf_test_data[i].str) == nlen));
- test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen));
+ test((utf8len(um, UTF8_NFDICF, nfdicf_test_data[i].str) ==
+ nlen));
+ test((utf8nlen(um, UTF8_NFDICF, nfdicf_test_data[i].str, len) ==
+ nlen));
- if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0)
+ if (utf8cursor(&u8c, um, UTF8_NFDICF,
+ nfdicf_test_data[i].str) < 0)
pr_err("can't create cursor\n");
while ((c = utf8byte(&u8c)) > 0) {
@@ -232,16 +230,9 @@ static void check_utf8_nfdicf(void)
}
}
-static void check_utf8_comparisons(void)
+static void check_utf8_comparisons(struct unicode_map *table)
{
int i;
- struct unicode_map *table = utf8_load("12.1.0");
-
- if (IS_ERR(table)) {
- pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
- __func__, latest_maj, latest_min, latest_rev);
- return;
- }
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
const struct qstr s1 = {.name = nfdi_test_data[i].str,
@@ -262,42 +253,49 @@ static void check_utf8_comparisons(void)
test_f(!utf8_strncasecmp(table, &s1, &s2),
"%s %s comparison mismatch\n", s1.name, s2.name);
}
-
- utf8_unload(table);
}
-static void check_supported_versions(void)
+static void check_supported_versions(struct unicode_map *um)
{
/* Unicode 7.0.0 should be supported. */
- test(utf8version_is_supported(7, 0, 0));
+ test(utf8version_is_supported(um, UNICODE_AGE(7, 0, 0)));
/* Unicode 9.0.0 should be supported. */
- test(utf8version_is_supported(9, 0, 0));
+ test(utf8version_is_supported(um, UNICODE_AGE(9, 0, 0)));
/* Unicode 1x.0.0 (the latest version) should be supported. */
- test(utf8version_is_supported(latest_maj, latest_min, latest_rev));
+ test(utf8version_is_supported(um, UTF8_LATEST));
/* Next versions don't exist. */
- test(!utf8version_is_supported(13, 0, 0));
- test(!utf8version_is_supported(0, 0, 0));
- test(!utf8version_is_supported(-1, -1, -1));
+ test(!utf8version_is_supported(um, UNICODE_AGE(13, 0, 0)));
+ test(!utf8version_is_supported(um, UNICODE_AGE(0, 0, 0)));
+ test(!utf8version_is_supported(um, UNICODE_AGE(-1, -1, -1)));
}
static int __init init_test_ucd(void)
{
+ struct unicode_map *um;
+
failed_tests = 0;
total_tests = 0;
- check_supported_versions();
- check_utf8_nfdi();
- check_utf8_nfdicf();
- check_utf8_comparisons();
+ um = utf8_load(UTF8_LATEST);
+ if (IS_ERR(um)) {
+ pr_err("%s: Unable to load utf8 table.\n", __func__);
+ return PTR_ERR(um);
+ }
+
+ check_supported_versions(um);
+ check_utf8_nfdi(um);
+ check_utf8_nfdicf(um);
+ check_utf8_comparisons(um);
if (!failed_tests)
pr_info("All %u tests passed\n", total_tests);
else
pr_err("%u out of %u tests failed\n", failed_tests,
total_tests);
+ utf8_unload(um);
return 0;
}
diff --git a/fs/unicode/utf8data.h_shipped b/fs/unicode/utf8data.c_shipped
index 76e4f0e1b089..d9b62901aa96 100644
--- a/fs/unicode/utf8data.h_shipped
+++ b/fs/unicode/utf8data.c_shipped
@@ -1,9 +1,8 @@
/* This file is generated code, do not edit. */
-#ifndef __INCLUDED_FROM_UTF8NORM_C__
-#error Only nls_utf8-norm.c should include this file.
-#endif
-static const unsigned int utf8vers = 0xc0100;
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include "utf8n.h"
static const unsigned int utf8agetab[] = {
0,
@@ -4107,3 +4106,18 @@ static const unsigned char utf8data[64256] = {
0x52,0x04,0x00,0x00,0x11,0x04,0x00,0x00,0x02,0x00,0xcf,0x86,0xcf,0x06,0x02,0x00,
0x81,0x80,0xcf,0x86,0x85,0x84,0xcf,0x86,0xcf,0x06,0x02,0x00,0x00,0x00,0x00,0x00
};
+
+struct utf8data_table utf8_data_table = {
+ .utf8agetab = utf8agetab,
+ .utf8agetab_size = ARRAY_SIZE(utf8agetab),
+
+ .utf8nfdicfdata = utf8nfdicfdata,
+ .utf8nfdicfdata_size = ARRAY_SIZE(utf8nfdicfdata),
+
+ .utf8nfdidata = utf8nfdidata,
+ .utf8nfdidata_size = ARRAY_SIZE(utf8nfdidata),
+
+ .utf8data = utf8data,
+};
+EXPORT_SYMBOL_GPL(utf8_data_table);
+MODULE_LICENSE("GPL v2");
diff --git a/fs/unicode/utf8n.h b/fs/unicode/utf8n.h
index 0acd530c2c79..bd00d587747a 100644
--- a/fs/unicode/utf8n.h
+++ b/fs/unicode/utf8n.h
@@ -11,53 +11,9 @@
#include <linux/export.h>
#include <linux/string.h>
#include <linux/module.h>
+#include <linux/unicode.h>
-/* Encoding a unicode version number as a single unsigned int. */
-#define UNICODE_MAJ_SHIFT (16)
-#define UNICODE_MIN_SHIFT (8)
-
-#define UNICODE_AGE(MAJ, MIN, REV) \
- (((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \
- ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
- ((unsigned int)(REV)))
-
-/* Highest unicode version supported by the data tables. */
-extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
-extern int utf8version_latest(void);
-
-/*
- * Look for the correct const struct utf8data for a unicode version.
- * Returns NULL if the version requested is too new.
- *
- * Two normalization forms are supported: nfdi and nfdicf.
- *
- * nfdi:
- * - Apply unicode normalization form NFD.
- * - Remove any Default_Ignorable_Code_Point.
- *
- * nfdicf:
- * - Apply unicode normalization form NFD.
- * - Remove any Default_Ignorable_Code_Point.
- * - Apply a full casefold (C + F).
- */
-extern const struct utf8data *utf8nfdi(unsigned int maxage);
-extern const struct utf8data *utf8nfdicf(unsigned int maxage);
-
-/*
- * Determine the maximum age of any unicode character in the string.
- * Returns 0 if only unassigned code points are present.
- * Returns -1 if the input is not valid UTF-8.
- */
-extern int utf8agemax(const struct utf8data *data, const char *s);
-extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len);
-
-/*
- * Determine the minimum age of any unicode character in the string.
- * Returns 0 if any unassigned code points are present.
- * Returns -1 if the input is not valid UTF-8.
- */
-extern int utf8agemin(const struct utf8data *data, const char *s);
-extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
+int utf8version_is_supported(const struct unicode_map *um, unsigned int version);
/*
* Determine the length of the normalized from of the string,
@@ -65,8 +21,8 @@ extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
* Returns 0 if only ignorable code points are present.
* Returns -1 if the input is not valid UTF-8.
*/
-extern ssize_t utf8len(const struct utf8data *data, const char *s);
-extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
+ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
+ const char *s, size_t len);
/* Needed in struct utf8cursor below. */
#define UTF8HANGULLEAF (12)
@@ -75,7 +31,8 @@ extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
* Cursor structure used by the normalizer.
*/
struct utf8cursor {
- const struct utf8data *data;
+ const struct unicode_map *um;
+ enum utf8_normalization n;
const char *s;
const char *p;
const char *ss;
@@ -92,10 +49,8 @@ struct utf8cursor {
* Returns 0 on success.
* Returns -1 on failure.
*/
-extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s);
-extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s, size_t len);
+int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
+ enum utf8_normalization n, const char *s, size_t len);
/*
* Get the next byte in the normalization.
@@ -105,4 +60,24 @@ extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
*/
extern int utf8byte(struct utf8cursor *u8c);
+struct utf8data {
+ unsigned int maxage;
+ unsigned int offset;
+};
+
+struct utf8data_table {
+ const unsigned int *utf8agetab;
+ int utf8agetab_size;
+
+ const struct utf8data *utf8nfdicfdata;
+ int utf8nfdicfdata_size;
+
+ const struct utf8data *utf8nfdidata;
+ int utf8nfdidata_size;
+
+ const unsigned char *utf8data;
+};
+
+extern struct utf8data_table utf8_data_table;
+
#endif /* UTF8NORM_H */
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 22bf14ab2d16..e26b10132d47 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -15,6 +15,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
#include <linux/poll.h>
#include <linux/slab.h>
@@ -877,7 +878,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
new_flags, vma->anon_vma,
vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- NULL_VM_UFFD_CTX);
+ NULL_VM_UFFD_CTX, vma_anon_name(vma));
if (prev)
vma = prev;
else
@@ -1436,7 +1437,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- ((struct vm_userfaultfd_ctx){ ctx }));
+ ((struct vm_userfaultfd_ctx){ ctx }),
+ vma_anon_name(vma));
if (prev) {
vma = prev;
goto next;
@@ -1613,7 +1615,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- NULL_VM_UFFD_CTX);
+ NULL_VM_UFFD_CTX, vma_anon_name(vma));
if (prev) {
vma = prev;
goto next;
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 6f49bf39183c..c557a030acfe 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -4,7 +4,6 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include <linux/backing-dev.h>
#include "xfs_message.h"
#include "xfs_trace.h"
@@ -26,6 +25,6 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
"%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
current->comm, current->pid,
(unsigned int)size, __func__, lflags);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(lflags);
} while (1);
}
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index c43877c8a279..505533c43a92 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -93,21 +93,6 @@ struct getbmapx {
#define XFS_FMR_OWN_DEFECTIVE FMR_OWNER('X', 8) /* bad blocks */
/*
- * Structure for XFS_IOC_FSSETDM.
- * For use by backup and restore programs to set the XFS on-disk inode
- * fields di_dmevmask and di_dmstate. These must be set to exactly and
- * only values previously obtained via xfs_bulkstat! (Specifically the
- * struct xfs_bstat fields bs_dmevmask and bs_dmstate.)
- */
-#ifndef HAVE_FSDMIDATA
-struct fsdmidata {
- __u32 fsd_dmevmask; /* corresponds to di_dmevmask */
- __u16 fsd_padding;
- __u16 fsd_dmstate; /* corresponds to di_dmstate */
-};
-#endif
-
-/*
* File segment locking set data type for 64 bit access.
* Also used for all the RESV/FREE interfaces.
*/
@@ -562,16 +547,10 @@ typedef struct xfs_fsop_handlereq {
/*
* Compound structures for passing args through Handle Request interfaces
- * xfs_fssetdm_by_handle, xfs_attrlist_by_handle, xfs_attrmulti_by_handle
- * - ioctls: XFS_IOC_FSSETDM_BY_HANDLE, XFS_IOC_ATTRLIST_BY_HANDLE, and
- * XFS_IOC_ATTRMULTI_BY_HANDLE
+ * xfs_attrlist_by_handle, xfs_attrmulti_by_handle
+ * - ioctls: XFS_IOC_ATTRLIST_BY_HANDLE, and XFS_IOC_ATTRMULTI_BY_HANDLE
*/
-typedef struct xfs_fsop_setdm_handlereq {
- struct xfs_fsop_handlereq hreq; /* handle information */
- struct fsdmidata __user *data; /* DMAPI data */
-} xfs_fsop_setdm_handlereq_t;
-
/*
* Flags passed in xfs_attr_multiop.am_flags for the attr ioctl interface.
*
@@ -781,15 +760,15 @@ struct xfs_scrub_metadata {
* For 'documentation' purposed more than anything else,
* the "cmd #" field reflects the IRIX fcntl number.
*/
-#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
-#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
+/* XFS_IOC_ALLOCSP ------- deprecated 10 */
+/* XFS_IOC_FREESP -------- deprecated 11 */
#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr)
#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
-#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
-#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
+/* XFS_IOC_ALLOCSP64 ----- deprecated 36 */
+/* XFS_IOC_FREESP64 ------ deprecated 37 */
#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap)
-#define XFS_IOC_FSSETDM _IOW ('X', 39, struct fsdmidata)
+/* XFS_IOC_FSSETDM ------- deprecated 39 */
#define XFS_IOC_RESVSP _IOW ('X', 40, struct xfs_flock64)
#define XFS_IOC_UNRESVSP _IOW ('X', 41, struct xfs_flock64)
#define XFS_IOC_RESVSP64 _IOW ('X', 42, struct xfs_flock64)
@@ -831,7 +810,7 @@ struct xfs_scrub_metadata {
#define XFS_IOC_FREEZE _IOWR('X', 119, int) /* aka FIFREEZE */
#define XFS_IOC_THAW _IOWR('X', 120, int) /* aka FITHAW */
-#define XFS_IOC_FSSETDM_BY_HANDLE _IOW ('X', 121, struct xfs_fsop_setdm_handlereq)
+/* XFS_IOC_FSSETDM_BY_HANDLE -- deprecated 121 */
#define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
#define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
#define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4)
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index bed798792226..90aebfe9dc5f 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -281,7 +281,7 @@ xchk_superblock(
features_mask = cpu_to_be32(XFS_SB_VERSION2_ATTR2BIT);
if ((sb->sb_features2 & features_mask) !=
(cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
- xchk_block_set_corrupt(sc, bp);
+ xchk_block_set_preen(sc, bp);
if (!xfs_has_crc(mp)) {
/* all v5 fields must be zero */
@@ -290,39 +290,38 @@ xchk_superblock(
offsetof(struct xfs_dsb, sb_features_compat)))
xchk_block_set_corrupt(sc, bp);
} else {
- /* Check compat flags; all are set at mkfs time. */
- features_mask = cpu_to_be32(XFS_SB_FEAT_COMPAT_UNKNOWN);
- if ((sb->sb_features_compat & features_mask) !=
- (cpu_to_be32(mp->m_sb.sb_features_compat) & features_mask))
+ /* compat features must match */
+ if (sb->sb_features_compat !=
+ cpu_to_be32(mp->m_sb.sb_features_compat))
xchk_block_set_corrupt(sc, bp);
- /* Check ro compat flags; all are set at mkfs time. */
- features_mask = cpu_to_be32(XFS_SB_FEAT_RO_COMPAT_UNKNOWN |
- XFS_SB_FEAT_RO_COMPAT_FINOBT |
- XFS_SB_FEAT_RO_COMPAT_RMAPBT |
- XFS_SB_FEAT_RO_COMPAT_REFLINK);
- if ((sb->sb_features_ro_compat & features_mask) !=
- (cpu_to_be32(mp->m_sb.sb_features_ro_compat) &
- features_mask))
+ /* ro compat features must match */
+ if (sb->sb_features_ro_compat !=
+ cpu_to_be32(mp->m_sb.sb_features_ro_compat))
xchk_block_set_corrupt(sc, bp);
- /* Check incompat flags; all are set at mkfs time. */
- features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_UNKNOWN |
- XFS_SB_FEAT_INCOMPAT_FTYPE |
- XFS_SB_FEAT_INCOMPAT_SPINODES |
- XFS_SB_FEAT_INCOMPAT_META_UUID);
- if ((sb->sb_features_incompat & features_mask) !=
- (cpu_to_be32(mp->m_sb.sb_features_incompat) &
- features_mask))
- xchk_block_set_corrupt(sc, bp);
+ /*
+ * NEEDSREPAIR is ignored on a secondary super, so we should
+ * clear it when we find it, though it's not a corruption.
+ */
+ features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR);
+ if ((cpu_to_be32(mp->m_sb.sb_features_incompat) ^
+ sb->sb_features_incompat) & features_mask)
+ xchk_block_set_preen(sc, bp);
- /* Check log incompat flags; all are set at mkfs time. */
- features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN);
- if ((sb->sb_features_log_incompat & features_mask) !=
- (cpu_to_be32(mp->m_sb.sb_features_log_incompat) &
- features_mask))
+ /* all other incompat features must match */
+ if ((cpu_to_be32(mp->m_sb.sb_features_incompat) ^
+ sb->sb_features_incompat) & ~features_mask)
xchk_block_set_corrupt(sc, bp);
+ /*
+ * log incompat features protect newer log record types from
+ * older log recovery code. Log recovery doesn't check the
+ * secondary supers, so we can clear these if needed.
+ */
+ if (sb->sb_features_log_incompat)
+ xchk_block_set_preen(sc, bp);
+
/* Don't care about sb_crc */
if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index d7bfed52f4cd..6da7f2ca77de 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -52,6 +52,18 @@ xrep_superblock(
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+ /*
+ * Don't write out a secondary super with NEEDSREPAIR or log incompat
+ * features set, since both are ignored when set on a secondary.
+ */
+ if (xfs_has_crc(mp)) {
+ struct xfs_dsb *sb = bp->b_addr;
+
+ sb->sb_features_incompat &=
+ ~cpu_to_be32(XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR);
+ sb->sb_features_log_incompat = 0;
+ }
+
/* Write this to disk. */
xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 797ea0c8b14e..d4a387d3d0ce 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -771,8 +771,7 @@ int
xfs_alloc_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t len,
- int alloc_type)
+ xfs_off_t len)
{
xfs_mount_t *mp = ip->i_mount;
xfs_off_t count;
@@ -865,8 +864,8 @@ xfs_alloc_file_space(
goto error;
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
- allocatesize_fsb, alloc_type, 0, imapp,
- &nimaps);
+ allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
+ &nimaps);
if (error)
goto error;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 9f993168b55b..24b37d211f1d 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -54,7 +54,7 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
/* preallocation and hole punch interface */
int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len, int alloc_type);
+ xfs_off_t len);
int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index bbb0fbd34e64..b45e0d50a405 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -394,7 +394,7 @@ xfs_buf_alloc_pages(
}
XFS_STATS_INC(bp->b_mount, xb_page_retries);
- congestion_wait(BLK_RW_ASYNC, HZ / 50);
+ memalloc_retry_wait(gfp_mask);
}
return 0;
}
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 8310005af00f..a7174a5b3203 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -138,7 +138,8 @@ xfs_dir2_sf_getdents(
STATIC int
xfs_dir2_block_getdents(
struct xfs_da_args *args,
- struct dir_context *ctx)
+ struct dir_context *ctx,
+ unsigned int *lock_mode)
{
struct xfs_inode *dp = args->dp; /* incore directory inode */
struct xfs_buf *bp; /* buffer for block */
@@ -146,7 +147,6 @@ xfs_dir2_block_getdents(
int wantoff; /* starting block offset */
xfs_off_t cook;
struct xfs_da_geometry *geo = args->geo;
- int lock_mode;
unsigned int offset, next_offset;
unsigned int end;
@@ -156,12 +156,13 @@ xfs_dir2_block_getdents(
if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
return 0;
- lock_mode = xfs_ilock_data_map_shared(dp);
error = xfs_dir3_block_read(args->trans, dp, &bp);
- xfs_iunlock(dp, lock_mode);
if (error)
return error;
+ xfs_iunlock(dp, *lock_mode);
+ *lock_mode = 0;
+
/*
* Extract the byte offset we start at from the seek pointer.
* We'll skip entries before this.
@@ -344,7 +345,8 @@ STATIC int
xfs_dir2_leaf_getdents(
struct xfs_da_args *args,
struct dir_context *ctx,
- size_t bufsize)
+ size_t bufsize,
+ unsigned int *lock_mode)
{
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
@@ -356,7 +358,6 @@ xfs_dir2_leaf_getdents(
xfs_dir2_off_t curoff; /* current overall offset */
int length; /* temporary length value */
int byteoff; /* offset in current block */
- int lock_mode;
unsigned int offset = 0;
int error = 0; /* error return value */
@@ -390,13 +391,16 @@ xfs_dir2_leaf_getdents(
bp = NULL;
}
- lock_mode = xfs_ilock_data_map_shared(dp);
+ if (*lock_mode == 0)
+ *lock_mode = xfs_ilock_data_map_shared(dp);
error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff,
&rablk, &bp);
- xfs_iunlock(dp, lock_mode);
if (error || !bp)
break;
+ xfs_iunlock(dp, *lock_mode);
+ *lock_mode = 0;
+
xfs_dir3_data_check(dp, bp);
/*
* Find our position in the block.
@@ -496,7 +500,7 @@ xfs_dir2_leaf_getdents(
*
* If supplied, the transaction collects locked dir buffers to avoid
* nested buffer deadlocks. This function does not dirty the
- * transaction. The caller should ensure that the inode is locked
+ * transaction. The caller must hold the IOLOCK (shared or exclusive)
* before calling this function.
*/
int
@@ -507,8 +511,9 @@ xfs_readdir(
size_t bufsize)
{
struct xfs_da_args args = { NULL };
- int rval;
- int v;
+ unsigned int lock_mode;
+ int isblock;
+ int error;
trace_xfs_readdir(dp);
@@ -516,6 +521,7 @@ xfs_readdir(
return -EIO;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
+ ASSERT(xfs_isilocked(dp, XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
args.dp = dp;
@@ -523,13 +529,22 @@ xfs_readdir(
args.trans = tp;
if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_getdents(&args, ctx);
- else if ((rval = xfs_dir2_isblock(&args, &v)))
- ;
- else if (v)
- rval = xfs_dir2_block_getdents(&args, ctx);
- else
- rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
+ return xfs_dir2_sf_getdents(&args, ctx);
- return rval;
+ lock_mode = xfs_ilock_data_map_shared(dp);
+ error = xfs_dir2_isblock(&args, &isblock);
+ if (error)
+ goto out_unlock;
+
+ if (isblock) {
+ error = xfs_dir2_block_getdents(&args, ctx, &lock_mode);
+ goto out_unlock;
+ }
+
+ error = xfs_dir2_leaf_getdents(&args, ctx, bufsize, &lock_mode);
+
+out_unlock:
+ if (lock_mode)
+ xfs_iunlock(dp, lock_mode);
+ return error;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8d4c5ca261bd..22ad207bedf4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1051,8 +1051,7 @@ xfs_file_fallocate(
}
if (!xfs_is_always_cow_inode(ip)) {
- error = xfs_alloc_file_space(ip, offset, len,
- XFS_BMAPI_PREALLOC);
+ error = xfs_alloc_file_space(ip, offset, len);
if (error)
goto out_unlock;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 2e718728986f..9644f938990c 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1854,28 +1854,20 @@ xfs_inodegc_worker(
}
/*
- * Force all currently queued inode inactivation work to run immediately, and
- * wait for the work to finish. Two pass - queue all the work first pass, wait
- * for it in a second pass.
+ * Force all currently queued inode inactivation work to run immediately and
+ * wait for the work to finish.
*/
void
xfs_inodegc_flush(
struct xfs_mount *mp)
{
- struct xfs_inodegc *gc;
- int cpu;
-
if (!xfs_is_inodegc_enabled(mp))
return;
trace_xfs_inodegc_flush(mp, __return_address);
xfs_inodegc_queue_all(mp);
-
- for_each_online_cpu(cpu) {
- gc = per_cpu_ptr(mp->m_inodegc, cpu);
- flush_work(&gc->work);
- }
+ flush_workqueue(mp->m_inodegc_wq);
}
/*
@@ -1886,18 +1878,12 @@ void
xfs_inodegc_stop(
struct xfs_mount *mp)
{
- struct xfs_inodegc *gc;
- int cpu;
-
if (!xfs_clear_inodegc_enabled(mp))
return;
xfs_inodegc_queue_all(mp);
+ drain_workqueue(mp->m_inodegc_wq);
- for_each_online_cpu(cpu) {
- gc = per_cpu_ptr(mp->m_inodegc, cpu);
- cancel_work_sync(&gc->work);
- }
trace_xfs_inodegc_stop(mp, __return_address);
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8ea47a9d5aad..03a6198c97f6 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -627,87 +627,6 @@ xfs_attrmulti_by_handle(
return error;
}
-int
-xfs_ioc_space(
- struct file *filp,
- xfs_flock64_t *bf)
-{
- struct inode *inode = file_inode(filp);
- struct xfs_inode *ip = XFS_I(inode);
- struct iattr iattr;
- enum xfs_prealloc_flags flags = XFS_PREALLOC_CLEAR;
- uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
- int error;
-
- if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
- return -EPERM;
-
- if (!(filp->f_mode & FMODE_WRITE))
- return -EBADF;
-
- if (!S_ISREG(inode->i_mode))
- return -EINVAL;
-
- if (xfs_is_always_cow_inode(ip))
- return -EOPNOTSUPP;
-
- if (filp->f_flags & O_DSYNC)
- flags |= XFS_PREALLOC_SYNC;
- if (filp->f_mode & FMODE_NOCMTIME)
- flags |= XFS_PREALLOC_INVISIBLE;
-
- error = mnt_want_write_file(filp);
- if (error)
- return error;
-
- xfs_ilock(ip, iolock);
- error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
- if (error)
- goto out_unlock;
- inode_dio_wait(inode);
-
- switch (bf->l_whence) {
- case 0: /*SEEK_SET*/
- break;
- case 1: /*SEEK_CUR*/
- bf->l_start += filp->f_pos;
- break;
- case 2: /*SEEK_END*/
- bf->l_start += XFS_ISIZE(ip);
- break;
- default:
- error = -EINVAL;
- goto out_unlock;
- }
-
- if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes) {
- error = -EINVAL;
- goto out_unlock;
- }
-
- if (bf->l_start > XFS_ISIZE(ip)) {
- error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
- bf->l_start - XFS_ISIZE(ip),
- XFS_BMAPI_PREALLOC);
- if (error)
- goto out_unlock;
- }
-
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = bf->l_start;
- error = xfs_vn_setattr_size(file_mnt_user_ns(filp), file_dentry(filp),
- &iattr);
- if (error)
- goto out_unlock;
-
- error = xfs_update_prealloc_flags(ip, flags);
-
-out_unlock:
- xfs_iunlock(ip, iolock);
- mnt_drop_write_file(filp);
- return error;
-}
-
/* Return 0 on success or positive error */
int
xfs_fsbulkstat_one_fmt(
@@ -1936,6 +1855,15 @@ xfs_fs_eofblocks_from_user(
}
/*
+ * These long-unused ioctls were removed from the official ioctl API in 5.17,
+ * but retain these definitions so that we can log warnings about them.
+ */
+#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
+#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
+#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
+#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
+
+/*
* Note: some of the ioctl's return positive numbers as a
* byte count indicating success, such as readlink_by_handle.
* So we don't "sign flip" like most other routines. This means
@@ -1965,13 +1893,11 @@ xfs_file_ioctl(
case XFS_IOC_ALLOCSP:
case XFS_IOC_FREESP:
case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64: {
- xfs_flock64_t bf;
-
- if (copy_from_user(&bf, arg, sizeof(bf)))
- return -EFAULT;
- return xfs_ioc_space(filp, &bf);
- }
+ case XFS_IOC_FREESP64:
+ xfs_warn_once(mp,
+ "%s should use fallocate; XFS_IOC_{ALLOC,FREE}SP ioctl unsupported",
+ current->comm);
+ return -ENOTTY;
case XFS_IOC_DIOINFO: {
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
struct dioattr da;
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 845d3bcab74b..d4abba2c13c1 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -10,12 +10,6 @@ struct xfs_bstat;
struct xfs_ibulk;
struct xfs_inogrp;
-
-extern int
-xfs_ioc_space(
- struct file *filp,
- xfs_flock64_t *bf);
-
int
xfs_ioc_swapext(
xfs_swapext_t *sxp);
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 8783af203cfc..004ed2a251e8 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -28,22 +28,6 @@
#ifdef BROKEN_X86_ALIGNMENT
STATIC int
-xfs_compat_flock64_copyin(
- xfs_flock64_t *bf,
- compat_xfs_flock64_t __user *arg32)
-{
- if (get_user(bf->l_type, &arg32->l_type) ||
- get_user(bf->l_whence, &arg32->l_whence) ||
- get_user(bf->l_start, &arg32->l_start) ||
- get_user(bf->l_len, &arg32->l_len) ||
- get_user(bf->l_sysid, &arg32->l_sysid) ||
- get_user(bf->l_pid, &arg32->l_pid) ||
- copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
- return -EFAULT;
- return 0;
-}
-
-STATIC int
xfs_compat_ioc_fsgeometry_v1(
struct xfs_mount *mp,
compat_xfs_fsop_geom_v1_t __user *arg32)
@@ -445,17 +429,6 @@ xfs_file_compat_ioctl(
switch (cmd) {
#if defined(BROKEN_X86_ALIGNMENT)
- case XFS_IOC_ALLOCSP_32:
- case XFS_IOC_FREESP_32:
- case XFS_IOC_ALLOCSP64_32:
- case XFS_IOC_FREESP64_32: {
- struct xfs_flock64 bf;
-
- if (xfs_compat_flock64_copyin(&bf, arg))
- return -EFAULT;
- cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
- return xfs_ioc_space(filp, &bf);
- }
case XFS_IOC_FSGEOMETRY_V1_32:
return xfs_compat_ioc_fsgeometry_v1(ip->i_mount, arg);
case XFS_IOC_FSGROWFSDATA_32: {
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index 9929482bf358..c14852362fce 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -142,28 +142,6 @@ typedef struct compat_xfs_fsop_attrmulti_handlereq {
_IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
#ifdef BROKEN_X86_ALIGNMENT
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct compat_xfs_flock64 {
- __s16 l_type;
- __s16 l_whence;
- __s64 l_start __attribute__((packed));
- /* len == 0 means until end of file */
- __s64 l_len __attribute__((packed));
- __s32 l_sysid;
- __u32 l_pid;
- __s32 l_pad[4]; /* reserve area */
-} compat_xfs_flock64_t;
-
-#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64)
-#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
-#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
-
typedef struct compat_xfs_fsop_geom_v1 {
__u32 blocksize; /* filesystem (data) block size */
__u32 rtextsize; /* realtime extent size */
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 3d503e74037f..fd7e8fbaeef1 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -285,7 +285,7 @@ do { \
* write-combining memory accesses before this macro with those after it.
*/
#ifndef io_stop_wc
-#define io_stop_wc do { } while (0)
+#define io_stop_wc() do { } while (0)
#endif
#endif /* !__ASSEMBLY__ */
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index df9b5bc3d282..a47b8a71d6fe 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -20,7 +20,6 @@
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h
index 5a28629cbf4d..d51beff60375 100644
--- a/include/asm-generic/bitops/le.h
+++ b/include/asm-generic/bitops/le.h
@@ -2,83 +2,19 @@
#ifndef _ASM_GENERIC_BITOPS_LE_H_
#define _ASM_GENERIC_BITOPS_LE_H_
-#include <asm-generic/bitops/find.h>
#include <asm/types.h>
#include <asm/byteorder.h>
-#include <linux/swab.h>
#if defined(__LITTLE_ENDIAN)
#define BITOP_LE_SWIZZLE 0
-static inline unsigned long find_next_zero_bit_le(const void *addr,
- unsigned long size, unsigned long offset)
-{
- return find_next_zero_bit(addr, size, offset);
-}
-
-static inline unsigned long find_next_bit_le(const void *addr,
- unsigned long size, unsigned long offset)
-{
- return find_next_bit(addr, size, offset);
-}
-
-static inline unsigned long find_first_zero_bit_le(const void *addr,
- unsigned long size)
-{
- return find_first_zero_bit(addr, size);
-}
-
#elif defined(__BIG_ENDIAN)
#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
-#ifndef find_next_zero_bit_le
-static inline
-unsigned long find_next_zero_bit_le(const void *addr, unsigned
- long size, unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val = *(const unsigned long *)addr;
-
- if (unlikely(offset >= size))
- return size;
-
- val = swab(val) | ~GENMASK(size - 1, offset);
- return val == ~0UL ? size : ffz(val);
- }
-
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
-}
-#endif
-
-#ifndef find_next_bit_le
-static inline
-unsigned long find_next_bit_le(const void *addr, unsigned
- long size, unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val = *(const unsigned long *)addr;
-
- if (unlikely(offset >= size))
- return size;
-
- val = swab(val) & GENMASK(size - 1, offset);
- return val ? __ffs(val) : size;
- }
-
- return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
-}
#endif
-#ifndef find_first_zero_bit_le
-#define find_first_zero_bit_le(addr, size) \
- find_next_zero_bit_le((addr), (size), 0)
-#endif
-
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
static inline int test_bit_le(int nr, const void *addr)
{
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 8ed6733d5146..8f97c2927bee 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -540,39 +540,6 @@ enum hv_interrupt_source {
HV_INTERRUPT_SOURCE_IOAPIC,
};
-union hv_msi_address_register {
- u32 as_uint32;
- struct {
- u32 reserved1:2;
- u32 destination_mode:1;
- u32 redirection_hint:1;
- u32 reserved2:8;
- u32 destination_id:8;
- u32 msi_base:12;
- };
-} __packed;
-
-union hv_msi_data_register {
- u32 as_uint32;
- struct {
- u32 vector:8;
- u32 delivery_mode:3;
- u32 reserved1:3;
- u32 level_assert:1;
- u32 trigger_mode:1;
- u32 reserved2:16;
- };
-} __packed;
-
-/* HvRetargetDeviceInterrupt hypercall */
-union hv_msi_entry {
- u64 as_uint64;
- struct {
- union hv_msi_address_register address;
- union hv_msi_data_register data;
- } __packed;
-};
-
union hv_ioapic_rte {
u64 as_uint64;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 3e2248ac328e..c08758b6b364 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -49,8 +49,8 @@ struct ms_hyperv_info {
};
extern struct ms_hyperv_info ms_hyperv;
-extern void __percpu **hyperv_pcpu_input_arg;
-extern void __percpu **hyperv_pcpu_output_arg;
+extern void * __percpu *hyperv_pcpu_input_arg;
+extern void * __percpu *hyperv_pcpu_output_arg;
extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
@@ -269,6 +269,8 @@ bool hv_isolation_type_snp(void);
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
void hyperv_cleanup(void);
bool hv_query_ext_cap(u64 cap_query);
+void *hv_map_memory(void *addr, unsigned long size);
+void hv_unmap_memory(void *addr);
#else /* CONFIG_HYPERV */
static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline bool hv_is_hibernation_supported(void) { return false; }
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 02932efad3ab..977bea16cf1b 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -147,6 +147,15 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#if CONFIG_PGTABLE_LEVELS > 3
+static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ gfp_t gfp = GFP_PGTABLE_USER;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ return (pud_t *)get_zeroed_page(gfp);
+}
+
#ifndef __HAVE_ARCH_PUD_ALLOC_ONE
/**
* pud_alloc_one - allocate a page for PUD-level page table
@@ -159,20 +168,23 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
*/
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- gfp_t gfp = GFP_PGTABLE_USER;
-
- if (mm == &init_mm)
- gfp = GFP_PGTABLE_KERNEL;
- return (pud_t *)get_zeroed_page(gfp);
+ return __pud_alloc_one(mm, addr);
}
#endif
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
{
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
free_page((unsigned long)pud);
}
+#ifndef __HAVE_ARCH_PUD_FREE
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ __pud_free(mm, pud);
+}
+#endif
+
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#ifndef __HAVE_ARCH_PGD_FREE
diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
index df3c6c2f9553..f9ffd39194eb 100644
--- a/include/crypto/blake2s.h
+++ b/include/crypto/blake2s.h
@@ -101,7 +101,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
blake2s_final(&state, out);
}
-void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
- const size_t keylen);
-
#endif /* _CRYPTO_BLAKE2S_H */
diff --git a/include/dt-bindings/iio/addac/adi,ad74413r.h b/include/dt-bindings/iio/addac/adi,ad74413r.h
new file mode 100644
index 000000000000..204f92bbd79f
--- /dev/null
+++ b/include/dt-bindings/iio/addac/adi,ad74413r.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _DT_BINDINGS_ADI_AD74413R_H
+#define _DT_BINDINGS_ADI_AD74413R_H
+
+#define CH_FUNC_HIGH_IMPEDANCE 0x0
+#define CH_FUNC_VOLTAGE_OUTPUT 0x1
+#define CH_FUNC_CURRENT_OUTPUT 0x2
+#define CH_FUNC_VOLTAGE_INPUT 0x3
+#define CH_FUNC_CURRENT_INPUT_EXT_POWER 0x4
+#define CH_FUNC_CURRENT_INPUT_LOOP_POWER 0x5
+#define CH_FUNC_RESISTANCE_INPUT 0x6
+#define CH_FUNC_DIGITAL_INPUT_LOGIC 0x7
+#define CH_FUNC_DIGITAL_INPUT_LOOP_POWER 0x8
+#define CH_FUNC_CURRENT_INPUT_EXT_POWER_HART 0x9
+#define CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART 0xA
+
+#define CH_FUNC_MIN CH_FUNC_HIGH_IMPEDANCE
+#define CH_FUNC_MAX CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART
+
+#endif /* _DT_BINDINGS_ADI_AD74413R_H */
diff --git a/include/dt-bindings/interconnect/qcom,msm8996.h b/include/dt-bindings/interconnect/qcom,msm8996.h
new file mode 100644
index 000000000000..a0b7c0ec7bed
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,msm8996.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * Qualcomm MSM8996 interconnect IDs
+ *
+ * Copyright (c) 2021 Yassine Oudjana <y.oudjana@protonmail.com>
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8996_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8996_H
+
+/* A0NOC */
+#define MASTER_PCIE_0 0
+#define MASTER_PCIE_1 1
+#define MASTER_PCIE_2 2
+
+/* A1NOC */
+#define MASTER_CNOC_A1NOC 0
+#define MASTER_CRYPTO_CORE0 1
+#define MASTER_PNOC_A1NOC 2
+
+/* A2NOC */
+#define MASTER_USB3 0
+#define MASTER_IPA 1
+#define MASTER_UFS 2
+
+/* BIMC */
+#define MASTER_AMPSS_M0 0
+#define MASTER_GRAPHICS_3D 1
+#define MASTER_MNOC_BIMC 2
+#define MASTER_SNOC_BIMC 3
+#define SLAVE_EBI_CH0 4
+#define SLAVE_HMSS_L3 5
+#define SLAVE_BIMC_SNOC_0 6
+#define SLAVE_BIMC_SNOC_1 7
+
+/* CNOC */
+#define MASTER_SNOC_CNOC 0
+#define MASTER_QDSS_DAP 1
+#define SLAVE_CNOC_A1NOC 2
+#define SLAVE_CLK_CTL 3
+#define SLAVE_TCSR 4
+#define SLAVE_TLMM 5
+#define SLAVE_CRYPTO_0_CFG 6
+#define SLAVE_MPM 7
+#define SLAVE_PIMEM_CFG 8
+#define SLAVE_IMEM_CFG 9
+#define SLAVE_MESSAGE_RAM 10
+#define SLAVE_BIMC_CFG 11
+#define SLAVE_PMIC_ARB 12
+#define SLAVE_PRNG 13
+#define SLAVE_DCC_CFG 14
+#define SLAVE_RBCPR_MX 15
+#define SLAVE_QDSS_CFG 16
+#define SLAVE_RBCPR_CX 17
+#define SLAVE_QDSS_RBCPR_APU 18
+#define SLAVE_CNOC_MNOC_CFG 19
+#define SLAVE_SNOC_CFG 20
+#define SLAVE_SNOC_MPU_CFG 21
+#define SLAVE_EBI1_PHY_CFG 22
+#define SLAVE_A0NOC_CFG 23
+#define SLAVE_PCIE_1_CFG 24
+#define SLAVE_PCIE_2_CFG 25
+#define SLAVE_PCIE_0_CFG 26
+#define SLAVE_PCIE20_AHB2PHY 27
+#define SLAVE_A0NOC_MPU_CFG 28
+#define SLAVE_UFS_CFG 29
+#define SLAVE_A1NOC_CFG 30
+#define SLAVE_A1NOC_MPU_CFG 31
+#define SLAVE_A2NOC_CFG 32
+#define SLAVE_A2NOC_MPU_CFG 33
+#define SLAVE_SSC_CFG 34
+#define SLAVE_A0NOC_SMMU_CFG 35
+#define SLAVE_A1NOC_SMMU_CFG 36
+#define SLAVE_A2NOC_SMMU_CFG 37
+#define SLAVE_LPASS_SMMU_CFG 38
+#define SLAVE_CNOC_MNOC_MMSS_CFG 39
+
+/* MNOC */
+#define MASTER_CNOC_MNOC_CFG 0
+#define MASTER_CPP 1
+#define MASTER_JPEG 2
+#define MASTER_MDP_PORT0 3
+#define MASTER_MDP_PORT1 4
+#define MASTER_ROTATOR 5
+#define MASTER_VIDEO_P0 6
+#define MASTER_VFE 7
+#define MASTER_SNOC_VMEM 8
+#define MASTER_VIDEO_P0_OCMEM 9
+#define MASTER_CNOC_MNOC_MMSS_CFG 10
+#define SLAVE_MNOC_BIMC 11
+#define SLAVE_VMEM 12
+#define SLAVE_SERVICE_MNOC 13
+#define SLAVE_MMAGIC_CFG 14
+#define SLAVE_CPR_CFG 15
+#define SLAVE_MISC_CFG 16
+#define SLAVE_VENUS_THROTTLE_CFG 17
+#define SLAVE_VENUS_CFG 18
+#define SLAVE_VMEM_CFG 19
+#define SLAVE_DSA_CFG 20
+#define SLAVE_MMSS_CLK_CFG 21
+#define SLAVE_DSA_MPU_CFG 22
+#define SLAVE_MNOC_MPU_CFG 23
+#define SLAVE_DISPLAY_CFG 24
+#define SLAVE_DISPLAY_THROTTLE_CFG 25
+#define SLAVE_CAMERA_CFG 26
+#define SLAVE_CAMERA_THROTTLE_CFG 27
+#define SLAVE_GRAPHICS_3D_CFG 28
+#define SLAVE_SMMU_MDP_CFG 29
+#define SLAVE_SMMU_ROT_CFG 30
+#define SLAVE_SMMU_VENUS_CFG 31
+#define SLAVE_SMMU_CPP_CFG 32
+#define SLAVE_SMMU_JPEG_CFG 33
+#define SLAVE_SMMU_VFE_CFG 34
+
+/* PNOC */
+#define MASTER_SNOC_PNOC 0
+#define MASTER_SDCC_1 1
+#define MASTER_SDCC_2 2
+#define MASTER_SDCC_4 3
+#define MASTER_USB_HS 4
+#define MASTER_BLSP_1 5
+#define MASTER_BLSP_2 6
+#define MASTER_TSIF 7
+#define SLAVE_PNOC_A1NOC 8
+#define SLAVE_USB_HS 9
+#define SLAVE_SDCC_2 10
+#define SLAVE_SDCC_4 11
+#define SLAVE_TSIF 12
+#define SLAVE_BLSP_2 13
+#define SLAVE_SDCC_1 14
+#define SLAVE_BLSP_1 15
+#define SLAVE_PDM 16
+#define SLAVE_AHB2PHY 17
+
+/* SNOC */
+#define MASTER_HMSS 0
+#define MASTER_QDSS_BAM 1
+#define MASTER_SNOC_CFG 2
+#define MASTER_BIMC_SNOC_0 3
+#define MASTER_BIMC_SNOC_1 4
+#define MASTER_A0NOC_SNOC 5
+#define MASTER_A1NOC_SNOC 6
+#define MASTER_A2NOC_SNOC 7
+#define MASTER_QDSS_ETR 8
+#define SLAVE_A0NOC_SNOC 9
+#define SLAVE_A1NOC_SNOC 10
+#define SLAVE_A2NOC_SNOC 11
+#define SLAVE_HMSS 12
+#define SLAVE_LPASS 13
+#define SLAVE_USB3 14
+#define SLAVE_SNOC_BIMC 15
+#define SLAVE_SNOC_CNOC 16
+#define SLAVE_IMEM 17
+#define SLAVE_PIMEM 18
+#define SLAVE_SNOC_VMEM 19
+#define SLAVE_SNOC_PNOC 20
+#define SLAVE_QDSS_STM 21
+#define SLAVE_PCIE_0 22
+#define SLAVE_PCIE_1 23
+#define SLAVE_PCIE_2 24
+#define SLAVE_SERVICE_SNOC 25
+
+#endif
diff --git a/include/dt-bindings/interconnect/qcom,qcm2290.h b/include/dt-bindings/interconnect/qcom,qcm2290.h
new file mode 100644
index 000000000000..6cbbb7fe0bd3
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,qcm2290.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* QCM2290 interconnect IDs */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCM2290_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_QCM2290_H
+
+/* BIMC */
+#define MASTER_APPSS_PROC 0
+#define MASTER_SNOC_BIMC_RT 1
+#define MASTER_SNOC_BIMC_NRT 2
+#define MASTER_SNOC_BIMC 3
+#define MASTER_TCU_0 4
+#define MASTER_GFX3D 5
+#define SLAVE_EBI1 6
+#define SLAVE_BIMC_SNOC 7
+
+/* CNOC */
+#define MASTER_SNOC_CNOC 0
+#define MASTER_QDSS_DAP 1
+#define SLAVE_BIMC_CFG 2
+#define SLAVE_CAMERA_NRT_THROTTLE_CFG 3
+#define SLAVE_CAMERA_RT_THROTTLE_CFG 4
+#define SLAVE_CAMERA_CFG 5
+#define SLAVE_CLK_CTL 6
+#define SLAVE_CRYPTO_0_CFG 7
+#define SLAVE_DISPLAY_CFG 8
+#define SLAVE_DISPLAY_THROTTLE_CFG 9
+#define SLAVE_GPU_CFG 10
+#define SLAVE_HWKM 11
+#define SLAVE_IMEM_CFG 12
+#define SLAVE_IPA_CFG 13
+#define SLAVE_LPASS 14
+#define SLAVE_MESSAGE_RAM 15
+#define SLAVE_PDM 16
+#define SLAVE_PIMEM_CFG 17
+#define SLAVE_PKA_WRAPPER 18
+#define SLAVE_PMIC_ARB 19
+#define SLAVE_PRNG 20
+#define SLAVE_QDSS_CFG 21
+#define SLAVE_QM_CFG 22
+#define SLAVE_QM_MPU_CFG 23
+#define SLAVE_QPIC 24
+#define SLAVE_QUP_0 25
+#define SLAVE_SDCC_1 26
+#define SLAVE_SDCC_2 27
+#define SLAVE_SNOC_CFG 28
+#define SLAVE_TCSR 29
+#define SLAVE_USB3 30
+#define SLAVE_VENUS_CFG 31
+#define SLAVE_VENUS_THROTTLE_CFG 32
+#define SLAVE_VSENSE_CTRL_CFG 33
+#define SLAVE_SERVICE_CNOC 34
+
+/* SNOC */
+#define MASTER_CRYPTO_CORE0 0
+#define MASTER_SNOC_CFG 1
+#define MASTER_TIC 2
+#define MASTER_ANOC_SNOC 3
+#define MASTER_BIMC_SNOC 4
+#define MASTER_PIMEM 5
+#define MASTER_QDSS_BAM 6
+#define MASTER_QUP_0 7
+#define MASTER_IPA 8
+#define MASTER_QDSS_ETR 9
+#define MASTER_SDCC_1 10
+#define MASTER_SDCC_2 11
+#define MASTER_QPIC 12
+#define MASTER_USB3_0 13
+#define SLAVE_APPSS 14
+#define SLAVE_SNOC_CNOC 15
+#define SLAVE_IMEM 16
+#define SLAVE_PIMEM 17
+#define SLAVE_SNOC_BIMC 18
+#define SLAVE_SERVICE_SNOC 19
+#define SLAVE_QDSS_STM 20
+#define SLAVE_TCU 21
+#define SLAVE_ANOC_SNOC 22
+
+/* QUP Virtual */
+#define MASTER_QUP_CORE_0 0
+#define SLAVE_QUP_CORE_0 1
+
+/* MMNRT Virtual */
+#define MASTER_CAMNOC_SF 0
+#define MASTER_VIDEO_P0 1
+#define MASTER_VIDEO_PROC 2
+#define SLAVE_SNOC_BIMC_NRT 3
+
+/* MMRT Virtual */
+#define MASTER_CAMNOC_HF 0
+#define MASTER_MDP0 1
+#define SLAVE_SNOC_BIMC_RT 2
+
+#endif
diff --git a/include/dt-bindings/interconnect/qcom,sm8450.h b/include/dt-bindings/interconnect/qcom,sm8450.h
new file mode 100644
index 000000000000..8f3c5e1fb4c4
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,sm8450.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Linaro Limited
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SM8450_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_SM8450_H
+
+#define MASTER_QSPI_0 0
+#define MASTER_QUP_1 1
+#define MASTER_A1NOC_CFG 2
+#define MASTER_SDCC_4 3
+#define MASTER_UFS_MEM 4
+#define MASTER_USB3_0 5
+#define SLAVE_A1NOC_SNOC 6
+#define SLAVE_SERVICE_A1NOC 7
+
+#define MASTER_QDSS_BAM 0
+#define MASTER_QUP_0 1
+#define MASTER_QUP_2 2
+#define MASTER_A2NOC_CFG 3
+#define MASTER_CRYPTO 4
+#define MASTER_IPA 5
+#define MASTER_SENSORS_PROC 6
+#define MASTER_SP 7
+#define MASTER_QDSS_ETR 8
+#define MASTER_QDSS_ETR_1 9
+#define MASTER_SDCC_2 10
+#define SLAVE_A2NOC_SNOC 11
+#define SLAVE_SERVICE_A2NOC 12
+
+#define MASTER_QUP_CORE_0 0
+#define MASTER_QUP_CORE_1 1
+#define MASTER_QUP_CORE_2 2
+#define SLAVE_QUP_CORE_0 3
+#define SLAVE_QUP_CORE_1 4
+#define SLAVE_QUP_CORE_2 5
+
+#define MASTER_GEM_NOC_CNOC 0
+#define MASTER_GEM_NOC_PCIE_SNOC 1
+#define SLAVE_AHB2PHY_SOUTH 2
+#define SLAVE_AHB2PHY_NORTH 3
+#define SLAVE_AOSS 4
+#define SLAVE_CAMERA_CFG 5
+#define SLAVE_CLK_CTL 6
+#define SLAVE_CDSP_CFG 7
+#define SLAVE_RBCPR_CX_CFG 8
+#define SLAVE_RBCPR_MMCX_CFG 9
+#define SLAVE_RBCPR_MXA_CFG 10
+#define SLAVE_RBCPR_MXC_CFG 11
+#define SLAVE_CRYPTO_0_CFG 12
+#define SLAVE_CX_RDPM 13
+#define SLAVE_DISPLAY_CFG 14
+#define SLAVE_GFX3D_CFG 15
+#define SLAVE_IMEM_CFG 16
+#define SLAVE_IPA_CFG 17
+#define SLAVE_IPC_ROUTER_CFG 18
+#define SLAVE_LPASS 19
+#define SLAVE_CNOC_MSS 20
+#define SLAVE_MX_RDPM 21
+#define SLAVE_PCIE_0_CFG 22
+#define SLAVE_PCIE_1_CFG 23
+#define SLAVE_PDM 24
+#define SLAVE_PIMEM_CFG 25
+#define SLAVE_PRNG 26
+#define SLAVE_QDSS_CFG 27
+#define SLAVE_QSPI_0 28
+#define SLAVE_QUP_0 29
+#define SLAVE_QUP_1 30
+#define SLAVE_QUP_2 31
+#define SLAVE_SDCC_2 32
+#define SLAVE_SDCC_4 33
+#define SLAVE_SPSS_CFG 34
+#define SLAVE_TCSR 35
+#define SLAVE_TLMM 36
+#define SLAVE_TME_CFG 37
+#define SLAVE_UFS_MEM_CFG 38
+#define SLAVE_USB3_0 39
+#define SLAVE_VENUS_CFG 40
+#define SLAVE_VSENSE_CTRL_CFG 41
+#define SLAVE_A1NOC_CFG 42
+#define SLAVE_A2NOC_CFG 43
+#define SLAVE_DDRSS_CFG 44
+#define SLAVE_CNOC_MNOC_CFG 45
+#define SLAVE_PCIE_ANOC_CFG 46
+#define SLAVE_SNOC_CFG 47
+#define SLAVE_IMEM 48
+#define SLAVE_PIMEM 49
+#define SLAVE_SERVICE_CNOC 50
+#define SLAVE_PCIE_0 51
+#define SLAVE_PCIE_1 52
+#define SLAVE_QDSS_STM 53
+#define SLAVE_TCU 54
+
+#define MASTER_GPU_TCU 0
+#define MASTER_SYS_TCU 1
+#define MASTER_APPSS_PROC 2
+#define MASTER_GFX3D 3
+#define MASTER_MSS_PROC 4
+#define MASTER_MNOC_HF_MEM_NOC 5
+#define MASTER_MNOC_SF_MEM_NOC 6
+#define MASTER_COMPUTE_NOC 7
+#define MASTER_ANOC_PCIE_GEM_NOC 8
+#define MASTER_SNOC_GC_MEM_NOC 9
+#define MASTER_SNOC_SF_MEM_NOC 10
+#define SLAVE_GEM_NOC_CNOC 11
+#define SLAVE_LLCC 12
+#define SLAVE_MEM_NOC_PCIE_SNOC 13
+#define MASTER_MNOC_HF_MEM_NOC_DISP 14
+#define MASTER_MNOC_SF_MEM_NOC_DISP 15
+#define MASTER_ANOC_PCIE_GEM_NOC_DISP 16
+#define SLAVE_LLCC_DISP 17
+
+#define MASTER_CNOC_LPASS_AG_NOC 0
+#define MASTER_LPASS_PROC 1
+#define SLAVE_LPASS_CORE_CFG 2
+#define SLAVE_LPASS_LPI_CFG 3
+#define SLAVE_LPASS_MPU_CFG 4
+#define SLAVE_LPASS_TOP_CFG 5
+#define SLAVE_LPASS_SNOC 6
+#define SLAVE_SERVICES_LPASS_AML_NOC 7
+#define SLAVE_SERVICE_LPASS_AG_NOC 8
+
+#define MASTER_LLCC 0
+#define SLAVE_EBI1 1
+#define MASTER_LLCC_DISP 2
+#define SLAVE_EBI1_DISP 3
+
+#define MASTER_CAMNOC_HF 0
+#define MASTER_CAMNOC_ICP 1
+#define MASTER_CAMNOC_SF 2
+#define MASTER_MDP 3
+#define MASTER_CNOC_MNOC_CFG 4
+#define MASTER_ROTATOR 5
+#define MASTER_CDSP_HCP 6
+#define MASTER_VIDEO 7
+#define MASTER_VIDEO_CV_PROC 8
+#define MASTER_VIDEO_PROC 9
+#define MASTER_VIDEO_V_PROC 10
+#define SLAVE_MNOC_HF_MEM_NOC 11
+#define SLAVE_MNOC_SF_MEM_NOC 12
+#define SLAVE_SERVICE_MNOC 13
+#define MASTER_MDP_DISP 14
+#define MASTER_ROTATOR_DISP 15
+#define SLAVE_MNOC_HF_MEM_NOC_DISP 16
+#define SLAVE_MNOC_SF_MEM_NOC_DISP 17
+
+#define MASTER_CDSP_NOC_CFG 0
+#define MASTER_CDSP_PROC 1
+#define SLAVE_CDSP_MEM_NOC 2
+#define SLAVE_SERVICE_NSP_NOC 3
+
+#define MASTER_PCIE_ANOC_CFG 0
+#define MASTER_PCIE_0 1
+#define MASTER_PCIE_1 2
+#define SLAVE_ANOC_PCIE_GEM_NOC 3
+#define SLAVE_SERVICE_PCIE_ANOC 4
+
+#define MASTER_GIC_AHB 0
+#define MASTER_A1NOC_SNOC 1
+#define MASTER_A2NOC_SNOC 2
+#define MASTER_LPASS_ANOC 3
+#define MASTER_SNOC_CFG 4
+#define MASTER_PIMEM 5
+#define MASTER_GIC 6
+#define SLAVE_SNOC_GEM_NOC_GC 7
+#define SLAVE_SNOC_GEM_NOC_SF 8
+#define SLAVE_SERVICE_SNOC 9
+
+#endif
diff --git a/include/dt-bindings/mux/ti-serdes.h b/include/dt-bindings/mux/ti-serdes.h
index d417b9268b16..d3116c52ab72 100644
--- a/include/dt-bindings/mux/ti-serdes.h
+++ b/include/dt-bindings/mux/ti-serdes.h
@@ -95,4 +95,26 @@
#define AM64_SERDES0_LANE0_PCIE0 0x0
#define AM64_SERDES0_LANE0_USB 0x1
+/* J721S2 */
+
+#define J721S2_SERDES0_LANE0_EDP_LANE0 0x0
+#define J721S2_SERDES0_LANE0_PCIE1_LANE0 0x1
+#define J721S2_SERDES0_LANE0_IP3_UNUSED 0x2
+#define J721S2_SERDES0_LANE0_IP4_UNUSED 0x3
+
+#define J721S2_SERDES0_LANE1_EDP_LANE1 0x0
+#define J721S2_SERDES0_LANE1_PCIE1_LANE1 0x1
+#define J721S2_SERDES0_LANE1_USB 0x2
+#define J721S2_SERDES0_LANE1_IP4_UNUSED 0x3
+
+#define J721S2_SERDES0_LANE2_EDP_LANE2 0x0
+#define J721S2_SERDES0_LANE2_PCIE1_LANE2 0x1
+#define J721S2_SERDES0_LANE2_IP3_UNUSED 0x2
+#define J721S2_SERDES0_LANE2_IP4_UNUSED 0x3
+
+#define J721S2_SERDES0_LANE3_EDP_LANE3 0x0
+#define J721S2_SERDES0_LANE3_PCIE1_LANE3 0x1
+#define J721S2_SERDES0_LANE3_USB 0x2
+#define J721S2_SERDES0_LANE3_IP4_UNUSED 0x3
+
#endif /* _DT_BINDINGS_MUX_TI_SERDES */
diff --git a/include/dt-bindings/phy/phy-cadence.h b/include/dt-bindings/phy/phy-cadence.h
index 24fdc9e11bd6..0671991208fc 100644
--- a/include/dt-bindings/phy/phy-cadence.h
+++ b/include/dt-bindings/phy/phy-cadence.h
@@ -6,11 +6,11 @@
#ifndef _DT_BINDINGS_CADENCE_SERDES_H
#define _DT_BINDINGS_CADENCE_SERDES_H
-/* Torrent */
-#define TORRENT_SERDES_NO_SSC 0
-#define TORRENT_SERDES_EXTERNAL_SSC 1
-#define TORRENT_SERDES_INTERNAL_SSC 2
+#define CDNS_SERDES_NO_SSC 0
+#define CDNS_SERDES_EXTERNAL_SSC 1
+#define CDNS_SERDES_INTERNAL_SSC 2
+/* Torrent */
#define CDNS_TORRENT_REFCLK_DRIVER 0
#define CDNS_TORRENT_DERIVED_REFCLK 1
#define CDNS_TORRENT_RECEIVED_REFCLK 2
@@ -18,5 +18,6 @@
/* Sierra */
#define CDNS_SIERRA_PLL_CMNLC 0
#define CDNS_SIERRA_PLL_CMNLC1 1
+#define CDNS_SIERRA_DERIVED_REFCLK 2
#endif /* _DT_BINDINGS_CADENCE_SERDES_H */
diff --git a/include/dt-bindings/phy/phy-imx8-pcie.h b/include/dt-bindings/phy/phy-imx8-pcie.h
new file mode 100644
index 000000000000..8bbe2d6538d8
--- /dev/null
+++ b/include/dt-bindings/phy/phy-imx8-pcie.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
+/*
+ * This header provides constants for i.MX8 PCIe.
+ */
+
+#ifndef _DT_BINDINGS_IMX8_PCIE_H
+#define _DT_BINDINGS_IMX8_PCIE_H
+
+/* Reference clock PAD mode */
+#define IMX8_PCIE_REFCLK_PAD_UNUSED 0
+#define IMX8_PCIE_REFCLK_PAD_INPUT 1
+#define IMX8_PCIE_REFCLK_PAD_OUTPUT 2
+
+#endif /* _DT_BINDINGS_IMX8_PCIE_H */
diff --git a/include/dt-bindings/phy/phy-lan966x-serdes.h b/include/dt-bindings/phy/phy-lan966x-serdes.h
new file mode 100644
index 000000000000..4330269a901e
--- /dev/null
+++ b/include/dt-bindings/phy/phy-lan966x-serdes.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+
+#ifndef __PHY_LAN966X_SERDES_H__
+#define __PHY_LAN966X_SERDES_H__
+
+#define CU(x) (x)
+#define CU_MAX CU(2)
+#define SERDES6G(x) (CU_MAX + 1 + (x))
+#define SERDES6G_MAX SERDES6G(3)
+#define RGMII(x) (SERDES6G_MAX + 1 + (x))
+#define RGMII_MAX RGMII(2)
+#define SERDES_MAX (RGMII_MAX + 1)
+
+#endif
diff --git a/include/kunit/assert.h b/include/kunit/assert.h
index ad889b539ab3..ccbc36c0b02f 100644
--- a/include/kunit/assert.h
+++ b/include/kunit/assert.h
@@ -10,7 +10,7 @@
#define _KUNIT_ASSERT_H
#include <linux/err.h>
-#include <linux/kernel.h>
+#include <linux/printk.h>
struct kunit;
struct string_stream;
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index e602d848fc1a..bb30a6803d9f 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -5,9 +5,11 @@
#ifndef __KVM_ARM_VGIC_H
#define __KVM_ARM_VGIC_H
-#include <linux/kernel.h>
+#include <linux/bits.h>
#include <linux/kvm.h>
#include <linux/irqreturn.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/static_key.h>
#include <linux/types.h>
diff --git a/include/linux/aio.h b/include/linux/aio.h
index b83e68dd006f..86892a4fe7c8 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -20,8 +20,4 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req,
kiocb_cancel_fn *cancel) { }
#endif /* CONFIG_AIO */
-/* for sysctl: */
-extern unsigned long aio_nr;
-extern unsigned long aio_max_nr;
-
#endif /* __LINUX__AIO_H */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index a241dcf50f39..7dba0847510c 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -6,6 +6,7 @@
#include <linux/align.h>
#include <linux/bitops.h>
+#include <linux/find.h>
#include <linux/limits.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -54,12 +55,6 @@ struct device;
* bitmap_clear(dst, pos, nbits) Clear specified bit area
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
* bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above
- * bitmap_next_clear_region(map, &start, &end, nbits) Find next clear region
- * bitmap_next_set_region(map, &start, &end, nbits) Find next set region
- * bitmap_for_each_clear_region(map, rs, re, start, end)
- * Iterate over all clear regions
- * bitmap_for_each_set_region(map, rs, re, start, end)
- * Iterate over all set regions
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
* bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest
@@ -466,14 +461,6 @@ static inline void bitmap_replace(unsigned long *dst,
__bitmap_replace(dst, old, new, mask, nbits);
}
-static inline void bitmap_next_clear_region(unsigned long *bitmap,
- unsigned int *rs, unsigned int *re,
- unsigned int end)
-{
- *rs = find_next_zero_bit(bitmap, end, *rs);
- *re = find_next_bit(bitmap, end, *rs + 1);
-}
-
static inline void bitmap_next_set_region(unsigned long *bitmap,
unsigned int *rs, unsigned int *re,
unsigned int end)
@@ -482,25 +469,6 @@ static inline void bitmap_next_set_region(unsigned long *bitmap,
*re = find_next_zero_bit(bitmap, end, *rs + 1);
}
-/*
- * Bitmap region iterators. Iterates over the bitmap between [@start, @end).
- * @rs and @re should be integer variables and will be set to start and end
- * index of the current clear or set region.
- */
-#define bitmap_for_each_clear_region(bitmap, rs, re, start, end) \
- for ((rs) = (start), \
- bitmap_next_clear_region((bitmap), &(rs), &(re), (end)); \
- (rs) < (re); \
- (rs) = (re) + 1, \
- bitmap_next_clear_region((bitmap), &(rs), &(re), (end)))
-
-#define bitmap_for_each_set_region(bitmap, rs, re, start, end) \
- for ((rs) = (start), \
- bitmap_next_set_region((bitmap), &(rs), &(re), (end)); \
- (rs) < (re); \
- (rs) = (re) + 1, \
- bitmap_next_set_region((bitmap), &(rs), &(re), (end)))
-
/**
* BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap.
* @n: u64 value
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5e62e2383b7f..7aaed501f768 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -32,40 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w);
*/
#include <asm/bitops.h>
-#define for_each_set_bit(bit, addr, size) \
- for ((bit) = find_first_bit((addr), (size)); \
- (bit) < (size); \
- (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-/* same as for_each_set_bit() but use bit as value to start with */
-#define for_each_set_bit_from(bit, addr, size) \
- for ((bit) = find_next_bit((addr), (size), (bit)); \
- (bit) < (size); \
- (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-#define for_each_clear_bit(bit, addr, size) \
- for ((bit) = find_first_zero_bit((addr), (size)); \
- (bit) < (size); \
- (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
-
-/* same as for_each_clear_bit() but use bit as value to start with */
-#define for_each_clear_bit_from(bit, addr, size) \
- for ((bit) = find_next_zero_bit((addr), (size), (bit)); \
- (bit) < (size); \
- (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
-
-/**
- * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
- * @start: bit offset to start search and to store the current iteration offset
- * @clump: location to store copy of current 8-bit clump
- * @bits: bitmap address to base the search on
- * @size: bitmap size in number of bits
- */
-#define for_each_set_clump8(start, clump, bits, size) \
- for ((start) = find_first_clump8(&(clump), (bits), (size)); \
- (start) < (size); \
- (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
-
static inline int get_bitmask_order(unsigned int count)
{
int order;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6e947cd91152..fa517ae604ad 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -316,7 +316,12 @@ enum bpf_type_flag {
*/
MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS),
- __BPF_TYPE_LAST_FLAG = MEM_RDONLY,
+ /* MEM was "allocated" from a different helper, and cannot be mixed
+ * with regular non-MEM_ALLOC'ed MEM types.
+ */
+ MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS),
+
+ __BPF_TYPE_LAST_FLAG = MEM_ALLOC,
};
/* Max number of base types. */
@@ -400,7 +405,7 @@ enum bpf_return_type {
RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
- RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
+ RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
/* This must be the last entry. Its purpose is to ensure the enum is
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 143401d4c9d9..e9993172f892 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -519,8 +519,8 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
void
bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
-int check_ctx_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno);
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 4b13e0a3e15b..c9a4c96c9943 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -190,7 +190,7 @@ static inline void be64_add_cpu(__be64 *var, u64 val)
static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len)
{
- int i;
+ size_t i;
for (i = 0; i < len; i++)
dst[i] = cpu_to_be32(src[i]);
@@ -198,7 +198,7 @@ static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len)
static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len)
{
- int i;
+ size_t i;
for (i = 0; i < len; i++)
dst[i] = be32_to_cpu(src[i]);
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 409d8c29bc4f..6a89ea410e43 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -295,13 +295,13 @@ extern bool libceph_compatible(void *data);
extern const char *ceph_msg_type_name(int type);
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
-extern void *ceph_kvmalloc(size_t size, gfp_t flags);
+extern int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid);
struct fs_parameter;
struct fc_log;
struct ceph_options *ceph_alloc_options(void);
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
- struct fc_log *l);
+ struct fc_log *l, char delim);
int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
struct fc_log *l);
int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 0e6e9ad3c3bf..ff99ce094cfa 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -532,7 +532,7 @@ extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
- int max_count, int *count);
+ int max_count, int *count, char delim);
extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
deleted file mode 100644
index 5f5730c1d324..000000000000
--- a/include/linux/cleancache.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_CLEANCACHE_H
-#define _LINUX_CLEANCACHE_H
-
-#include <linux/fs.h>
-#include <linux/exportfs.h>
-#include <linux/mm.h>
-
-#define CLEANCACHE_NO_POOL -1
-#define CLEANCACHE_NO_BACKEND -2
-#define CLEANCACHE_NO_BACKEND_SHARED -3
-
-#define CLEANCACHE_KEY_MAX 6
-
-/*
- * cleancache requires every file with a page in cleancache to have a
- * unique key unless/until the file is removed/truncated. For some
- * filesystems, the inode number is unique, but for "modern" filesystems
- * an exportable filehandle is required (see exportfs.h)
- */
-struct cleancache_filekey {
- union {
- ino_t ino;
- __u32 fh[CLEANCACHE_KEY_MAX];
- u32 key[CLEANCACHE_KEY_MAX];
- } u;
-};
-
-struct cleancache_ops {
- int (*init_fs)(size_t);
- int (*init_shared_fs)(uuid_t *uuid, size_t);
- int (*get_page)(int, struct cleancache_filekey,
- pgoff_t, struct page *);
- void (*put_page)(int, struct cleancache_filekey,
- pgoff_t, struct page *);
- void (*invalidate_page)(int, struct cleancache_filekey, pgoff_t);
- void (*invalidate_inode)(int, struct cleancache_filekey);
- void (*invalidate_fs)(int);
-};
-
-extern int cleancache_register_ops(const struct cleancache_ops *ops);
-extern void __cleancache_init_fs(struct super_block *);
-extern void __cleancache_init_shared_fs(struct super_block *);
-extern int __cleancache_get_page(struct page *);
-extern void __cleancache_put_page(struct page *);
-extern void __cleancache_invalidate_page(struct address_space *, struct page *);
-extern void __cleancache_invalidate_inode(struct address_space *);
-extern void __cleancache_invalidate_fs(struct super_block *);
-
-#ifdef CONFIG_CLEANCACHE
-#define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
-{
- return mapping->host->i_sb->cleancache_poolid >= 0;
-}
-static inline bool cleancache_fs_enabled(struct page *page)
-{
- return cleancache_fs_enabled_mapping(page->mapping);
-}
-#else
-#define cleancache_enabled (0)
-#define cleancache_fs_enabled(_page) (0)
-#define cleancache_fs_enabled_mapping(_page) (0)
-#endif
-
-/*
- * The shim layer provided by these inline functions allows the compiler
- * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE
- * is disabled, to a single global variable check if CONFIG_CLEANCACHE
- * is enabled but no cleancache "backend" has dynamically enabled it,
- * and, for the most frequent cleancache ops, to a single global variable
- * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled
- * and a cleancache backend has dynamically enabled cleancache, but the
- * filesystem referenced by that cleancache op has not enabled cleancache.
- * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially
- * no measurable performance impact.
- */
-
-static inline void cleancache_init_fs(struct super_block *sb)
-{
- if (cleancache_enabled)
- __cleancache_init_fs(sb);
-}
-
-static inline void cleancache_init_shared_fs(struct super_block *sb)
-{
- if (cleancache_enabled)
- __cleancache_init_shared_fs(sb);
-}
-
-static inline int cleancache_get_page(struct page *page)
-{
- if (cleancache_enabled && cleancache_fs_enabled(page))
- return __cleancache_get_page(page);
- return -1;
-}
-
-static inline void cleancache_put_page(struct page *page)
-{
- if (cleancache_enabled && cleancache_fs_enabled(page))
- __cleancache_put_page(page);
-}
-
-static inline void cleancache_invalidate_page(struct address_space *mapping,
- struct page *page)
-{
- /* careful... page->mapping is NULL sometimes when this is called */
- if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
- __cleancache_invalidate_page(mapping, page);
-}
-
-static inline void cleancache_invalidate_inode(struct address_space *mapping)
-{
- if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
- __cleancache_invalidate_inode(mapping);
-}
-
-static inline void cleancache_invalidate_fs(struct super_block *sb)
-{
- if (cleancache_enabled)
- __cleancache_invalidate_fs(sb);
-}
-
-#endif /* _LINUX_CLEANCACHE_H */
diff --git a/drivers/comedi/drivers/comedi_8254.h b/include/linux/comedi/comedi_8254.h
index d8264417e53c..d8264417e53c 100644
--- a/drivers/comedi/drivers/comedi_8254.h
+++ b/include/linux/comedi/comedi_8254.h
diff --git a/drivers/comedi/drivers/8255.h b/include/linux/comedi/comedi_8255.h
index ceae3ca52e60..b2a5bc6b3a49 100644
--- a/drivers/comedi/drivers/8255.h
+++ b/include/linux/comedi/comedi_8255.h
@@ -1,14 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * module/8255.h
- * Header file for 8255
+ * comedi_8255.h
+ * Generic 8255 digital I/O subdevice support
*
* COMEDI - Linux Control and Measurement Device Interface
* Copyright (C) 1998 David A. Schleef <ds@schleef.org>
*/
-#ifndef _8255_H
-#define _8255_H
+#ifndef _COMEDI_8255_H
+#define _COMEDI_8255_H
#define I8255_SIZE 0x04
diff --git a/drivers/comedi/drivers/comedi_isadma.h b/include/linux/comedi/comedi_isadma.h
index 9d2b12db7e6e..9d2b12db7e6e 100644
--- a/drivers/comedi/drivers/comedi_isadma.h
+++ b/include/linux/comedi/comedi_isadma.h
diff --git a/drivers/comedi/comedi_pci.h b/include/linux/comedi/comedi_pci.h
index 4e069440cbdc..2fb50663e3ed 100644
--- a/drivers/comedi/comedi_pci.h
+++ b/include/linux/comedi/comedi_pci.h
@@ -11,8 +11,7 @@
#define _COMEDI_PCI_H
#include <linux/pci.h>
-
-#include "comedidev.h"
+#include <linux/comedi/comedidev.h>
/*
* PCI Vendor IDs not in <linux/pci_ids.h>
diff --git a/drivers/comedi/comedi_pcmcia.h b/include/linux/comedi/comedi_pcmcia.h
index f2f6e779645b..a33dfb65b869 100644
--- a/drivers/comedi/comedi_pcmcia.h
+++ b/include/linux/comedi/comedi_pcmcia.h
@@ -12,8 +12,7 @@
#include <pcmcia/cistpl.h>
#include <pcmcia/ds.h>
-
-#include "comedidev.h"
+#include <linux/comedi/comedidev.h>
struct pcmcia_device *comedi_to_pcmcia_dev(struct comedi_device *dev);
diff --git a/drivers/comedi/comedi_usb.h b/include/linux/comedi/comedi_usb.h
index 601e29d3891c..5d17dd425bd2 100644
--- a/drivers/comedi/comedi_usb.h
+++ b/include/linux/comedi/comedi_usb.h
@@ -10,8 +10,7 @@
#define _COMEDI_USB_H
#include <linux/usb.h>
-
-#include "comedidev.h"
+#include <linux/comedi/comedidev.h>
struct usb_interface *comedi_to_usb_interface(struct comedi_device *dev);
struct usb_device *comedi_to_usb_dev(struct comedi_device *dev);
diff --git a/drivers/comedi/comedidev.h b/include/linux/comedi/comedidev.h
index 0e1b95ef9a4d..0a1150900ef3 100644
--- a/drivers/comedi/comedidev.h
+++ b/include/linux/comedi/comedidev.h
@@ -15,8 +15,7 @@
#include <linux/spinlock_types.h>
#include <linux/rwsem.h>
#include <linux/kref.h>
-
-#include "comedi.h"
+#include <linux/comedi.h>
#define COMEDI_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
#define COMEDI_VERSION_CODE COMEDI_VERSION(COMEDI_MAJORVERSION, \
diff --git a/drivers/comedi/comedilib.h b/include/linux/comedi/comedilib.h
index 0223c9cd9215..0223c9cd9215 100644
--- a/drivers/comedi/comedilib.h
+++ b/include/linux/comedi/comedilib.h
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 78fcd776b185..248a68c668b4 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -14,10 +14,6 @@ struct core_vma_metadata {
unsigned long dump_size;
};
-extern int core_uses_pid;
-extern char core_pattern[];
-extern unsigned int core_pipe_limit;
-
/*
* These are the only things you should do on a core-file: use only these
* functions to write out all the necessary info.
@@ -37,4 +33,10 @@ extern void do_coredump(const kernel_siginfo_t *siginfo);
static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
#endif
+#if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL)
+extern void validate_coredump_safety(void);
+#else
+static inline void validate_coredump_safety(void) {}
+#endif
+
#endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/counter.h b/include/linux/counter.h
index b7d0a00a61cf..1fe17f5adb09 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -38,64 +38,64 @@ enum counter_comp_type {
* @type: Counter component data type
* @name: device-specific component name
* @priv: component-relevant data
- * @action_read Synapse action mode read callback. The read value of the
+ * @action_read: Synapse action mode read callback. The read value of the
* respective Synapse action mode should be passed back via
* the action parameter.
- * @device_u8_read Device u8 component read callback. The read value of the
+ * @device_u8_read: Device u8 component read callback. The read value of the
* respective Device u8 component should be passed back via
* the val parameter.
- * @count_u8_read Count u8 component read callback. The read value of the
+ * @count_u8_read: Count u8 component read callback. The read value of the
* respective Count u8 component should be passed back via
* the val parameter.
- * @signal_u8_read Signal u8 component read callback. The read value of the
+ * @signal_u8_read: Signal u8 component read callback. The read value of the
* respective Signal u8 component should be passed back via
* the val parameter.
- * @device_u32_read Device u32 component read callback. The read value of
+ * @device_u32_read: Device u32 component read callback. The read value of
* the respective Device u32 component should be passed
* back via the val parameter.
- * @count_u32_read Count u32 component read callback. The read value of the
+ * @count_u32_read: Count u32 component read callback. The read value of the
* respective Count u32 component should be passed back via
* the val parameter.
- * @signal_u32_read Signal u32 component read callback. The read value of
+ * @signal_u32_read: Signal u32 component read callback. The read value of
* the respective Signal u32 component should be passed
* back via the val parameter.
- * @device_u64_read Device u64 component read callback. The read value of
+ * @device_u64_read: Device u64 component read callback. The read value of
* the respective Device u64 component should be passed
* back via the val parameter.
- * @count_u64_read Count u64 component read callback. The read value of the
+ * @count_u64_read: Count u64 component read callback. The read value of the
* respective Count u64 component should be passed back via
* the val parameter.
- * @signal_u64_read Signal u64 component read callback. The read value of
+ * @signal_u64_read: Signal u64 component read callback. The read value of
* the respective Signal u64 component should be passed
* back via the val parameter.
- * @action_write Synapse action mode write callback. The write value of
+ * @action_write: Synapse action mode write callback. The write value of
* the respective Synapse action mode is passed via the
* action parameter.
- * @device_u8_write Device u8 component write callback. The write value of
+ * @device_u8_write: Device u8 component write callback. The write value of
* the respective Device u8 component is passed via the val
* parameter.
- * @count_u8_write Count u8 component write callback. The write value of
+ * @count_u8_write: Count u8 component write callback. The write value of
* the respective Count u8 component is passed via the val
* parameter.
- * @signal_u8_write Signal u8 component write callback. The write value of
+ * @signal_u8_write: Signal u8 component write callback. The write value of
* the respective Signal u8 component is passed via the val
* parameter.
- * @device_u32_write Device u32 component write callback. The write value of
+ * @device_u32_write: Device u32 component write callback. The write value of
* the respective Device u32 component is passed via the
* val parameter.
- * @count_u32_write Count u32 component write callback. The write value of
+ * @count_u32_write: Count u32 component write callback. The write value of
* the respective Count u32 component is passed via the val
* parameter.
- * @signal_u32_write Signal u32 component write callback. The write value of
+ * @signal_u32_write: Signal u32 component write callback. The write value of
* the respective Signal u32 component is passed via the
* val parameter.
- * @device_u64_write Device u64 component write callback. The write value of
+ * @device_u64_write: Device u64 component write callback. The write value of
* the respective Device u64 component is passed via the
* val parameter.
- * @count_u64_write Count u64 component write callback. The write value of
+ * @count_u64_write: Count u64 component write callback. The write value of
* the respective Count u64 component is passed via the val
* parameter.
- * @signal_u64_write Signal u64 component write callback. The write value of
+ * @signal_u64_write: Signal u64 component write callback. The write value of
* the respective Signal u64 component is passed via the
* val parameter.
*/
@@ -314,8 +314,6 @@ struct counter_device {
struct counter_comp *ext;
size_t num_ext;
- void *priv;
-
struct device dev;
struct cdev chrdev;
struct list_head events_list;
@@ -329,10 +327,17 @@ struct counter_device {
struct mutex ops_exist_lock;
};
-int counter_register(struct counter_device *const counter);
+void *counter_priv(const struct counter_device *const counter);
+
+struct counter_device *counter_alloc(size_t sizeof_priv);
+void counter_put(struct counter_device *const counter);
+int counter_add(struct counter_device *const counter);
+
void counter_unregister(struct counter_device *const counter);
-int devm_counter_register(struct device *dev,
- struct counter_device *const counter);
+struct counter_device *devm_counter_alloc(struct device *dev,
+ size_t sizeof_priv);
+int devm_counter_add(struct device *dev,
+ struct counter_device *const counter);
void counter_push_event(struct counter_device *const counter, const u8 event,
const u8 channel);
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 1e7399fc69c0..64dae70d31f5 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -123,6 +123,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
return 0;
}
+static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
+{
+ return 0;
+}
+
+static inline unsigned int cpumask_first_and(const struct cpumask *srcp1,
+ const struct cpumask *srcp2)
+{
+ return 0;
+}
+
static inline unsigned int cpumask_last(const struct cpumask *srcp)
{
return 0;
@@ -167,7 +178,7 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
const struct cpumask *src2p) {
- return cpumask_next_and(-1, src1p, src2p);
+ return cpumask_first_and(src1p, src2p);
}
static inline int cpumask_any_distribute(const struct cpumask *srcp)
@@ -196,6 +207,30 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
}
/**
+ * cpumask_first_zero - get the first unset cpu in a cpumask
+ * @srcp: the cpumask pointer
+ *
+ * Returns >= nr_cpu_ids if all cpus are set.
+ */
+static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
+{
+ return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
+ * @src1p: the first input
+ * @src2p: the second input
+ *
+ * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
+ */
+static inline
+unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
+{
+ return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
+}
+
+/**
* cpumask_last - get the last CPU in a cpumask
* @srcp: - the cpumask pointer
*
@@ -586,15 +621,6 @@ static inline void cpumask_copy(struct cpumask *dstp,
#define cpumask_any(srcp) cpumask_first(srcp)
/**
- * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
- * @src1p: the first input
- * @src2p: the second input
- *
- * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
- */
-#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p))
-
-/**
* cpumask_any_and - pick a "random" cpu from *mask1 & *mask2
* @mask1: the first input cpumask
* @mask2: the second input cpumask
diff --git a/include/linux/damon.h b/include/linux/damon.h
index b4d4be3cc987..5e1e3a128b77 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -11,12 +11,19 @@
#include <linux/mutex.h>
#include <linux/time64.h>
#include <linux/types.h>
+#include <linux/random.h>
/* Minimal region size. Every damon_region is aligned by this. */
#define DAMON_MIN_REGION PAGE_SIZE
/* Max priority score for DAMON-based operation schemes */
#define DAMOS_MAX_SCORE (99)
+/* Get a random number in [l, r) */
+static inline unsigned long damon_rand(unsigned long l, unsigned long r)
+{
+ return l + prandom_u32_max(r - l);
+}
+
/**
* struct damon_addr_range - Represents an address region of [@start, @end).
* @start: Start address of the region (inclusive).
@@ -186,6 +193,22 @@ struct damos_watermarks {
};
/**
+ * struct damos_stat - Statistics on a given scheme.
+ * @nr_tried: Total number of regions that the scheme is tried to be applied.
+ * @sz_tried: Total size of regions that the scheme is tried to be applied.
+ * @nr_applied: Total number of regions that the scheme is applied.
+ * @sz_applied: Total size of regions that the scheme is applied.
+ * @qt_exceeds: Total number of times the quota of the scheme has exceeded.
+ */
+struct damos_stat {
+ unsigned long nr_tried;
+ unsigned long sz_tried;
+ unsigned long nr_applied;
+ unsigned long sz_applied;
+ unsigned long qt_exceeds;
+};
+
+/**
* struct damos - Represents a Data Access Monitoring-based Operation Scheme.
* @min_sz_region: Minimum size of target regions.
* @max_sz_region: Maximum size of target regions.
@@ -196,8 +219,7 @@ struct damos_watermarks {
* @action: &damo_action to be applied to the target regions.
* @quota: Control the aggressiveness of this scheme.
* @wmarks: Watermarks for automated (in)activation of this scheme.
- * @stat_count: Total number of regions that this scheme is applied.
- * @stat_sz: Total size of regions that this scheme is applied.
+ * @stat: Statistics of this scheme.
* @list: List head for siblings.
*
* For each aggregation interval, DAMON finds regions which fit in the
@@ -228,8 +250,7 @@ struct damos {
enum damos_action action;
struct damos_quota quota;
struct damos_watermarks wmarks;
- unsigned long stat_count;
- unsigned long stat_sz;
+ struct damos_stat stat;
struct list_head list;
};
@@ -274,7 +295,8 @@ struct damon_ctx;
* as an integer in [0, &DAMOS_MAX_SCORE].
* @apply_scheme is called from @kdamond when a region for user provided
* DAMON-based operation scheme is found. It should apply the scheme's action
- * to the region. This is not used for &DAMON_ARBITRARY_TARGET case.
+ * to the region and return bytes of the region that the action is successfully
+ * applied.
* @target_valid should check whether the target is still valid for the
* monitoring.
* @cleanup is called from @kdamond just before its termination.
@@ -288,8 +310,9 @@ struct damon_primitive {
int (*get_scheme_score)(struct damon_ctx *context,
struct damon_target *t, struct damon_region *r,
struct damos *scheme);
- int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t,
- struct damon_region *r, struct damos *scheme);
+ unsigned long (*apply_scheme)(struct damon_ctx *context,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme);
bool (*target_valid)(void *target);
void (*cleanup)(struct damon_ctx *context);
};
@@ -392,14 +415,20 @@ struct damon_ctx {
struct list_head schemes;
};
-#define damon_next_region(r) \
- (container_of(r->list.next, struct damon_region, list))
+static inline struct damon_region *damon_next_region(struct damon_region *r)
+{
+ return container_of(r->list.next, struct damon_region, list);
+}
-#define damon_prev_region(r) \
- (container_of(r->list.prev, struct damon_region, list))
+static inline struct damon_region *damon_prev_region(struct damon_region *r)
+{
+ return container_of(r->list.prev, struct damon_region, list);
+}
-#define damon_last_region(t) \
- (list_last_entry(&t->regions_list, struct damon_region, list))
+static inline struct damon_region *damon_last_region(struct damon_target *t)
+{
+ return list_last_entry(&t->regions_list, struct damon_region, list);
+}
#define damon_for_each_region(r, t) \
list_for_each_entry(r, &t->regions_list, list)
@@ -422,9 +451,18 @@ struct damon_ctx {
#ifdef CONFIG_DAMON
struct damon_region *damon_new_region(unsigned long start, unsigned long end);
-inline void damon_insert_region(struct damon_region *r,
+
+/*
+ * Add a region between two other regions
+ */
+static inline void damon_insert_region(struct damon_region *r,
struct damon_region *prev, struct damon_region *next,
- struct damon_target *t);
+ struct damon_target *t)
+{
+ __list_add(&r->list, &prev->list, &next->list);
+ t->nr_regions++;
+}
+
void damon_add_region(struct damon_region *r, struct damon_target *t);
void damon_destroy_region(struct damon_region *r, struct damon_target *t);
@@ -461,34 +499,13 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
#endif /* CONFIG_DAMON */
#ifdef CONFIG_DAMON_VADDR
-
-/* Monitoring primitives for virtual memory address spaces */
-void damon_va_init(struct damon_ctx *ctx);
-void damon_va_update(struct damon_ctx *ctx);
-void damon_va_prepare_access_checks(struct damon_ctx *ctx);
-unsigned int damon_va_check_accesses(struct damon_ctx *ctx);
bool damon_va_target_valid(void *t);
-void damon_va_cleanup(struct damon_ctx *ctx);
-int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t,
- struct damon_region *r, struct damos *scheme);
-int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t,
- struct damon_region *r, struct damos *scheme);
void damon_va_set_primitives(struct damon_ctx *ctx);
-
#endif /* CONFIG_DAMON_VADDR */
#ifdef CONFIG_DAMON_PADDR
-
-/* Monitoring primitives for the physical memory address space */
-void damon_pa_prepare_access_checks(struct damon_ctx *ctx);
-unsigned int damon_pa_check_accesses(struct damon_ctx *ctx);
bool damon_pa_target_valid(void *t);
-int damon_pa_apply_scheme(struct damon_ctx *context, struct damon_target *t,
- struct damon_region *r, struct damos *scheme);
-int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t,
- struct damon_region *r, struct damos *scheme);
void damon_pa_set_primitives(struct damon_ctx *ctx);
-
#endif /* CONFIG_DAMON_PADDR */
#endif /* _DAMON_H */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 9e23d33bb6f1..f5bba51480b2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -61,16 +61,6 @@ extern const struct qstr empty_name;
extern const struct qstr slash_name;
extern const struct qstr dotdot_name;
-struct dentry_stat_t {
- long nr_dentry;
- long nr_unused;
- long age_limit; /* age in seconds */
- long want_pages; /* pages requested by system */
- long nr_negative; /* # of unused negative dentries */
- long dummy; /* Reserved for future use */
-};
-extern struct dentry_stat_t dentry_stat;
-
/*
* Try to keep struct dentry aligned on 64 byte cachelines (this will
* give reasonable cacheline footprint with larger lines without the
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index af7e6eb50283..3e03d010bd2e 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -9,18 +9,9 @@
#include <uapi/linux/taskstats.h>
-/*
- * Per-task flags relevant to delay accounting
- * maintained privately to avoid exhausting similar flags in sched.h:PF_*
- * Used to set current->delays->flags
- */
-#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */
-#define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */
-
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
raw_spinlock_t lock;
- unsigned int flags; /* Private per-task flags */
/* For each stat XXX, add following, aligned appropriately
*
@@ -37,13 +28,13 @@ struct task_delay_info {
* associated with the operation is added to XXX_delay.
* XXX_delay contains the accumulated delay time in nanoseconds.
*/
- u64 blkio_start; /* Shared by blkio, swapin */
+ u64 blkio_start;
u64 blkio_delay; /* wait for sync block io completion */
- u64 swapin_delay; /* wait for swapin block io completion */
+ u64 swapin_start;
+ u64 swapin_delay; /* wait for swapin */
u32 blkio_count; /* total count of the number of sync block */
/* io operations performed */
- u32 swapin_count; /* total count of the number of swapin block */
- /* io operations performed */
+ u32 swapin_count; /* total count of swapin */
u64 freepages_start;
u64 freepages_delay; /* wait for memory reclaim */
@@ -51,8 +42,12 @@ struct task_delay_info {
u64 thrashing_start;
u64 thrashing_delay; /* wait for thrashing page */
+ u64 compact_start;
+ u64 compact_delay; /* wait for memory compact */
+
u32 freepages_count; /* total count of memory reclaim */
u32 thrashing_count; /* total count of thrash waits */
+ u32 compact_count; /* total count of memory compact */
};
#endif
@@ -79,26 +74,10 @@ extern void __delayacct_freepages_start(void);
extern void __delayacct_freepages_end(void);
extern void __delayacct_thrashing_start(void);
extern void __delayacct_thrashing_end(void);
-
-static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
-{
- if (p->delays)
- return (p->delays->flags & DELAYACCT_PF_BLKIO);
- else
- return 0;
-}
-
-static inline void delayacct_set_flag(struct task_struct *p, int flag)
-{
- if (p->delays)
- p->delays->flags |= flag;
-}
-
-static inline void delayacct_clear_flag(struct task_struct *p, int flag)
-{
- if (p->delays)
- p->delays->flags &= ~flag;
-}
+extern void __delayacct_swapin_start(void);
+extern void __delayacct_swapin_end(void);
+extern void __delayacct_compact_start(void);
+extern void __delayacct_compact_end(void);
static inline void delayacct_tsk_init(struct task_struct *tsk)
{
@@ -123,7 +102,6 @@ static inline void delayacct_blkio_start(void)
if (!static_branch_unlikely(&delayacct_key))
return;
- delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
if (current->delays)
__delayacct_blkio_start();
}
@@ -135,7 +113,6 @@ static inline void delayacct_blkio_end(struct task_struct *p)
if (p->delays)
__delayacct_blkio_end(p);
- delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
}
static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
@@ -147,33 +124,77 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
static inline void delayacct_freepages_start(void)
{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
if (current->delays)
__delayacct_freepages_start();
}
static inline void delayacct_freepages_end(void)
{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
if (current->delays)
__delayacct_freepages_end();
}
static inline void delayacct_thrashing_start(void)
{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
if (current->delays)
__delayacct_thrashing_start();
}
static inline void delayacct_thrashing_end(void)
{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
if (current->delays)
__delayacct_thrashing_end();
}
+static inline void delayacct_swapin_start(void)
+{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
+ if (current->delays)
+ __delayacct_swapin_start();
+}
+
+static inline void delayacct_swapin_end(void)
+{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
+ if (current->delays)
+ __delayacct_swapin_end();
+}
+
+static inline void delayacct_compact_start(void)
+{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
+ if (current->delays)
+ __delayacct_compact_start();
+}
+
+static inline void delayacct_compact_end(void)
+{
+ if (!static_branch_unlikely(&delayacct_key))
+ return;
+
+ if (current->delays)
+ __delayacct_compact_end();
+}
+
#else
-static inline void delayacct_set_flag(struct task_struct *p, int flag)
-{}
-static inline void delayacct_clear_flag(struct task_struct *p, int flag)
-{}
static inline void delayacct_init(void)
{}
static inline void delayacct_tsk_init(struct task_struct *tsk)
@@ -199,6 +220,14 @@ static inline void delayacct_thrashing_start(void)
{}
static inline void delayacct_thrashing_end(void)
{}
+static inline void delayacct_swapin_start(void)
+{}
+static inline void delayacct_swapin_end(void)
+{}
+static inline void delayacct_compact_start(void)
+{}
+static inline void delayacct_compact_end(void)
+{}
#endif /* CONFIG_TASK_DELAY_ACCT */
diff --git a/include/linux/dma/xilinx_dpdma.h b/include/linux/dma/xilinx_dpdma.h
index 83a1377f03f8..02a4adf8921b 100644
--- a/include/linux/dma/xilinx_dpdma.h
+++ b/include/linux/dma/xilinx_dpdma.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_DMA_XILINX_DPDMA_H
#define __LINUX_DMA_XILINX_DPDMA_H
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 0349b35235e6..842d4f7ca752 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -50,6 +50,7 @@ enum dma_status {
*/
enum dma_transaction_type {
DMA_MEMCPY,
+ DMA_MEMCPY_SG,
DMA_XOR,
DMA_PQ,
DMA_XOR_VAL,
@@ -887,6 +888,11 @@ struct dma_device {
struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
size_t len, unsigned long flags);
+ struct dma_async_tx_descriptor *(*device_prep_dma_memcpy_sg)(
+ struct dma_chan *chan,
+ struct scatterlist *dst_sg, unsigned int dst_nents,
+ struct scatterlist *src_sg, unsigned int src_nents,
+ unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
unsigned int src_cnt, size_t len, unsigned long flags);
@@ -1047,6 +1053,20 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy(
len, flags);
}
+static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy_sg(
+ struct dma_chan *chan,
+ struct scatterlist *dst_sg, unsigned int dst_nents,
+ struct scatterlist *src_sg, unsigned int src_nents,
+ unsigned long flags)
+{
+ if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy_sg)
+ return NULL;
+
+ return chan->device->device_prep_dma_memcpy_sg(chan, dst_sg, dst_nents,
+ src_sg, src_nents,
+ flags);
+}
+
static inline bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan,
enum dma_desc_metadata_mode mode)
{
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index b87c3b85a166..b1d26f9f1c9f 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -29,7 +29,6 @@ struct dnotify_struct {
FS_CREATE | FS_RENAME |\
FS_MOVED_FROM | FS_MOVED_TO)
-extern int dir_notify_enable;
extern void dnotify_flush(struct file *, fl_owner_t);
extern int fcntl_dirnotify(int, struct file *, unsigned long);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 0de9fb1fdc5a..ccd4d3f91c98 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -148,6 +148,52 @@ typedef struct {
u32 imagesize;
} efi_capsule_header_t;
+/* EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER */
+struct efi_manage_capsule_header {
+ u32 ver;
+ u16 emb_drv_cnt;
+ u16 payload_cnt;
+ /*
+ * Variable-size array of the size given by the sum of
+ * emb_drv_cnt and payload_cnt.
+ */
+ u64 offset_list[];
+} __packed;
+
+/* EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER */
+struct efi_manage_capsule_image_header {
+ u32 ver;
+ efi_guid_t image_type_id;
+ u8 image_index;
+ u8 reserved_bytes[3];
+ u32 image_size;
+ u32 vendor_code_size;
+ /* hw_ins was introduced in version 2 */
+ u64 hw_ins;
+ /* capsule_support was introduced in version 3 */
+ u64 capsule_support;
+} __packed;
+
+/* WIN_CERTIFICATE */
+struct win_cert {
+ u32 len;
+ u16 rev;
+ u16 cert_type;
+};
+
+/* WIN_CERTIFICATE_UEFI_GUID */
+struct win_cert_uefi_guid {
+ struct win_cert hdr;
+ efi_guid_t cert_type;
+ u8 cert_data[];
+};
+
+/* EFI_FIRMWARE_IMAGE_AUTHENTICATION */
+struct efi_image_auth {
+ u64 mon_count;
+ struct win_cert_uefi_guid auth_info;
+};
+
/*
* EFI capsule flags
*/
diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h
index e272c3d452ce..54feb64e9b5d 100644
--- a/include/linux/elfcore-compat.h
+++ b/include/linux/elfcore-compat.h
@@ -43,6 +43,11 @@ struct compat_elf_prpsinfo
__compat_uid_t pr_uid;
__compat_gid_t pr_gid;
compat_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
+ /*
+ * The hard-coded 16 is derived from TASK_COMM_LEN, but it can't be
+ * changed as it is exposed to userspace. We'd better make it hard-coded
+ * here.
+ */
char pr_fname[16];
char pr_psargs[ELF_PRARGSZ];
};
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 957ebec35aad..746e081879a5 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -65,6 +65,11 @@ struct elf_prpsinfo
__kernel_gid_t pr_gid;
pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
/* Lots missing */
+ /*
+ * The hard-coded 16 is derived from TASK_COMM_LEN, but it can't be
+ * changed as it is exposed to userspace. We'd better make it hard-coded
+ * here.
+ */
char pr_fname[16]; /* filename of executable */
char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
};
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 3260fe714846..fe848901fcc3 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -221,8 +221,6 @@ struct export_operations {
#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply
atomic attribute updates
*/
-#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do
- asychronous blocking locks */
unsigned long flags;
};
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 3afdf339d53c..419cadcd7ff5 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -5,8 +5,6 @@
#include <linux/sysctl.h>
#include <uapi/linux/fanotify.h>
-extern struct ctl_table fanotify_table[]; /* for sysctl */
-
#define FAN_GROUP_FLAG(group, flag) \
((group)->fanotify_data.flags & (flag))
diff --git a/include/linux/find.h b/include/linux/find.h
new file mode 100644
index 000000000000..5bb6db213bcb
--- /dev/null
+++ b/include/linux/find.h
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_FIND_H_
+#define __LINUX_FIND_H_
+
+#ifndef __LINUX_BITMAP_H
+#error only <linux/bitmap.h> can be included directly
+#endif
+
+#include <linux/bitops.h>
+
+extern unsigned long _find_next_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long nbits,
+ unsigned long start, unsigned long invert, unsigned long le);
+extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size);
+extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
+
+#ifndef find_next_bit
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
+}
+#endif
+
+#ifndef find_next_and_bit
+/**
+ * find_next_and_bit - find the next set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr1 & *addr2 & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
+}
+#endif
+
+#ifndef find_next_zero_bit
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next zero bit
+ * If no bits are zero, returns @size.
+ */
+static inline
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr | ~GENMASK(size - 1, offset);
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
+}
+#endif
+
+#ifndef find_first_bit
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first set bit.
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr & GENMASK(size - 1, 0);
+
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_first_bit(addr, size);
+}
+#endif
+
+#ifndef find_first_and_bit
+/**
+ * find_first_and_bit - find the first set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
+
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_first_and_bit(addr1, addr2, size);
+}
+#endif
+
+#ifndef find_first_zero_bit
+/**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first cleared bit.
+ * If no bits are zero, returns @size.
+ */
+static inline
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr | ~GENMASK(size - 1, 0);
+
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_first_zero_bit(addr, size);
+}
+#endif
+
+#ifndef find_last_bit
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The number of bits to search
+ *
+ * Returns the bit number of the last set bit, or size.
+ */
+static inline
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr & GENMASK(size - 1, 0);
+
+ return val ? __fls(val) : size;
+ }
+
+ return _find_last_bit(addr, size);
+}
+#endif
+
+/**
+ * find_next_clump8 - find next 8-bit clump with set bits in a memory region
+ * @clump: location to store copy of found clump
+ * @addr: address to base the search on
+ * @size: bitmap size in number of bits
+ * @offset: bit offset at which to start searching
+ *
+ * Returns the bit offset for the next set clump; the found clump value is
+ * copied to the location pointed by @clump. If no bits are set, returns @size.
+ */
+extern unsigned long find_next_clump8(unsigned long *clump,
+ const unsigned long *addr,
+ unsigned long size, unsigned long offset);
+
+#define find_first_clump8(clump, bits, size) \
+ find_next_clump8((clump), (bits), (size), 0)
+
+#if defined(__LITTLE_ENDIAN)
+
+static inline unsigned long find_next_zero_bit_le(const void *addr,
+ unsigned long size, unsigned long offset)
+{
+ return find_next_zero_bit(addr, size, offset);
+}
+
+static inline unsigned long find_next_bit_le(const void *addr,
+ unsigned long size, unsigned long offset)
+{
+ return find_next_bit(addr, size, offset);
+}
+
+static inline unsigned long find_first_zero_bit_le(const void *addr,
+ unsigned long size)
+{
+ return find_first_zero_bit(addr, size);
+}
+
+#elif defined(__BIG_ENDIAN)
+
+#ifndef find_next_zero_bit_le
+static inline
+unsigned long find_next_zero_bit_le(const void *addr, unsigned
+ long size, unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *(const unsigned long *)addr;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = swab(val) | ~GENMASK(size - 1, offset);
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
+}
+#endif
+
+#ifndef find_next_bit_le
+static inline
+unsigned long find_next_bit_le(const void *addr, unsigned
+ long size, unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *(const unsigned long *)addr;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = swab(val) & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
+}
+#endif
+
+#ifndef find_first_zero_bit_le
+#define find_first_zero_bit_le(addr, size) \
+ find_next_zero_bit_le((addr), (size), 0)
+#endif
+
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), 0); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), (bit)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+#define for_each_clear_bit(bit, addr, size) \
+ for ((bit) = find_next_zero_bit((addr), (size), 0); \
+ (bit) < (size); \
+ (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_clear_bit() but use bit as value to start with */
+#define for_each_clear_bit_from(bit, addr, size) \
+ for ((bit) = find_next_zero_bit((addr), (size), (bit)); \
+ (bit) < (size); \
+ (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
+/**
+ * for_each_set_bitrange - iterate over all set bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit)
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_bitrange(b, e, addr, size) \
+ for ((b) = find_next_bit((addr), (size), 0), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1); \
+ (b) < (size); \
+ (b) = find_next_bit((addr), (size), (e) + 1), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_set_bitrange_from - iterate over all set bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit); must be initialized
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_bitrange_from(b, e, addr, size) \
+ for ((b) = find_next_bit((addr), (size), (b)), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1); \
+ (b) < (size); \
+ (b) = find_next_bit((addr), (size), (e) + 1), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_clear_bitrange - iterate over all unset bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first unset bit)
+ * @e: bit offset of end of current bitrange (first set bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_clear_bitrange(b, e, addr, size) \
+ for ((b) = find_next_zero_bit((addr), (size), 0), \
+ (e) = find_next_bit((addr), (size), (b) + 1); \
+ (b) < (size); \
+ (b) = find_next_zero_bit((addr), (size), (e) + 1), \
+ (e) = find_next_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit); must be initialized
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_clear_bitrange_from(b, e, addr, size) \
+ for ((b) = find_next_zero_bit((addr), (size), (b)), \
+ (e) = find_next_bit((addr), (size), (b) + 1); \
+ (b) < (size); \
+ (b) = find_next_zero_bit((addr), (size), (e) + 1), \
+ (e) = find_next_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
+ * @start: bit offset to start search and to store the current iteration offset
+ * @clump: location to store copy of current 8-bit clump
+ * @bits: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_clump8(start, clump, bits, size) \
+ for ((start) = find_first_clump8(&(clump), (bits), (size)); \
+ (start) < (size); \
+ (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
+
+#endif /*__LINUX_FIND_H_ */
diff --git a/include/linux/firmware/xlnx-event-manager.h b/include/linux/firmware/xlnx-event-manager.h
new file mode 100644
index 000000000000..3f87c4929d21
--- /dev/null
+++ b/include/linux/firmware/xlnx-event-manager.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _FIRMWARE_XLNX_EVENT_MANAGER_H_
+#define _FIRMWARE_XLNX_EVENT_MANAGER_H_
+
+#include <linux/firmware/xlnx-zynqmp.h>
+
+#define CB_MAX_PAYLOAD_SIZE (4U) /*In payload maximum 32bytes */
+
+/************************** Exported Function *****************************/
+
+typedef void (*event_cb_func_t)(const u32 *payload, void *data);
+
+#if IS_REACHABLE(CONFIG_XLNX_EVENT_MANAGER)
+int xlnx_register_event(const enum pm_api_cb_id cb_type, const u32 node_id,
+ const u32 event, const bool wake,
+ event_cb_func_t cb_fun, void *data);
+
+int xlnx_unregister_event(const enum pm_api_cb_id cb_type, const u32 node_id,
+ const u32 event, event_cb_func_t cb_fun);
+#else
+static inline int xlnx_register_event(const enum pm_api_cb_id cb_type, const u32 node_id,
+ const u32 event, const bool wake,
+ event_cb_func_t cb_fun, void *data)
+{
+ return -ENODEV;
+}
+
+static inline int xlnx_unregister_event(const enum pm_api_cb_id cb_type, const u32 node_id,
+ const u32 event, event_cb_func_t cb_fun)
+{
+ return -ENODEV;
+}
+#endif
+
+#endif /* _FIRMWARE_XLNX_EVENT_MANAGER_H_ */
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 47fd4e52a423..907cb01890cf 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -2,7 +2,7 @@
/*
* Xilinx Zynq MPSoC Firmware layer
*
- * Copyright (C) 2014-2019 Xilinx
+ * Copyright (C) 2014-2021 Xilinx
*
* Michal Simek <michal.simek@xilinx.com>
* Davorin Mista <davorin.mista@aggios.com>
@@ -64,8 +64,23 @@
#define XILINX_ZYNQMP_PM_FPGA_FULL 0x0U
#define XILINX_ZYNQMP_PM_FPGA_PARTIAL BIT(0)
+/*
+ * Node IDs for the Error Events.
+ */
+#define EVENT_ERROR_PMC_ERR1 (0x28100000U)
+#define EVENT_ERROR_PMC_ERR2 (0x28104000U)
+#define EVENT_ERROR_PSM_ERR1 (0x28108000U)
+#define EVENT_ERROR_PSM_ERR2 (0x2810C000U)
+
+enum pm_api_cb_id {
+ PM_INIT_SUSPEND_CB = 30,
+ PM_ACKNOWLEDGE_CB = 31,
+ PM_NOTIFY_CB = 32,
+};
+
enum pm_api_id {
PM_GET_API_VERSION = 1,
+ PM_REGISTER_NOTIFIER = 5,
PM_SYSTEM_SHUTDOWN = 12,
PM_REQUEST_NODE = 13,
PM_RELEASE_NODE = 14,
@@ -126,6 +141,8 @@ enum pm_ioctl_id {
/* Set healthy bit value */
IOCTL_SET_BOOT_HEALTH_STATUS = 17,
IOCTL_OSPI_MUX_SELECT = 21,
+ /* Register SGI to ATF */
+ IOCTL_REGISTER_SGI = 25,
};
enum pm_query_id {
@@ -427,6 +444,9 @@ int zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param,
int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param,
u32 value);
int zynqmp_pm_load_pdi(const u32 src, const u64 address);
+int zynqmp_pm_register_notifier(const u32 node, const u32 event,
+ const u32 wake, const u32 enable);
+int zynqmp_pm_feature(const u32 api_id);
#else
static inline int zynqmp_pm_get_api_version(u32 *version)
{
@@ -658,6 +678,17 @@ static inline int zynqmp_pm_load_pdi(const u32 src, const u64 address)
{
return -ENODEV;
}
+
+static inline int zynqmp_pm_register_notifier(const u32 node, const u32 event,
+ const u32 wake, const u32 enable)
+{
+ return -ENODEV;
+}
+
+static inline int zynqmp_pm_feature(const u32 api_id)
+{
+ return -ENODEV;
+}
#endif
#endif /* __FIRMWARE_ZYNQMP_H__ */
diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h
index 6c3c28806ff1..223da48a6d18 100644
--- a/include/linux/fpga/fpga-bridge.h
+++ b/include/linux/fpga/fpga-bridge.h
@@ -23,6 +23,23 @@ struct fpga_bridge_ops {
};
/**
+ * struct fpga_bridge_info - collection of parameters an FPGA Bridge
+ * @name: fpga bridge name
+ * @br_ops: pointer to structure of fpga bridge ops
+ * @priv: fpga bridge private data
+ *
+ * fpga_bridge_info contains parameters for the register function. These
+ * are separated into an info structure because they some are optional
+ * others could be added to in the future. The info structure facilitates
+ * maintaining a stable API.
+ */
+struct fpga_bridge_info {
+ const char *name;
+ const struct fpga_bridge_ops *br_ops;
+ void *priv;
+};
+
+/**
* struct fpga_bridge - FPGA bridge structure
* @name: name of low level FPGA bridge
* @dev: FPGA bridge device
@@ -62,15 +79,10 @@ int of_fpga_bridge_get_to_list(struct device_node *np,
struct fpga_image_info *info,
struct list_head *bridge_list);
-struct fpga_bridge *fpga_bridge_create(struct device *dev, const char *name,
- const struct fpga_bridge_ops *br_ops,
- void *priv);
-void fpga_bridge_free(struct fpga_bridge *br);
-int fpga_bridge_register(struct fpga_bridge *br);
+struct fpga_bridge *
+fpga_bridge_register(struct device *parent, const char *name,
+ const struct fpga_bridge_ops *br_ops,
+ void *priv);
void fpga_bridge_unregister(struct fpga_bridge *br);
-struct fpga_bridge
-*devm_fpga_bridge_create(struct device *dev, const char *name,
- const struct fpga_bridge_ops *br_ops, void *priv);
-
#endif /* _LINUX_FPGA_BRIDGE_H */
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 474c1f506307..0f9468771bb9 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -106,6 +106,36 @@ struct fpga_image_info {
};
/**
+ * struct fpga_compat_id - id for compatibility check
+ *
+ * @id_h: high 64bit of the compat_id
+ * @id_l: low 64bit of the compat_id
+ */
+struct fpga_compat_id {
+ u64 id_h;
+ u64 id_l;
+};
+
+/**
+ * struct fpga_manager_info - collection of parameters for an FPGA Manager
+ * @name: fpga manager name
+ * @compat_id: FPGA manager id for compatibility check.
+ * @mops: pointer to structure of fpga manager ops
+ * @priv: fpga manager private data
+ *
+ * fpga_manager_info contains parameters for the register_full function.
+ * These are separated into an info structure because they some are optional
+ * others could be added to in the future. The info structure facilitates
+ * maintaining a stable API.
+ */
+struct fpga_manager_info {
+ const char *name;
+ struct fpga_compat_id *compat_id;
+ const struct fpga_manager_ops *mops;
+ void *priv;
+};
+
+/**
* struct fpga_manager_ops - ops for low level fpga manager drivers
* @initial_header_size: Maximum number of bytes that should be passed into write_init
* @state: returns an enum value of the FPGA's state
@@ -144,17 +174,6 @@ struct fpga_manager_ops {
#define FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR BIT(4)
/**
- * struct fpga_compat_id - id for compatibility check
- *
- * @id_h: high 64bit of the compat_id
- * @id_l: low 64bit of the compat_id
- */
-struct fpga_compat_id {
- u64 id_h;
- u64 id_l;
-};
-
-/**
* struct fpga_manager - fpga manager structure
* @name: name of low level fpga manager
* @dev: fpga manager device
@@ -191,17 +210,18 @@ struct fpga_manager *fpga_mgr_get(struct device *dev);
void fpga_mgr_put(struct fpga_manager *mgr);
-struct fpga_manager *fpga_mgr_create(struct device *dev, const char *name,
- const struct fpga_manager_ops *mops,
- void *priv);
-void fpga_mgr_free(struct fpga_manager *mgr);
-int fpga_mgr_register(struct fpga_manager *mgr);
-void fpga_mgr_unregister(struct fpga_manager *mgr);
+struct fpga_manager *
+fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info);
-int devm_fpga_mgr_register(struct device *dev, struct fpga_manager *mgr);
+struct fpga_manager *
+fpga_mgr_register(struct device *parent, const char *name,
+ const struct fpga_manager_ops *mops, void *priv);
+void fpga_mgr_unregister(struct fpga_manager *mgr);
-struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name,
- const struct fpga_manager_ops *mops,
- void *priv);
+struct fpga_manager *
+devm_fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info);
+struct fpga_manager *
+devm_fpga_mgr_register(struct device *parent, const char *name,
+ const struct fpga_manager_ops *mops, void *priv);
#endif /*_LINUX_FPGA_MGR_H */
diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h
index 27cb706275db..3b87f232425c 100644
--- a/include/linux/fpga/fpga-region.h
+++ b/include/linux/fpga/fpga-region.h
@@ -7,6 +7,27 @@
#include <linux/fpga/fpga-mgr.h>
#include <linux/fpga/fpga-bridge.h>
+struct fpga_region;
+
+/**
+ * struct fpga_region_info - collection of parameters an FPGA Region
+ * @mgr: fpga region manager
+ * @compat_id: FPGA region id for compatibility check.
+ * @priv: fpga region private data
+ * @get_bridges: optional function to get bridges to a list
+ *
+ * fpga_region_info contains parameters for the register_full function.
+ * These are separated into an info structure because they some are optional
+ * others could be added to in the future. The info structure facilitates
+ * maintaining a stable API.
+ */
+struct fpga_region_info {
+ struct fpga_manager *mgr;
+ struct fpga_compat_id *compat_id;
+ void *priv;
+ int (*get_bridges)(struct fpga_region *region);
+};
+
/**
* struct fpga_region - FPGA Region structure
* @dev: FPGA Region device
@@ -37,15 +58,12 @@ struct fpga_region *fpga_region_class_find(
int fpga_region_program_fpga(struct fpga_region *region);
-struct fpga_region
-*fpga_region_create(struct device *dev, struct fpga_manager *mgr,
- int (*get_bridges)(struct fpga_region *));
-void fpga_region_free(struct fpga_region *region);
-int fpga_region_register(struct fpga_region *region);
-void fpga_region_unregister(struct fpga_region *region);
+struct fpga_region *
+fpga_region_register_full(struct device *parent, const struct fpga_region_info *info);
-struct fpga_region
-*devm_fpga_region_create(struct device *dev, struct fpga_manager *mgr,
- int (*get_bridges)(struct fpga_region *));
+struct fpga_region *
+fpga_region_register(struct device *parent, struct fpga_manager *mgr,
+ int (*get_bridges)(struct fpga_region *));
+void fpga_region_unregister(struct fpga_region *region);
#endif /* _FPGA_REGION_H */
diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index b07d88c92bb2..a631bac12220 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -7,31 +7,17 @@
#include <linux/bitops.h>
#include <linux/jump_label.h>
-/*
- * Return code to denote that requested number of
- * frontswap pages are unused(moved to page cache).
- * Used in shmem_unuse and try_to_unuse.
- */
-#define FRONTSWAP_PAGES_UNUSED 2
-
struct frontswap_ops {
void (*init)(unsigned); /* this swap type was just swapon'ed */
int (*store)(unsigned, pgoff_t, struct page *); /* store a page */
int (*load)(unsigned, pgoff_t, struct page *); /* load a page */
void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */
void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */
- struct frontswap_ops *next; /* private pointer to next ops */
};
-extern void frontswap_register_ops(struct frontswap_ops *ops);
-extern void frontswap_shrink(unsigned long);
-extern unsigned long frontswap_curr_pages(void);
-extern void frontswap_writethrough(bool);
-#define FRONTSWAP_HAS_EXCLUSIVE_GETS
-extern void frontswap_tmem_exclusive_gets(bool);
+int frontswap_register_ops(const struct frontswap_ops *ops);
-extern bool __frontswap_test(struct swap_info_struct *, pgoff_t);
-extern void __frontswap_init(unsigned type, unsigned long *map);
+extern void frontswap_init(unsigned type, unsigned long *map);
extern int __frontswap_store(struct page *page);
extern int __frontswap_load(struct page *page);
extern void __frontswap_invalidate_page(unsigned, pgoff_t);
@@ -45,11 +31,6 @@ static inline bool frontswap_enabled(void)
return static_branch_unlikely(&frontswap_enabled_key);
}
-static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
-{
- return __frontswap_test(sis, offset);
-}
-
static inline void frontswap_map_set(struct swap_info_struct *p,
unsigned long *map)
{
@@ -68,11 +49,6 @@ static inline bool frontswap_enabled(void)
return false;
}
-static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
-{
- return false;
-}
-
static inline void frontswap_map_set(struct swap_info_struct *p,
unsigned long *map)
{
@@ -112,11 +88,4 @@ static inline void frontswap_invalidate_area(unsigned type)
__frontswap_invalidate_area(type);
}
-static inline void frontswap_init(unsigned type, unsigned long *map)
-{
-#ifdef CONFIG_FRONTSWAP
- __frontswap_init(type, map);
-#endif
-}
-
#endif /* _LINUX_FRONTSWAP_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5d3bf5b69a6..f3daaea16554 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -79,15 +79,8 @@ extern void __init inode_init_early(void);
extern void __init files_init(void);
extern void __init files_maxfiles_init(void);
-extern struct files_stat_struct files_stat;
extern unsigned long get_max_files(void);
extern unsigned int sysctl_nr_open;
-extern struct inodes_stat_t inodes_stat;
-extern int leases_enable, lease_break_time;
-extern int sysctl_protected_symlinks;
-extern int sysctl_protected_hardlinks;
-extern int sysctl_protected_fifos;
-extern int sysctl_protected_regular;
typedef __kernel_rwf_t rwf_t;
@@ -1221,13 +1214,13 @@ static inline int fcntl_setlk(unsigned int fd, struct file *file,
#if BITS_PER_LONG == 32
static inline int fcntl_getlk64(struct file *file, unsigned int cmd,
- struct flock64 __user *user)
+ struct flock64 *user)
{
return -EINVAL;
}
static inline int fcntl_setlk64(unsigned int fd, struct file *file,
- unsigned int cmd, struct flock64 __user *user)
+ unsigned int cmd, struct flock64 *user)
{
return -EACCES;
}
@@ -1542,11 +1535,6 @@ struct super_block {
const struct dentry_operations *s_d_op; /* default d_op for dentries */
- /*
- * Saved pool identifier for cleancache (-1 means none)
- */
- int cleancache_poolid;
-
struct shrinker s_shrink; /* per-sb shrinker handle */
/* Number of inodes with nlink == 0 but still referenced */
@@ -3093,6 +3081,7 @@ extern void unlock_new_inode(struct inode *);
extern void discard_new_inode(struct inode *);
extern unsigned int get_next_ino(void);
extern void evict_inodes(struct super_block *sb);
+void dump_mapping(const struct address_space *);
/*
* Userspace may rely on the the inode number being non-zero. For example, glibc
@@ -3532,12 +3521,6 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos);
struct ctl_table;
-int proc_nr_files(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-int proc_nr_dentry(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-int proc_nr_inodes(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
int __init list_bdev_fs_names(char *buf, size_t size);
#define __FMODE_EXEC ((__force int) FMODE_EXEC)
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 6b54982fc5f3..13fa6f3df8e4 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -142,6 +142,8 @@ extern void put_fs_context(struct fs_context *fc);
extern int vfs_parse_fs_param_source(struct fs_context *fc,
struct fs_parameter *param);
extern void fc_drop_locked(struct fs_context *fc);
+int reconfigure_single(struct super_block *s,
+ int flags, void *data);
/*
* sget() wrappers to be called from the ->get_tree() op.
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index ede50406bcb0..296c5f1d9f35 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -665,6 +665,11 @@ static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
static inline
void fscache_note_page_release(struct fscache_cookie *cookie)
{
+ /* If we've written data to the cache (HAVE_DATA) and there wasn't any
+ * data in the cache when we started (NO_DATA_TO_READ), it may no
+ * longer be true that we can skip reading from the cache - so clear
+ * the flag that causes reads to be skipped.
+ */
if (cookie &&
test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) &&
test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags))
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 8fcc38467af6..80f63c862be5 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -302,7 +302,9 @@ struct vm_area_struct;
* lowest zone as a type of emergency reserve.
*
* %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
- * address.
+ * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory
+ * because the DMA32 kmalloc cache array is not implemented.
+ * (Reason: there is no such user in kernel).
*
* %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
* do not need to be directly accessible by the kernel but that cannot
@@ -598,9 +600,9 @@ struct page *alloc_pages(gfp_t gfp, unsigned int order);
struct folio *folio_alloc(gfp_t gfp, unsigned order);
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
struct vm_area_struct *vma, unsigned long addr,
- int node, bool hugepage);
+ bool hugepage);
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
- alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+ alloc_pages_vma(gfp_mask, order, vma, addr, true)
#else
static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
{
@@ -610,14 +612,14 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
{
return __folio_alloc_node(gfp, order, numa_node_id());
}
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, false)\
alloc_pages(gfp_mask, order)
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
alloc_pages(gfp_mask, order)
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define alloc_page_vma(gfp_mask, vma, addr) \
- alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+ alloc_pages_vma(gfp_mask, 0, vma, addr, false)
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/hash.h b/include/linux/hash.h
index ad6fa21d977b..38edaa08f862 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val)
return val * GOLDEN_RATIO_32;
}
-#ifndef HAVE_ARCH_HASH_32
-#define hash_32 hash_32_generic
-#endif
-static inline u32 hash_32_generic(u32 val, unsigned int bits)
+static inline u32 hash_32(u32 val, unsigned int bits)
{
/* High bits are more random, so use them. */
return __hash_32(val) >> (32 - bits);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 00351ccb49a3..d1897a69c540 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -622,8 +622,8 @@ struct hstate {
#endif
#ifdef CONFIG_CGROUP_HUGETLB
/* cgroup control files */
- struct cftype cgroup_files_dfl[7];
- struct cftype cgroup_files_legacy[9];
+ struct cftype cgroup_files_dfl[8];
+ struct cftype cgroup_files_legacy[10];
#endif
char name[HSTATE_NAME_LEN];
};
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index ba025ae27882..379344828e78 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -36,6 +36,11 @@ enum hugetlb_memory_event {
HUGETLB_NR_MEMORY_EVENTS,
};
+struct hugetlb_cgroup_per_node {
+ /* hugetlb usage in pages over all hstates. */
+ unsigned long usage[HUGE_MAX_HSTATE];
+};
+
struct hugetlb_cgroup {
struct cgroup_subsys_state css;
@@ -57,6 +62,8 @@ struct hugetlb_cgroup {
/* Handle for "hugetlb.events.local" */
struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
+
+ struct hugetlb_cgroup_per_node *nodeinfo[];
};
static inline struct hugetlb_cgroup *
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b823311eac79..f565a8938836 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1261,6 +1261,7 @@ struct hv_device {
struct vmbus_channel *channel;
struct kset *channels_kset;
+ struct device_dma_parameters dma_parms;
/* place holder to keep track of the dir for hv device in debugfs */
struct dentry *debug_dir;
@@ -1583,6 +1584,11 @@ struct hyperv_service_callback {
void (*callback)(void *context);
};
+struct hv_dma_range {
+ dma_addr_t dma;
+ u32 mapping_size;
+};
+
#define MAX_SRV_VER 0x7ffffff
extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
const int *fw_version, int fw_vercnt,
diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h
index ff15c61bf319..6564bdcdac66 100644
--- a/include/linux/iio/buffer-dma.h
+++ b/include/linux/iio/buffer-dma.h
@@ -17,11 +17,6 @@ struct iio_dma_buffer_queue;
struct iio_dma_buffer_ops;
struct device;
-struct iio_buffer_block {
- u32 size;
- u32 bytes_used;
-};
-
/**
* enum iio_block_state - State of a struct iio_dma_buffer_block
* @IIO_BLOCK_STATE_DEQUEUED: Block is not queued
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 324561b7a5e8..07025d6b3de1 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -103,15 +103,16 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev,
/**
* IIO_ENUM_AVAILABLE() - Initialize enum available extended channel attribute
* @_name: Attribute name ("_available" will be appended to the name)
+ * @_shared: Whether the attribute is shared between all channels
* @_e: Pointer to an iio_enum struct
*
* Creates a read only attribute which lists all the available enum items in a
* space separated list. This should usually be used together with IIO_ENUM()
*/
-#define IIO_ENUM_AVAILABLE(_name, _e) \
+#define IIO_ENUM_AVAILABLE(_name, _shared, _e) \
{ \
.name = (_name "_available"), \
- .shared = IIO_SHARED_BY_TYPE, \
+ .shared = _shared, \
.read = iio_enum_available_read, \
.private = (uintptr_t)(_e), \
}
diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index 096f68dd2e0c..4c69b144677b 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -55,6 +55,7 @@ struct iio_trigger_ops {
* @attached_own_device:[INTERN] if we are using our own device as trigger,
* i.e. if we registered a poll function to the same
* device as the one providing the trigger.
+ * @reenable_work: [INTERN] work item used to ensure reenable can sleep.
**/
struct iio_trigger {
const struct iio_trigger_ops *ops;
@@ -74,6 +75,7 @@ struct iio_trigger {
unsigned long pool[BITS_TO_LONGS(CONFIG_IIO_CONSUMERS_PER_TRIGGER)];
struct mutex pool_lock;
bool attached_own_device;
+ struct work_struct reenable_work;
};
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 84b3f8175cc6..a7aa91f3a8dc 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -24,6 +24,7 @@ enum iio_event_info {
#define IIO_VAL_INT_PLUS_NANO 3
#define IIO_VAL_INT_PLUS_MICRO_DB 4
#define IIO_VAL_INT_MULTIPLE 5
+#define IIO_VAL_INT_64 6 /* 64-bit data, val is lower 32 bits */
#define IIO_VAL_FRACTIONAL 10
#define IIO_VAL_FRACTIONAL_LOG2 11
#define IIO_VAL_CHAR 12
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 6a24905f6e1e..8d20caa1b268 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -7,11 +7,8 @@
#ifndef _LINUX_INOTIFY_H
#define _LINUX_INOTIFY_H
-#include <linux/sysctl.h>
#include <uapi/linux/inotify.h>
-extern struct ctl_table inotify_table[]; /* for sysctl */
-
#define ALL_INOTIFY_BITS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
IN_MOVED_TO | IN_CREATE | IN_DELETE | \
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index fb78108d694e..4a45562d8893 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -475,12 +475,12 @@ static inline void kasan_populate_early_vm_area_shadow(void *start,
* allocations with real shadow memory. With KASAN vmalloc, the special
* case is unnecessary, as the work is handled in the generic case.
*/
-int kasan_module_alloc(void *addr, size_t size);
+int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask);
void kasan_free_shadow(const struct vm_struct *vm);
#else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
-static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
+static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; }
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
#endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 77755ac3e189..33f47a996513 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -1,4 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NOTE:
+ *
+ * This header has combined a lot of unrelated to each other stuff.
+ * The process of splitting its content is in progress while keeping
+ * backward compatibility. That's why it's highly recommended NOT to
+ * include this header inside another header file, especially under
+ * generic or architectural include/ directory.
+ */
#ifndef _LINUX_KERNEL_H
#define _LINUX_KERNEL_H
@@ -187,7 +196,6 @@ static inline void might_fault(void) { }
#endif
void do_exit(long error_code) __noreturn;
-void complete_and_exit(struct completion *, long) __noreturn;
extern int num_to_str(char *buf, int size,
unsigned long long num, unsigned int width);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 8c8f7a4d93af..19b884353b15 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -348,12 +348,6 @@ extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
DEFINE_INSN_CACHE_OPS(optinsn);
-#ifdef CONFIG_SYSCTL
-extern int sysctl_kprobes_optimization;
-extern int proc_kprobes_optimization_handler(struct ctl_table *table,
- int write, void *buffer,
- size_t *length, loff_t *ppos);
-#endif /* CONFIG_SYSCTL */
extern void wait_for_kprobe_optimizer(void);
#else /* !CONFIG_OPTPROBES */
static inline void wait_for_kprobe_optimizer(void) { }
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 346b0f269161..3df4ea04716f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -33,7 +33,8 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
unsigned int cpu,
const char *namefmt);
-void set_kthread_struct(struct task_struct *p);
+void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk);
+bool set_kthread_struct(struct task_struct *p);
void kthread_set_per_cpu(struct task_struct *k, int cpu);
bool kthread_is_per_cpu(struct task_struct *k);
@@ -56,6 +57,31 @@ bool kthread_is_per_cpu(struct task_struct *k);
__k; \
})
+/**
+ * kthread_run_on_cpu - create and wake a cpu bound thread.
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @cpu: The cpu on which the thread should be bound,
+ * @namefmt: printf-style name for the thread. Format is restricted
+ * to "name.*%u". Code fills in cpu number.
+ *
+ * Description: Convenient wrapper for kthread_create_on_cpu()
+ * followed by wake_up_process(). Returns the kthread or
+ * ERR_PTR(-ENOMEM).
+ */
+static inline struct task_struct *
+kthread_run_on_cpu(int (*threadfn)(void *data), void *data,
+ unsigned int cpu, const char *namefmt)
+{
+ struct task_struct *p;
+
+ p = kthread_create_on_cpu(threadfn, data, cpu, namefmt);
+ if (!IS_ERR(p))
+ wake_up_process(p);
+
+ return p;
+}
+
void free_kthread_struct(struct task_struct *k);
void kthread_bind(struct task_struct *k, unsigned int cpu);
void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
@@ -70,6 +96,8 @@ void *kthread_probe_data(struct task_struct *k);
int kthread_park(struct task_struct *k);
void kthread_unpark(struct task_struct *k);
void kthread_parkme(void);
+void kthread_exit(long result) __noreturn;
+void kthread_complete_and_exit(struct completion *, long) __noreturn;
int kthreadd(void *unused);
extern struct task_struct *kthreadd_task;
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index 120e5e90fa1d..906f899813dc 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -27,9 +27,9 @@ struct kvm_dirty_ring {
int index;
};
-#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0)
+#ifndef CONFIG_HAVE_KVM_DIRTY_RING
/*
- * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should
+ * If CONFIG_HAVE_HVM_DIRTY_RING not defined, kvm_dirty_ring.o should
* not be included as well, so define these nop functions for the arch.
*/
static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
@@ -43,11 +43,6 @@ static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
return 0;
}
-static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
-{
- return NULL;
-}
-
static inline int kvm_dirty_ring_reset(struct kvm *kvm,
struct kvm_dirty_ring *ring)
{
@@ -74,11 +69,10 @@ static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
return true;
}
-#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+#else /* CONFIG_HAVE_KVM_DIRTY_RING */
u32 kvm_dirty_ring_get_rsvd_entries(void);
int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
-struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm);
/*
* called with kvm->slots_lock held, returns the number of
@@ -98,6 +92,6 @@ struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
-#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+#endif /* CONFIG_HAVE_KVM_DIRTY_RING */
#endif /* KVM_DIRTY_RING_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1a91e3a70dc0..06912d6b39d0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -29,6 +29,10 @@
#include <linux/refcount.h>
#include <linux/nospec.h>
#include <linux/notifier.h>
+#include <linux/hashtable.h>
+#include <linux/interval_tree.h>
+#include <linux/rbtree.h>
+#include <linux/xarray.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@@ -151,6 +155,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
#define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_GPC_INVALIDATE (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -304,13 +309,12 @@ struct kvm_vcpu {
u64 requests;
unsigned long guest_debug;
- int pre_pcpu;
- struct list_head blocked_vcpu_list;
-
struct mutex mutex;
struct kvm_run *run;
+#ifndef __KVM_HAVE_ARCH_WQP
struct rcuwait wait;
+#endif
struct pid __rcu *pid;
int sigset_active;
sigset_t sigset;
@@ -355,11 +359,13 @@ struct kvm_vcpu {
struct kvm_dirty_ring dirty_ring;
/*
- * The index of the most recently used memslot by this vCPU. It's ok
- * if this becomes stale due to memslot changes since we always check
- * it is a valid slot.
+ * The most recently used memslot by this vCPU and the slots generation
+ * for which it is valid.
+ * No wraparound protection is needed since generations won't overflow in
+ * thousands of years, even assuming 1M memslot operations per second.
*/
- int last_used_slot;
+ struct kvm_memory_slot *last_used_slot;
+ u64 last_used_slot_gen;
};
/* must be called with irqs disabled */
@@ -424,7 +430,26 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
*/
#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
+/*
+ * Since at idle each memslot belongs to two memslot sets it has to contain
+ * two embedded nodes for each data structure that it forms a part of.
+ *
+ * Two memslot sets (one active and one inactive) are necessary so the VM
+ * continues to run on one memslot set while the other is being modified.
+ *
+ * These two memslot sets normally point to the same set of memslots.
+ * They can, however, be desynchronized when performing a memslot management
+ * operation by replacing the memslot to be modified by its copy.
+ * After the operation is complete, both memslot sets once again point to
+ * the same, common set of memslot data.
+ *
+ * The memslots themselves are independent of each other so they can be
+ * individually added or deleted.
+ */
struct kvm_memory_slot {
+ struct hlist_node id_node[2];
+ struct interval_tree_node hva_node[2];
+ struct rb_node gfn_node[2];
gfn_t base_gfn;
unsigned long npages;
unsigned long *dirty_bitmap;
@@ -435,7 +460,7 @@ struct kvm_memory_slot {
u16 as_id;
};
-static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot)
+static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
{
return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
}
@@ -469,6 +494,12 @@ struct kvm_hv_sint {
u32 sint;
};
+struct kvm_xen_evtchn {
+ u32 port;
+ u32 vcpu;
+ u32 priority;
+};
+
struct kvm_kernel_irq_routing_entry {
u32 gsi;
u32 type;
@@ -489,6 +520,7 @@ struct kvm_kernel_irq_routing_entry {
} msi;
struct kvm_s390_adapter_int adapter;
struct kvm_hv_sint hv_sint;
+ struct kvm_xen_evtchn xen_evtchn;
};
struct hlist_node link;
};
@@ -519,18 +551,21 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
}
#endif
-/*
- * Note:
- * memslots are not sorted by id anymore, please use id_to_memslot()
- * to get the memslot by its id.
- */
struct kvm_memslots {
u64 generation;
- /* The mapping table from slot id to the index in memslots[]. */
- short id_to_index[KVM_MEM_SLOTS_NUM];
- atomic_t last_used_slot;
- int used_slots;
- struct kvm_memory_slot memslots[];
+ atomic_long_t last_used_slot;
+ struct rb_root_cached hva_tree;
+ struct rb_root gfn_tree;
+ /*
+ * The mapping table from slot id to memslot.
+ *
+ * 7-bit bucket count matches the size of the old id to index array for
+ * 512 slots, while giving good performance with this slot count.
+ * Higher bucket counts bring only small performance improvements but
+ * always result in higher memory usage (even for lower memslot counts).
+ */
+ DECLARE_HASHTABLE(id_hash, 7);
+ int node_idx;
};
struct kvm {
@@ -551,14 +586,22 @@ struct kvm {
*/
struct mutex slots_arch_lock;
struct mm_struct *mm; /* userspace tied to this vm */
+ unsigned long nr_memslot_pages;
+ /* The two memslot sets - active and inactive (per address space) */
+ struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2];
+ /* The current active memslot set for each address space */
struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
- struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ struct xarray vcpu_array;
/* Used to wait for completion of MMU notifiers. */
spinlock_t mn_invalidate_lock;
unsigned long mn_active_invalidate_count;
struct rcuwait mn_memslots_update_rcuwait;
+ /* For management / invalidation of gfn_to_pfn_caches */
+ spinlock_t gpc_lock;
+ struct list_head gpc_list;
+
/*
* created_vcpus is protected by kvm->lock, and is incremented
* at the beginning of KVM_CREATE_VCPU. online_vcpus is only
@@ -701,19 +744,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
smp_rmb();
- return kvm->vcpus[i];
+ return xa_load(&kvm->vcpu_array, i);
}
-#define kvm_for_each_vcpu(idx, vcpup, kvm) \
- for (idx = 0; \
- idx < atomic_read(&kvm->online_vcpus) && \
- (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
- idx++)
+#define kvm_for_each_vcpu(idx, vcpup, kvm) \
+ xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
+ (atomic_read(&kvm->online_vcpus) - 1))
static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
{
struct kvm_vcpu *vcpu = NULL;
- int i;
+ unsigned long i;
if (id < 0)
return NULL;
@@ -727,13 +768,12 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
return NULL;
}
-#define kvm_for_each_memslot(memslot, slots) \
- for (memslot = &slots->memslots[0]; \
- memslot < slots->memslots + slots->used_slots; memslot++) \
- if (WARN_ON_ONCE(!memslot->npages)) { \
- } else
+static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
+{
+ return vcpu->vcpu_idx;
+}
-void kvm_vcpu_destroy(struct kvm_vcpu *vcpu);
+void kvm_destroy_vcpus(struct kvm *kvm);
void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
@@ -793,21 +833,124 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
return __kvm_memslots(vcpu->kvm, as_id);
}
+static inline bool kvm_memslots_empty(struct kvm_memslots *slots)
+{
+ return RB_EMPTY_ROOT(&slots->gfn_tree);
+}
+
+#define kvm_for_each_memslot(memslot, bkt, slots) \
+ hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \
+ if (WARN_ON_ONCE(!memslot->npages)) { \
+ } else
+
static inline
struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
{
- int index = slots->id_to_index[id];
struct kvm_memory_slot *slot;
+ int idx = slots->node_idx;
- if (index < 0)
- return NULL;
+ hash_for_each_possible(slots->id_hash, slot, id_node[idx], id) {
+ if (slot->id == id)
+ return slot;
+ }
+
+ return NULL;
+}
- slot = &slots->memslots[index];
+/* Iterator used for walking memslots that overlap a gfn range. */
+struct kvm_memslot_iter {
+ struct kvm_memslots *slots;
+ struct rb_node *node;
+ struct kvm_memory_slot *slot;
+};
- WARN_ON(slot->id != id);
- return slot;
+static inline void kvm_memslot_iter_next(struct kvm_memslot_iter *iter)
+{
+ iter->node = rb_next(iter->node);
+ if (!iter->node)
+ return;
+
+ iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[iter->slots->node_idx]);
}
+static inline void kvm_memslot_iter_start(struct kvm_memslot_iter *iter,
+ struct kvm_memslots *slots,
+ gfn_t start)
+{
+ int idx = slots->node_idx;
+ struct rb_node *tmp;
+ struct kvm_memory_slot *slot;
+
+ iter->slots = slots;
+
+ /*
+ * Find the so called "upper bound" of a key - the first node that has
+ * its key strictly greater than the searched one (the start gfn in our case).
+ */
+ iter->node = NULL;
+ for (tmp = slots->gfn_tree.rb_node; tmp; ) {
+ slot = container_of(tmp, struct kvm_memory_slot, gfn_node[idx]);
+ if (start < slot->base_gfn) {
+ iter->node = tmp;
+ tmp = tmp->rb_left;
+ } else {
+ tmp = tmp->rb_right;
+ }
+ }
+
+ /*
+ * Find the slot with the lowest gfn that can possibly intersect with
+ * the range, so we'll ideally have slot start <= range start
+ */
+ if (iter->node) {
+ /*
+ * A NULL previous node means that the very first slot
+ * already has a higher start gfn.
+ * In this case slot start > range start.
+ */
+ tmp = rb_prev(iter->node);
+ if (tmp)
+ iter->node = tmp;
+ } else {
+ /* a NULL node below means no slots */
+ iter->node = rb_last(&slots->gfn_tree);
+ }
+
+ if (iter->node) {
+ iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[idx]);
+
+ /*
+ * It is possible in the slot start < range start case that the
+ * found slot ends before or at range start (slot end <= range start)
+ * and so it does not overlap the requested range.
+ *
+ * In such non-overlapping case the next slot (if it exists) will
+ * already have slot start > range start, otherwise the logic above
+ * would have found it instead of the current slot.
+ */
+ if (iter->slot->base_gfn + iter->slot->npages <= start)
+ kvm_memslot_iter_next(iter);
+ }
+}
+
+static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_t end)
+{
+ if (!iter->node)
+ return false;
+
+ /*
+ * If this slot starts beyond or at the end of the range so does
+ * every next one
+ */
+ return iter->slot->base_gfn < end;
+}
+
+/* Iterate over each memslot at least partially intersecting [start, end) range */
+#define kvm_for_each_memslot_in_gfn_range(iter, slots, start, end) \
+ for (kvm_memslot_iter_start(iter, slots, start); \
+ kvm_memslot_iter_is_valid(iter, end); \
+ kvm_memslot_iter_next(iter))
+
/*
* KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
* - create a new memory slot
@@ -833,11 +976,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change);
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change);
@@ -863,9 +1005,9 @@ void kvm_set_page_accessed(struct page *page);
kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable);
-kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
+kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
+kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn);
+kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
bool atomic, bool *async, bool write_fault,
bool *writable, hva_t *hva);
@@ -942,7 +1084,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn);
+void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
@@ -966,10 +1108,109 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
unsigned long len);
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
+/**
+ * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a
+ * given guest physical address.
+ *
+ * @kvm: pointer to kvm instance.
+ * @gpc: struct gfn_to_pfn_cache object.
+ * @vcpu: vCPU to be used for marking pages dirty and to be woken on
+ * invalidation.
+ * @guest_uses_pa: indicates that the resulting host physical PFN is used while
+ * @vcpu is IN_GUEST_MODE so invalidations should wake it.
+ * @kernel_map: requests a kernel virtual mapping (kmap / memremap).
+ * @gpa: guest physical address to map.
+ * @len: sanity check; the range being access must fit a single page.
+ * @dirty: mark the cache dirty immediately.
+ *
+ * @return: 0 for success.
+ * -EINVAL for a mapping which would cross a page boundary.
+ * -EFAULT for an untranslatable guest physical address.
+ *
+ * This primes a gfn_to_pfn_cache and links it into the @kvm's list for
+ * invalidations to be processed. Invalidation callbacks to @vcpu using
+ * %KVM_REQ_GPC_INVALIDATE will occur only for MMU notifiers, not for KVM
+ * memslot changes. Callers are required to use kvm_gfn_to_pfn_cache_check()
+ * to ensure that the cache is valid before accessing the target page.
+ */
+int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+ struct kvm_vcpu *vcpu, bool guest_uses_pa,
+ bool kernel_map, gpa_t gpa, unsigned long len,
+ bool dirty);
+
+/**
+ * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
+ *
+ * @kvm: pointer to kvm instance.
+ * @gpc: struct gfn_to_pfn_cache object.
+ * @gpa: current guest physical address to map.
+ * @len: sanity check; the range being access must fit a single page.
+ * @dirty: mark the cache dirty immediately.
+ *
+ * @return: %true if the cache is still valid and the address matches.
+ * %false if the cache is not valid.
+ *
+ * Callers outside IN_GUEST_MODE context should hold a read lock on @gpc->lock
+ * while calling this function, and then continue to hold the lock until the
+ * access is complete.
+ *
+ * Callers in IN_GUEST_MODE may do so without locking, although they should
+ * still hold a read lock on kvm->scru for the memslot checks.
+ */
+bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+ gpa_t gpa, unsigned long len);
+
+/**
+ * kvm_gfn_to_pfn_cache_refresh - update a previously initialized cache.
+ *
+ * @kvm: pointer to kvm instance.
+ * @gpc: struct gfn_to_pfn_cache object.
+ * @gpa: updated guest physical address to map.
+ * @len: sanity check; the range being access must fit a single page.
+ * @dirty: mark the cache dirty immediately.
+ *
+ * @return: 0 for success.
+ * -EINVAL for a mapping which would cross a page boundary.
+ * -EFAULT for an untranslatable guest physical address.
+ *
+ * This will attempt to refresh a gfn_to_pfn_cache. Note that a successful
+ * returm from this function does not mean the page can be immediately
+ * accessed because it may have raced with an invalidation. Callers must
+ * still lock and check the cache status, as this function does not return
+ * with the lock still held to permit access.
+ */
+int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+ gpa_t gpa, unsigned long len, bool dirty);
+
+/**
+ * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache.
+ *
+ * @kvm: pointer to kvm instance.
+ * @gpc: struct gfn_to_pfn_cache object.
+ *
+ * This unmaps the referenced page and marks it dirty, if appropriate. The
+ * cache is left in the invalid state but at least the mapping from GPA to
+ * userspace HVA will remain cached and can be reused on a subsequent
+ * refresh.
+ */
+void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+
+/**
+ * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache.
+ *
+ * @kvm: pointer to kvm instance.
+ * @gpc: struct gfn_to_pfn_cache object.
+ *
+ * This removes a cache from the @kvm's list to be processed on MMU notifier
+ * invocation.
+ */
+void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+
void kvm_sigset_activate(struct kvm_vcpu *vcpu);
void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
-void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
@@ -1152,6 +1393,20 @@ static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
#endif
}
+/*
+ * Wake a vCPU if necessary, but don't do any stats/metadata updates. Returns
+ * true if the vCPU was blocking and was awakened, false otherwise.
+ */
+static inline bool __kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
+{
+ return !!rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
+}
+
+static inline bool kvm_vcpu_is_blocking(struct kvm_vcpu *vcpu)
+{
+ return rcuwait_active(kvm_arch_vcpu_get_wait(vcpu));
+}
+
#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED
/*
* returns true if the virtual interrupt controller is initialized and
@@ -1184,7 +1439,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
-bool kvm_is_transparent_hugepage(kvm_pfn_t pfn);
struct kvm_irq_ack_notifier {
struct hlist_node link;
@@ -1215,25 +1469,15 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
/*
- * Returns a pointer to the memslot at slot_index if it contains gfn.
+ * Returns a pointer to the memslot if it contains gfn.
* Otherwise returns NULL.
*/
static inline struct kvm_memory_slot *
-try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
+try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
- struct kvm_memory_slot *slot;
-
- if (slot_index < 0 || slot_index >= slots->used_slots)
+ if (!slot)
return NULL;
- /*
- * slot_index can come from vcpu->last_used_slot which is not kept
- * in sync with userspace-controllable memslot deletion. So use nospec
- * to prevent the CPU from speculating past the end of memslots[].
- */
- slot_index = array_index_nospec(slot_index, slots->used_slots);
- slot = &slots->memslots[slot_index];
-
if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
return slot;
else
@@ -1241,63 +1485,63 @@ try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
}
/*
- * Returns a pointer to the memslot that contains gfn and records the index of
- * the slot in index. Otherwise returns NULL.
+ * Returns a pointer to the memslot that contains gfn. Otherwise returns NULL.
*
- * IMPORTANT: Slots are sorted from highest GFN to lowest GFN!
+ * With "approx" set returns the memslot also when the address falls
+ * in a hole. In that case one of the memslots bordering the hole is
+ * returned.
*/
static inline struct kvm_memory_slot *
-search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index)
+search_memslots(struct kvm_memslots *slots, gfn_t gfn, bool approx)
{
- int start = 0, end = slots->used_slots;
- struct kvm_memory_slot *memslots = slots->memslots;
struct kvm_memory_slot *slot;
-
- if (unlikely(!slots->used_slots))
- return NULL;
-
- while (start < end) {
- int slot = start + (end - start) / 2;
-
- if (gfn >= memslots[slot].base_gfn)
- end = slot;
- else
- start = slot + 1;
- }
-
- slot = try_get_memslot(slots, start, gfn);
- if (slot) {
- *index = start;
- return slot;
+ struct rb_node *node;
+ int idx = slots->node_idx;
+
+ slot = NULL;
+ for (node = slots->gfn_tree.rb_node; node; ) {
+ slot = container_of(node, struct kvm_memory_slot, gfn_node[idx]);
+ if (gfn >= slot->base_gfn) {
+ if (gfn < slot->base_gfn + slot->npages)
+ return slot;
+ node = node->rb_right;
+ } else
+ node = node->rb_left;
}
- return NULL;
+ return approx ? slot : NULL;
}
-/*
- * __gfn_to_memslot() and its descendants are here because it is called from
- * non-modular code in arch/powerpc/kvm/book3s_64_vio{,_hv}.c. gfn_to_memslot()
- * itself isn't here as an inline because that would bloat other code too much.
- */
static inline struct kvm_memory_slot *
-__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
+____gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn, bool approx)
{
struct kvm_memory_slot *slot;
- int slot_index = atomic_read(&slots->last_used_slot);
- slot = try_get_memslot(slots, slot_index, gfn);
+ slot = (struct kvm_memory_slot *)atomic_long_read(&slots->last_used_slot);
+ slot = try_get_memslot(slot, gfn);
if (slot)
return slot;
- slot = search_memslots(slots, gfn, &slot_index);
+ slot = search_memslots(slots, gfn, approx);
if (slot) {
- atomic_set(&slots->last_used_slot, slot_index);
+ atomic_long_set(&slots->last_used_slot, (unsigned long)slot);
return slot;
}
return NULL;
}
+/*
+ * __gfn_to_memslot() and its descendants are here to allow arch code to inline
+ * the lookups in hot paths. gfn_to_memslot() itself isn't here as an inline
+ * because that would bloat other code too much.
+ */
+static inline struct kvm_memory_slot *
+__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
+{
+ return ____gfn_to_memslot(slots, gfn, false);
+}
+
static inline unsigned long
__gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
{
@@ -1473,7 +1717,8 @@ struct _kvm_stats_desc {
STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist, \
HALT_POLL_HIST_COUNT), \
STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \
- HALT_POLL_HIST_COUNT)
+ HALT_POLL_HIST_COUNT), \
+ STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking)
extern struct dentry *kvm_debugfs_dir;
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 234eab059839..dceac12c1ce5 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -19,6 +19,7 @@ struct kvm_memslots;
enum kvm_mr_change;
#include <linux/types.h>
+#include <linux/spinlock_types.h>
#include <asm/kvm_types.h>
@@ -53,6 +54,23 @@ struct gfn_to_hva_cache {
struct kvm_memory_slot *memslot;
};
+struct gfn_to_pfn_cache {
+ u64 generation;
+ gpa_t gpa;
+ unsigned long uhva;
+ struct kvm_memory_slot *memslot;
+ struct kvm_vcpu *vcpu;
+ struct list_head list;
+ rwlock_t lock;
+ void *khva;
+ kvm_pfn_t pfn;
+ bool active;
+ bool valid;
+ bool dirty;
+ bool kernel_map;
+ bool guest_uses_pa;
+};
+
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
/*
* Memory caches are used to preallocate memory ahead of various MMU flows,
@@ -87,6 +105,7 @@ struct kvm_vcpu_stat_generic {
u64 halt_poll_success_hist[HALT_POLL_HIST_COUNT];
u64 halt_poll_fail_hist[HALT_POLL_HIST_COUNT];
u64 halt_wait_hist[HALT_POLL_HIST_COUNT];
+ u64 blocking;
};
#define KVM_STATS_NAME_SIZE 48
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2a8404b26083..605756f645be 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -39,25 +39,9 @@
* compile-time options: to be removed as soon as all the drivers are
* converted to the new debugging mechanism
*/
-#undef ATA_DEBUG /* debugging output */
-#undef ATA_VERBOSE_DEBUG /* yet more debugging output */
#undef ATA_IRQ_TRAP /* define to ack screaming irqs */
-#undef ATA_NDEBUG /* define to disable quick runtime checks */
-/* note: prints function name for you */
-#ifdef ATA_DEBUG
-#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#ifdef ATA_VERBOSE_DEBUG
-#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#else
-#define VPRINTK(fmt, args...)
-#endif /* ATA_VERBOSE_DEBUG */
-#else
-#define DPRINTK(fmt, args...)
-#define VPRINTK(fmt, args...)
-#endif /* ATA_DEBUG */
-
#define ata_print_version_once(dev, version) \
({ \
static bool __print_once; \
@@ -68,38 +52,6 @@
} \
})
-/* NEW: debug levels */
-#define HAVE_LIBATA_MSG 1
-
-enum {
- ATA_MSG_DRV = 0x0001,
- ATA_MSG_INFO = 0x0002,
- ATA_MSG_PROBE = 0x0004,
- ATA_MSG_WARN = 0x0008,
- ATA_MSG_MALLOC = 0x0010,
- ATA_MSG_CTL = 0x0020,
- ATA_MSG_INTR = 0x0040,
- ATA_MSG_ERR = 0x0080,
-};
-
-#define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV)
-#define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO)
-#define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE)
-#define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN)
-#define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC)
-#define ata_msg_ctl(p) ((p)->msg_enable & ATA_MSG_CTL)
-#define ata_msg_intr(p) ((p)->msg_enable & ATA_MSG_INTR)
-#define ata_msg_err(p) ((p)->msg_enable & ATA_MSG_ERR)
-
-static inline u32 ata_msg_init(int dval, int default_msg_enable_bits)
-{
- if (dval < 0 || dval >= (sizeof(u32) * 8))
- return default_msg_enable_bits; /* should be 0x1 - only driver info msgs */
- if (!dval)
- return 0;
- return (1 << dval) - 1;
-}
-
/* defines only for the constants which don't work well as enums */
#define ATA_TAG_POISON 0xfafbfcfdU
@@ -191,7 +143,7 @@ enum {
ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */
ATA_LFLAG_RST_ONCE = (1 << 9), /* limit recovery to one reset */
ATA_LFLAG_CHANGED = (1 << 10), /* LPM state changed on this link */
- ATA_LFLAG_NO_DB_DELAY = (1 << 11), /* no debounce delay on link resume */
+ ATA_LFLAG_NO_DEBOUNCE_DELAY = (1 << 11), /* no debounce delay on link resume */
/* struct ata_port flags */
ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */
@@ -884,7 +836,6 @@ struct ata_port {
unsigned int hsm_task_state;
- u32 msg_enable;
struct list_head eh_done_q;
wait_queue_head_t eh_wait_q;
int eh_tries;
@@ -933,7 +884,8 @@ struct ata_port_operations {
void (*set_piomode)(struct ata_port *ap, struct ata_device *dev);
void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev);
int (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev);
- unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, u16 *id);
+ unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf,
+ __le16 *id);
void (*dev_config)(struct ata_device *dev);
@@ -1160,13 +1112,15 @@ extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc);
extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
unsigned int n_elem);
extern unsigned int ata_dev_classify(const struct ata_taskfile *tf);
+extern unsigned int ata_port_classify(struct ata_port *ap,
+ const struct ata_taskfile *tf);
extern void ata_dev_disable(struct ata_device *adev);
extern void ata_id_string(const u16 *id, unsigned char *s,
unsigned int ofs, unsigned int len);
extern void ata_id_c_string(const u16 *id, unsigned char *s,
unsigned int ofs, unsigned int len);
extern unsigned int ata_do_dev_read_id(struct ata_device *dev,
- struct ata_taskfile *tf, u16 *id);
+ struct ata_taskfile *tf, __le16 *id);
extern void ata_qc_complete(struct ata_queued_cmd *qc);
extern u64 ata_qc_get_active(struct ata_port *ap);
extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
@@ -1432,6 +1386,12 @@ extern const struct attribute_group *ata_common_sdev_groups[];
.tag_alloc_policy = BLK_TAG_ALLOC_RR, \
.slave_configure = ata_scsi_slave_config
+#define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \
+ __ATA_BASE_SHT(drv_name), \
+ .can_queue = drv_qd, \
+ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \
+ .slave_configure = ata_scsi_slave_config
+
#define ATA_BASE_SHT(drv_name) \
ATA_SUBBASE_SHT(drv_name), \
.sdev_groups = ata_common_sdev_groups
@@ -1443,6 +1403,11 @@ extern const struct attribute_group *ata_ncq_sdev_groups[];
ATA_SUBBASE_SHT(drv_name), \
.sdev_groups = ata_ncq_sdev_groups, \
.change_queue_depth = ata_scsi_change_queue_depth
+
+#define ATA_NCQ_SHT_QD(drv_name, drv_qd) \
+ ATA_SUBBASE_SHT_QD(drv_name, drv_qd), \
+ .sdev_groups = ata_ncq_sdev_groups, \
+ .change_queue_depth = ata_scsi_change_queue_depth
#endif
/*
@@ -1487,51 +1452,61 @@ static inline int sata_srst_pmp(struct ata_link *link)
return link->pmp;
}
-/*
- * printk helpers
- */
-__printf(3, 4)
-void ata_port_printk(const struct ata_port *ap, const char *level,
- const char *fmt, ...);
-__printf(3, 4)
-void ata_link_printk(const struct ata_link *link, const char *level,
- const char *fmt, ...);
-__printf(3, 4)
-void ata_dev_printk(const struct ata_device *dev, const char *level,
- const char *fmt, ...);
+#define ata_port_printk(level, ap, fmt, ...) \
+ pr_ ## level ("ata%u: " fmt, (ap)->print_id, ##__VA_ARGS__)
#define ata_port_err(ap, fmt, ...) \
- ata_port_printk(ap, KERN_ERR, fmt, ##__VA_ARGS__)
+ ata_port_printk(err, ap, fmt, ##__VA_ARGS__)
#define ata_port_warn(ap, fmt, ...) \
- ata_port_printk(ap, KERN_WARNING, fmt, ##__VA_ARGS__)
+ ata_port_printk(warn, ap, fmt, ##__VA_ARGS__)
#define ata_port_notice(ap, fmt, ...) \
- ata_port_printk(ap, KERN_NOTICE, fmt, ##__VA_ARGS__)
+ ata_port_printk(notice, ap, fmt, ##__VA_ARGS__)
#define ata_port_info(ap, fmt, ...) \
- ata_port_printk(ap, KERN_INFO, fmt, ##__VA_ARGS__)
+ ata_port_printk(info, ap, fmt, ##__VA_ARGS__)
#define ata_port_dbg(ap, fmt, ...) \
- ata_port_printk(ap, KERN_DEBUG, fmt, ##__VA_ARGS__)
+ ata_port_printk(debug, ap, fmt, ##__VA_ARGS__)
+
+#define ata_link_printk(level, link, fmt, ...) \
+do { \
+ if (sata_pmp_attached((link)->ap) || \
+ (link)->ap->slave_link) \
+ pr_ ## level ("ata%u.%02u: " fmt, \
+ (link)->ap->print_id, \
+ (link)->pmp, \
+ ##__VA_ARGS__); \
+ else \
+ pr_ ## level ("ata%u: " fmt, \
+ (link)->ap->print_id, \
+ ##__VA_ARGS__); \
+} while (0)
#define ata_link_err(link, fmt, ...) \
- ata_link_printk(link, KERN_ERR, fmt, ##__VA_ARGS__)
+ ata_link_printk(err, link, fmt, ##__VA_ARGS__)
#define ata_link_warn(link, fmt, ...) \
- ata_link_printk(link, KERN_WARNING, fmt, ##__VA_ARGS__)
+ ata_link_printk(warn, link, fmt, ##__VA_ARGS__)
#define ata_link_notice(link, fmt, ...) \
- ata_link_printk(link, KERN_NOTICE, fmt, ##__VA_ARGS__)
+ ata_link_printk(notice, link, fmt, ##__VA_ARGS__)
#define ata_link_info(link, fmt, ...) \
- ata_link_printk(link, KERN_INFO, fmt, ##__VA_ARGS__)
+ ata_link_printk(info, link, fmt, ##__VA_ARGS__)
#define ata_link_dbg(link, fmt, ...) \
- ata_link_printk(link, KERN_DEBUG, fmt, ##__VA_ARGS__)
+ ata_link_printk(debug, link, fmt, ##__VA_ARGS__)
+
+#define ata_dev_printk(level, dev, fmt, ...) \
+ pr_ ## level("ata%u.%02u: " fmt, \
+ (dev)->link->ap->print_id, \
+ (dev)->link->pmp + (dev)->devno, \
+ ##__VA_ARGS__)
#define ata_dev_err(dev, fmt, ...) \
- ata_dev_printk(dev, KERN_ERR, fmt, ##__VA_ARGS__)
+ ata_dev_printk(err, dev, fmt, ##__VA_ARGS__)
#define ata_dev_warn(dev, fmt, ...) \
- ata_dev_printk(dev, KERN_WARNING, fmt, ##__VA_ARGS__)
+ ata_dev_printk(warn, dev, fmt, ##__VA_ARGS__)
#define ata_dev_notice(dev, fmt, ...) \
- ata_dev_printk(dev, KERN_NOTICE, fmt, ##__VA_ARGS__)
+ ata_dev_printk(notice, dev, fmt, ##__VA_ARGS__)
#define ata_dev_info(dev, fmt, ...) \
- ata_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__)
+ ata_dev_printk(info, dev, fmt, ##__VA_ARGS__)
#define ata_dev_dbg(dev, fmt, ...) \
- ata_dev_printk(dev, KERN_DEBUG, fmt, ##__VA_ARGS__)
+ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__)
void ata_print_version(const struct device *dev, const char *version);
@@ -2065,11 +2040,8 @@ static inline u8 ata_wait_idle(struct ata_port *ap)
{
u8 status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
-#ifdef ATA_DEBUG
if (status != 0xff && (status & (ATA_BUSY | ATA_DRQ)))
- ata_port_printk(ap, KERN_DEBUG, "abnormal Status 0x%X\n",
- status);
-#endif
+ ata_port_dbg(ap, "abnormal Status 0x%X\n", status);
return status;
}
diff --git a/include/linux/list.h b/include/linux/list.h
index 6636fc07f918..dd6c2041d09c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -258,8 +258,7 @@ static inline void list_bulk_move_tail(struct list_head *head,
* @list: the entry to test
* @head: the head of the list
*/
-static inline int list_is_first(const struct list_head *list,
- const struct list_head *head)
+static inline int list_is_first(const struct list_head *list, const struct list_head *head)
{
return list->prev == head;
}
@@ -269,13 +268,22 @@ static inline int list_is_first(const struct list_head *list,
* @list: the entry to test
* @head: the head of the list
*/
-static inline int list_is_last(const struct list_head *list,
- const struct list_head *head)
+static inline int list_is_last(const struct list_head *list, const struct list_head *head)
{
return list->next == head;
}
/**
+ * list_is_head - tests whether @list is the list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_head(const struct list_head *list, const struct list_head *head)
+{
+ return list == head;
+}
+
+/**
* list_empty - tests whether a list is empty
* @head: the list to test.
*/
@@ -318,7 +326,7 @@ static inline void list_del_init_careful(struct list_head *entry)
static inline int list_empty_careful(const struct list_head *head)
{
struct list_head *next = smp_load_acquire(&head->next);
- return (next == head) && (next == head->prev);
+ return list_is_head(next, head) && (next == head->prev);
}
/**
@@ -393,10 +401,9 @@ static inline void list_cut_position(struct list_head *list,
{
if (list_empty(head))
return;
- if (list_is_singular(head) &&
- (head->next != entry && head != entry))
+ if (list_is_singular(head) && !list_is_head(entry, head) && (entry != head->next))
return;
- if (entry == head)
+ if (list_is_head(entry, head))
INIT_LIST_HEAD(list);
else
__list_cut_position(list, head, entry);
@@ -570,7 +577,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* @head: the head for your list.
*/
#define list_for_each(pos, head) \
- for (pos = (head)->next; pos != (head); pos = pos->next)
+ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)
/**
* list_for_each_continue - continue iteration over a list
@@ -580,7 +587,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* Continue to iterate over a list, continuing after the current position.
*/
#define list_for_each_continue(pos, head) \
- for (pos = pos->next; pos != (head); pos = pos->next)
+ for (pos = pos->next; !list_is_head(pos, (head)); pos = pos->next)
/**
* list_for_each_prev - iterate over a list backwards
@@ -588,7 +595,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* @head: the head for your list.
*/
#define list_for_each_prev(pos, head) \
- for (pos = (head)->prev; pos != (head); pos = pos->prev)
+ for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev)
/**
* list_for_each_safe - iterate over a list safe against removal of list entry
@@ -597,8 +604,9 @@ static inline void list_splice_tail_init(struct list_head *list,
* @head: the head for your list.
*/
#define list_for_each_safe(pos, n, head) \
- for (pos = (head)->next, n = pos->next; pos != (head); \
- pos = n, n = pos->next)
+ for (pos = (head)->next, n = pos->next; \
+ !list_is_head(pos, (head)); \
+ pos = n, n = pos->next)
/**
* list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
@@ -608,7 +616,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_prev_safe(pos, n, head) \
for (pos = (head)->prev, n = pos->prev; \
- pos != (head); \
+ !list_is_head(pos, (head)); \
pos = n, n = pos->prev)
/**
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c4ae6506b8b3..fcef192e5e45 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -303,10 +303,15 @@ void nlmsvc_invalidate_all(void);
int nlmsvc_unlock_all_by_sb(struct super_block *sb);
int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
+static inline struct file *nlmsvc_file_file(struct nlm_file *file)
+{
+ return file->f_file[O_RDONLY] ?
+ file->f_file[O_RDONLY] : file->f_file[O_WRONLY];
+}
+
static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
- return locks_inode(file->f_file[O_RDONLY] ?
- file->f_file[O_RDONLY] : file->f_file[O_WRONLY]);
+ return locks_inode(nlmsvc_file_file(file));
}
static inline int __nlm_privileged_request4(const struct sockaddr *sap)
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index 0661af17a758..808bb4cee230 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -123,7 +123,11 @@ struct cmos_rtc_board_info {
#define RTC_IO_EXTENT_USED RTC_IO_EXTENT
#endif /* ARCH_RTC_LOCATION */
-unsigned int mc146818_get_time(struct rtc_time *time);
+bool mc146818_does_rtc_work(void);
+int mc146818_get_time(struct rtc_time *time);
int mc146818_set_time(struct rtc_time *time);
+bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
+ void *param);
+
#endif /* _MC146818RTC_H */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 9dc7cb239d21..50ad19662a32 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _LINUX_MEMBLOCK_H
#define _LINUX_MEMBLOCK_H
-#ifdef __KERNEL__
/*
* Logical memory blocks.
@@ -605,6 +604,5 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end)
}
#endif
-#endif /* __KERNEL__ */
#endif /* _LINUX_MEMBLOCK_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e34112f6a369..b72d75141e12 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -33,6 +33,7 @@ enum memcg_stat_item {
MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
MEMCG_SOCK,
MEMCG_PERCPU_B,
+ MEMCG_VMALLOC,
MEMCG_NR_STAT,
};
@@ -42,6 +43,7 @@ enum memcg_memory_event {
MEMCG_MAX,
MEMCG_OOM,
MEMCG_OOM_KILL,
+ MEMCG_OOM_GROUP_KILL,
MEMCG_SWAP_HIGH,
MEMCG_SWAP_MAX,
MEMCG_SWAP_FAIL,
@@ -943,6 +945,21 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
local_irq_restore(flags);
}
+static inline void mod_memcg_page_state(struct page *page,
+ int idx, int val)
+{
+ struct mem_cgroup *memcg;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ rcu_read_lock();
+ memcg = page_memcg(page);
+ if (memcg)
+ mod_memcg_state(memcg, idx, val);
+ rcu_read_unlock();
+}
+
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
return READ_ONCE(memcg->vmstats.state[idx]);
@@ -1398,6 +1415,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
{
}
+static inline void mod_memcg_page_state(struct page *page,
+ int idx, int val)
+{
+}
+
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
return 0;
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 3c7595e81150..668389b4b53d 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -46,6 +46,7 @@ struct mempolicy {
unsigned short mode; /* See MPOL_* above */
unsigned short flags; /* See set_mempolicy() MPOL_F_* above */
nodemask_t nodes; /* interleave/bind/perfer */
+ int home_node; /* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */
union {
nodemask_t cpuset_mems_allowed; /* relative to these nodes */
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index a8bc588fe7aa..1fafcc38acba 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -88,6 +88,11 @@ struct dev_pagemap_ops {
* @done: completion for @ref
* @type: memory type: see MEMORY_* in memory_hotplug.h
* @flags: PGMAP_* flags to specify defailed behavior
+ * @vmemmap_shift: structural definition of how the vmemmap page metadata
+ * is populated, specifically the metadata page order.
+ * A zero value (default) uses base pages as the vmemmap metadata
+ * representation. A bigger value will set up compound struct pages
+ * of the requested order value.
* @ops: method table
* @owner: an opaque pointer identifying the entity that manages this
* instance. Used by various helpers to make sure that no
@@ -102,6 +107,7 @@ struct dev_pagemap {
struct completion done;
enum memory_type type;
unsigned int flags;
+ unsigned long vmemmap_shift;
const struct dev_pagemap_ops *ops;
void *owner;
int nr_range;
@@ -118,6 +124,11 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
return NULL;
}
+static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap)
+{
+ return 1 << pgmap->vmemmap_shift;
+}
+
#ifdef CONFIG_ZONE_DEVICE
void *memremap_pages(struct dev_pagemap *pgmap, int nid);
void memunmap_pages(struct dev_pagemap *pgmap);
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index a5cc4cdf9cc8..a5441ad33c74 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -730,16 +730,27 @@ void mhi_device_put(struct mhi_device *mhi_dev);
/**
* mhi_prepare_for_transfer - Setup UL and DL channels for data transfer.
- * Allocate and initialize the channel context and
- * also issue the START channel command to both
- * channels. Channels can be started only if both
- * host and device execution environments match and
- * channels are in a DISABLED state.
* @mhi_dev: Device associated with the channels
+ *
+ * Allocate and initialize the channel context and also issue the START channel
+ * command to both channels. Channels can be started only if both host and
+ * device execution environments match and channels are in a DISABLED state.
*/
int mhi_prepare_for_transfer(struct mhi_device *mhi_dev);
/**
+ * mhi_prepare_for_transfer_autoqueue - Setup UL and DL channels with auto queue
+ * buffers for DL traffic
+ * @mhi_dev: Device associated with the channels
+ *
+ * Allocate and initialize the channel context and also issue the START channel
+ * command to both channels. Channels can be started only if both host and
+ * device execution environments match and channels are in a DISABLED state.
+ * The MHI core will automatically allocate and queue buffers for the DL traffic.
+ */
+int mhi_prepare_for_transfer_autoqueue(struct mhi_device *mhi_dev);
+
+/**
* mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer.
* Issue the RESET channel command and let the
* device clean-up the context so no incoming
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 4850cc5bf813..db96e10eb8da 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -40,6 +40,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page, int extra_count);
+void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
+ spinlock_t *ptl);
void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
int folio_migrate_mapping(struct address_space *mapping,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c768a7c81b0b..e1a84b1e6787 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -424,51 +424,6 @@ extern unsigned int kobjsize(const void *objp);
*/
extern pgprot_t protection_map[16];
-/**
- * enum fault_flag - Fault flag definitions.
- * @FAULT_FLAG_WRITE: Fault was a write fault.
- * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
- * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
- * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
- * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
- * @FAULT_FLAG_TRIED: The fault has been tried once.
- * @FAULT_FLAG_USER: The fault originated in userspace.
- * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
- * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
- * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
- *
- * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
- * whether we would allow page faults to retry by specifying these two
- * fault flags correctly. Currently there can be three legal combinations:
- *
- * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and
- * this is the first try
- *
- * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and
- * we've already tried at least once
- *
- * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
- *
- * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
- * be used. Note that page faults can be allowed to retry for multiple times,
- * in which case we'll have an initial fault with flags (a) then later on
- * continuous faults with flags (b). We should always try to detect pending
- * signals before a retry to make sure the continuous page faults can still be
- * interrupted if necessary.
- */
-enum fault_flag {
- FAULT_FLAG_WRITE = 1 << 0,
- FAULT_FLAG_MKWRITE = 1 << 1,
- FAULT_FLAG_ALLOW_RETRY = 1 << 2,
- FAULT_FLAG_RETRY_NOWAIT = 1 << 3,
- FAULT_FLAG_KILLABLE = 1 << 4,
- FAULT_FLAG_TRIED = 1 << 5,
- FAULT_FLAG_USER = 1 << 6,
- FAULT_FLAG_REMOTE = 1 << 7,
- FAULT_FLAG_INSTRUCTION = 1 << 8,
- FAULT_FLAG_INTERRUPTIBLE = 1 << 9,
-};
-
/*
* The default fault flags that should be used by most of the
* arch-specific page fault handlers.
@@ -577,6 +532,10 @@ enum page_entry_size {
*/
struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
+ /**
+ * @close: Called when the VMA is being removed from the MM.
+ * Context: User context. May sleep. Caller holds mmap_lock.
+ */
void (*close)(struct vm_area_struct * area);
/* Called any time before splitting to check if it's allowed */
int (*may_split)(struct vm_area_struct *area, unsigned long addr);
@@ -861,19 +820,15 @@ static inline int page_mapcount(struct page *page)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int total_mapcount(struct page *page);
-int page_trans_huge_mapcount(struct page *page, int *total_mapcount);
+int page_trans_huge_mapcount(struct page *page);
#else
static inline int total_mapcount(struct page *page)
{
return page_mapcount(page);
}
-static inline int page_trans_huge_mapcount(struct page *page,
- int *total_mapcount)
+static inline int page_trans_huge_mapcount(struct page *page)
{
- int mapcount = page_mapcount(page);
- if (total_mapcount)
- *total_mapcount = mapcount;
- return mapcount;
+ return page_mapcount(page);
}
#endif
@@ -1244,6 +1199,26 @@ static inline void folio_put(struct folio *folio)
__put_page(&folio->page);
}
+/**
+ * folio_put_refs - Reduce the reference count on a folio.
+ * @folio: The folio.
+ * @refs: The amount to subtract from the folio's reference count.
+ *
+ * If the folio's reference count reaches zero, the memory will be
+ * released back to the page allocator and may be used by another
+ * allocation immediately. Do not access the memory or the struct folio
+ * after calling folio_put_refs() unless you can be sure that these weren't
+ * the last references.
+ *
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context. May be called while holding a spinlock.
+ */
+static inline void folio_put_refs(struct folio *folio, int refs)
+{
+ if (folio_ref_sub_and_test(folio, refs))
+ __put_page(&folio->page);
+}
+
static inline void put_page(struct page *page)
{
struct folio *folio = page_folio(page);
@@ -2644,7 +2619,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
extern struct vm_area_struct *vma_merge(struct mm_struct *,
struct vm_area_struct *prev, unsigned long addr, unsigned long end,
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
- struct mempolicy *, struct vm_userfaultfd_ctx);
+ struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
unsigned long addr, int new_below);
@@ -3153,7 +3128,6 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *,
#endif
void drop_slab(void);
-void drop_slab_node(int nid);
#ifndef CONFIG_MMU
#define randomize_va_space 0
@@ -3206,6 +3180,7 @@ enum mf_flags {
MF_ACTION_REQUIRED = 1 << 1,
MF_MUST_KILL = 1 << 2,
MF_SOFT_OFFLINE = 1 << 3,
+ MF_UNPOISON = 1 << 4,
};
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
@@ -3246,7 +3221,6 @@ enum mf_action_page_type {
MF_MSG_KERNEL_HIGH_ORDER,
MF_MSG_SLAB,
MF_MSG_DIFFERENT_COMPOUND,
- MF_MSG_POISONED_HUGE,
MF_MSG_HUGE,
MF_MSG_FREE_HUGE,
MF_MSG_NON_PMD_HUGE,
@@ -3261,7 +3235,6 @@ enum mf_action_page_type {
MF_MSG_CLEAN_LRU,
MF_MSG_TRUNCATED_LRU,
MF_MSG_BUDDY,
- MF_MSG_BUDDY_2ND,
MF_MSG_DAX,
MF_MSG_UNSPLIT_THP,
MF_MSG_UNKNOWN,
@@ -3390,5 +3363,16 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
return 0;
}
+#ifdef CONFIG_ANON_VMA_NAME
+int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+ unsigned long len_in, const char *name);
+#else
+static inline int
+madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+ unsigned long len_in, const char *name) {
+ return 0;
+}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e2ec68b0515c..b725839dfe71 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -2,8 +2,10 @@
#ifndef LINUX_MM_INLINE_H
#define LINUX_MM_INLINE_H
+#include <linux/atomic.h>
#include <linux/huge_mm.h>
#include <linux/swap.h>
+#include <linux/string.h>
/**
* folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
@@ -135,4 +137,138 @@ static __always_inline void del_page_from_lru_list(struct page *page,
{
lruvec_del_folio(lruvec, page_folio(page));
}
+
+#ifdef CONFIG_ANON_VMA_NAME
+/*
+ * mmap_lock should be read-locked when calling vma_anon_name() and while using
+ * the returned pointer.
+ */
+extern const char *vma_anon_name(struct vm_area_struct *vma);
+
+/*
+ * mmap_lock should be read-locked for orig_vma->vm_mm.
+ * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
+ * isolated.
+ */
+extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
+ struct vm_area_struct *new_vma);
+
+/*
+ * mmap_lock should be write-locked or vma should have been isolated under
+ * write-locked mmap_lock protection.
+ */
+extern void free_vma_anon_name(struct vm_area_struct *vma);
+
+/* mmap_lock should be read-locked */
+static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
+ const char *name)
+{
+ const char *vma_name = vma_anon_name(vma);
+
+ /* either both NULL, or pointers to same string */
+ if (vma_name == name)
+ return true;
+
+ return name && vma_name && !strcmp(name, vma_name);
+}
+#else /* CONFIG_ANON_VMA_NAME */
+static inline const char *vma_anon_name(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
+ struct vm_area_struct *new_vma) {}
+static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
+static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
+ const char *name)
+{
+ return true;
+}
+#endif /* CONFIG_ANON_VMA_NAME */
+
+static inline void init_tlb_flush_pending(struct mm_struct *mm)
+{
+ atomic_set(&mm->tlb_flush_pending, 0);
+}
+
+static inline void inc_tlb_flush_pending(struct mm_struct *mm)
+{
+ atomic_inc(&mm->tlb_flush_pending);
+ /*
+ * The only time this value is relevant is when there are indeed pages
+ * to flush. And we'll only flush pages after changing them, which
+ * requires the PTL.
+ *
+ * So the ordering here is:
+ *
+ * atomic_inc(&mm->tlb_flush_pending);
+ * spin_lock(&ptl);
+ * ...
+ * set_pte_at();
+ * spin_unlock(&ptl);
+ *
+ * spin_lock(&ptl)
+ * mm_tlb_flush_pending();
+ * ....
+ * spin_unlock(&ptl);
+ *
+ * flush_tlb_range();
+ * atomic_dec(&mm->tlb_flush_pending);
+ *
+ * Where the increment if constrained by the PTL unlock, it thus
+ * ensures that the increment is visible if the PTE modification is
+ * visible. After all, if there is no PTE modification, nobody cares
+ * about TLB flushes either.
+ *
+ * This very much relies on users (mm_tlb_flush_pending() and
+ * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
+ * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
+ * locks (PPC) the unlock of one doesn't order against the lock of
+ * another PTL.
+ *
+ * The decrement is ordered by the flush_tlb_range(), such that
+ * mm_tlb_flush_pending() will not return false unless all flushes have
+ * completed.
+ */
+}
+
+static inline void dec_tlb_flush_pending(struct mm_struct *mm)
+{
+ /*
+ * See inc_tlb_flush_pending().
+ *
+ * This cannot be smp_mb__before_atomic() because smp_mb() simply does
+ * not order against TLB invalidate completion, which is what we need.
+ *
+ * Therefore we must rely on tlb_flush_*() to guarantee order.
+ */
+ atomic_dec(&mm->tlb_flush_pending);
+}
+
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+ /*
+ * Must be called after having acquired the PTL; orders against that
+ * PTLs release and therefore ensures that if we observe the modified
+ * PTE we must also observe the increment from inc_tlb_flush_pending().
+ *
+ * That is, it only guarantees to return true if there is a flush
+ * pending for _this_ PTL.
+ */
+ return atomic_read(&mm->tlb_flush_pending);
+}
+
+static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+{
+ /*
+ * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
+ * for which there is a TLB flush pending in order to guarantee
+ * we've seen both that PTE modification and the increment.
+ *
+ * (no requirement on actually still holding the PTL, that is irrelevant)
+ */
+ return atomic_read(&mm->tlb_flush_pending) > 1;
+}
+
+
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1ae3537c7920..9db36dc5d4cf 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -5,6 +5,7 @@
#include <linux/mm_types_task.h>
#include <linux/auxvec.h>
+#include <linux/kref.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
@@ -118,31 +119,6 @@ struct page {
atomic_long_t pp_frag_count;
};
};
- struct { /* slab, slob and slub */
- union {
- struct list_head slab_list;
- struct { /* Partial pages */
- struct page *next;
-#ifdef CONFIG_64BIT
- int pages; /* Nr of pages left */
-#else
- short int pages;
-#endif
- };
- };
- struct kmem_cache *slab_cache; /* not slob */
- /* Double-word boundary */
- void *freelist; /* first free object */
- union {
- void *s_mem; /* slab: first object */
- unsigned long counters; /* SLUB */
- struct { /* SLUB */
- unsigned inuse:16;
- unsigned objects:15;
- unsigned frozen:1;
- };
- };
- };
struct { /* Tail pages of compound page */
unsigned long compound_head; /* Bit zero is set */
@@ -206,9 +182,6 @@ struct page {
* which are currently stored here.
*/
unsigned int page_type;
-
- unsigned int active; /* SLAB */
- int units; /* SLOB */
};
/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
@@ -386,6 +359,12 @@ struct vm_userfaultfd_ctx {
struct vm_userfaultfd_ctx {};
#endif /* CONFIG_USERFAULTFD */
+struct anon_vma_name {
+ struct kref kref;
+ /* The name needs to be at the end because it is dynamically sized. */
+ char name[];
+};
+
/*
* This struct describes a virtual memory area. There is one of these
* per VM-area/task. A VM area is any part of the process virtual memory
@@ -426,11 +405,19 @@ struct vm_area_struct {
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
+ *
+ * For private anonymous mappings, a pointer to a null terminated string
+ * containing the name given to the vma, or NULL if unnamed.
*/
- struct {
- struct rb_node rb;
- unsigned long rb_subtree_last;
- } shared;
+
+ union {
+ struct {
+ struct rb_node rb;
+ unsigned long rb_subtree_last;
+ } shared;
+ /* Serialized by mmap_sem. */
+ struct anon_vma_name *anon_name;
+ };
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -632,7 +619,7 @@ struct mm_struct {
atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
/* See flush_tlb_batched_pending() */
- bool tlb_flush_batched;
+ atomic_t tlb_flush_batched;
#endif
struct uprobes_state uprobes_state;
#ifdef CONFIG_PREEMPT_RT
@@ -677,90 +664,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_finish_mmu(struct mmu_gather *tlb);
-static inline void init_tlb_flush_pending(struct mm_struct *mm)
-{
- atomic_set(&mm->tlb_flush_pending, 0);
-}
-
-static inline void inc_tlb_flush_pending(struct mm_struct *mm)
-{
- atomic_inc(&mm->tlb_flush_pending);
- /*
- * The only time this value is relevant is when there are indeed pages
- * to flush. And we'll only flush pages after changing them, which
- * requires the PTL.
- *
- * So the ordering here is:
- *
- * atomic_inc(&mm->tlb_flush_pending);
- * spin_lock(&ptl);
- * ...
- * set_pte_at();
- * spin_unlock(&ptl);
- *
- * spin_lock(&ptl)
- * mm_tlb_flush_pending();
- * ....
- * spin_unlock(&ptl);
- *
- * flush_tlb_range();
- * atomic_dec(&mm->tlb_flush_pending);
- *
- * Where the increment if constrained by the PTL unlock, it thus
- * ensures that the increment is visible if the PTE modification is
- * visible. After all, if there is no PTE modification, nobody cares
- * about TLB flushes either.
- *
- * This very much relies on users (mm_tlb_flush_pending() and
- * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
- * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
- * locks (PPC) the unlock of one doesn't order against the lock of
- * another PTL.
- *
- * The decrement is ordered by the flush_tlb_range(), such that
- * mm_tlb_flush_pending() will not return false unless all flushes have
- * completed.
- */
-}
-
-static inline void dec_tlb_flush_pending(struct mm_struct *mm)
-{
- /*
- * See inc_tlb_flush_pending().
- *
- * This cannot be smp_mb__before_atomic() because smp_mb() simply does
- * not order against TLB invalidate completion, which is what we need.
- *
- * Therefore we must rely on tlb_flush_*() to guarantee order.
- */
- atomic_dec(&mm->tlb_flush_pending);
-}
-
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
-{
- /*
- * Must be called after having acquired the PTL; orders against that
- * PTLs release and therefore ensures that if we observe the modified
- * PTE we must also observe the increment from inc_tlb_flush_pending().
- *
- * That is, it only guarantees to return true if there is a flush
- * pending for _this_ PTL.
- */
- return atomic_read(&mm->tlb_flush_pending);
-}
-
-static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
-{
- /*
- * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
- * for which there is a TLB flush pending in order to guarantee
- * we've seen both that PTE modification and the increment.
- *
- * (no requirement on actually still holding the PTL, that is irrelevant)
- */
- return atomic_read(&mm->tlb_flush_pending) > 1;
-}
-
struct vm_fault;
/**
@@ -875,4 +778,49 @@ typedef struct {
unsigned long val;
} swp_entry_t;
+/**
+ * enum fault_flag - Fault flag definitions.
+ * @FAULT_FLAG_WRITE: Fault was a write fault.
+ * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
+ * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
+ * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
+ * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
+ * @FAULT_FLAG_TRIED: The fault has been tried once.
+ * @FAULT_FLAG_USER: The fault originated in userspace.
+ * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
+ * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
+ * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
+ *
+ * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
+ * whether we would allow page faults to retry by specifying these two
+ * fault flags correctly. Currently there can be three legal combinations:
+ *
+ * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and
+ * this is the first try
+ *
+ * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and
+ * we've already tried at least once
+ *
+ * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
+ *
+ * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
+ * be used. Note that page faults can be allowed to retry for multiple times,
+ * in which case we'll have an initial fault with flags (a) then later on
+ * continuous faults with flags (b). We should always try to detect pending
+ * signals before a retry to make sure the continuous page faults can still be
+ * interrupted if necessary.
+ */
+enum fault_flag {
+ FAULT_FLAG_WRITE = 1 << 0,
+ FAULT_FLAG_MKWRITE = 1 << 1,
+ FAULT_FLAG_ALLOW_RETRY = 1 << 2,
+ FAULT_FLAG_RETRY_NOWAIT = 1 << 3,
+ FAULT_FLAG_KILLABLE = 1 << 4,
+ FAULT_FLAG_TRIED = 1 << 5,
+ FAULT_FLAG_USER = 1 << 6,
+ FAULT_FLAG_REMOTE = 1 << 7,
+ FAULT_FLAG_INSTRUCTION = 1 << 8,
+ FAULT_FLAG_INTERRUPTIBLE = 1 << 9,
+};
+
#endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 936dc0b6c226..aed44e9b5d89 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1047,6 +1047,15 @@ static inline int is_highmem_idx(enum zone_type idx)
#endif
}
+#ifdef CONFIG_ZONE_DMA
+bool has_managed_dma(void);
+#else
+static inline bool has_managed_dma(void)
+{
+ return false;
+}
+#endif
+
/**
* is_highmem - helper function to quickly check if a struct zone is a
* highmem zone or not. This is an attempt to keep references
diff --git a/include/linux/module.h b/include/linux/module.h
index c9f1200b2312..1e135fd5c076 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -290,7 +290,8 @@ extern typeof(name) __mod_##type##__##name##_device_table \
* files require multiple MODULE_FIRMWARE() specifiers */
#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
-#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns)
+#define _MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns)
+#define MODULE_IMPORT_NS(ns) _MODULE_IMPORT_NS(ns)
struct notifier_block;
@@ -595,9 +596,9 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
/* Look for this name: can be of form module:name. */
unsigned long module_kallsyms_lookup_name(const char *name);
-extern void __noreturn __module_put_and_exit(struct module *mod,
+extern void __noreturn __module_put_and_kthread_exit(struct module *mod,
long code);
-#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code)
+#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code)
#ifdef CONFIG_MODULE_UNLOAD
int module_refcount(struct module *mod);
@@ -790,7 +791,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb)
return 0;
}
-#define module_put_and_exit(code) do_exit(code)
+#define module_put_and_kthread_exit(code) kthread_exit(code)
static inline void print_modules(void)
{
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5d92a7e1a742..7f18a7555dff 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -113,9 +113,6 @@ extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);
extern dev_t name_to_dev_t(const char *name);
-
-extern unsigned int sysctl_mount_max;
-
extern bool path_is_mountpoint(const struct path *path);
extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
diff --git a/include/linux/of.h b/include/linux/of.h
index ff143a027abc..2dc77430a91a 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -364,18 +364,12 @@ extern const struct of_device_id *of_match_node(
const struct of_device_id *matches, const struct device_node *node);
extern int of_modalias_node(struct device_node *node, char *modalias, int len);
extern void of_print_phandle_args(const char *msg, const struct of_phandle_args *args);
-extern struct device_node *of_parse_phandle(const struct device_node *np,
- const char *phandle_name,
- int index);
-extern int of_parse_phandle_with_args(const struct device_node *np,
- const char *list_name, const char *cells_name, int index,
- struct of_phandle_args *out_args);
+extern int __of_parse_phandle_with_args(const struct device_node *np,
+ const char *list_name, const char *cells_name, int cell_count,
+ int index, struct of_phandle_args *out_args);
extern int of_parse_phandle_with_args_map(const struct device_node *np,
const char *list_name, const char *stem_name, int index,
struct of_phandle_args *out_args);
-extern int of_parse_phandle_with_fixed_args(const struct device_node *np,
- const char *list_name, int cells_count, int index,
- struct of_phandle_args *out_args);
extern int of_count_phandle_with_args(const struct device_node *np,
const char *list_name, const char *cells_name);
@@ -416,130 +410,6 @@ extern int of_detach_node(struct device_node *);
#define of_match_ptr(_ptr) (_ptr)
-/**
- * of_property_read_u8_array - Find and read an array of u8 from a property.
- *
- * @np: device node from which the property value is to be read.
- * @propname: name of the property to be searched.
- * @out_values: pointer to return value, modified only if return value is 0.
- * @sz: number of array elements to read
- *
- * Search for a property in a device node and read 8-bit value(s) from
- * it.
- *
- * dts entry of array should be like:
- * ``property = /bits/ 8 <0x50 0x60 0x70>;``
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u8 value can be decoded.
- */
-static inline int of_property_read_u8_array(const struct device_node *np,
- const char *propname,
- u8 *out_values, size_t sz)
-{
- int ret = of_property_read_variable_u8_array(np, propname, out_values,
- sz, 0);
- if (ret >= 0)
- return 0;
- else
- return ret;
-}
-
-/**
- * of_property_read_u16_array - Find and read an array of u16 from a property.
- *
- * @np: device node from which the property value is to be read.
- * @propname: name of the property to be searched.
- * @out_values: pointer to return value, modified only if return value is 0.
- * @sz: number of array elements to read
- *
- * Search for a property in a device node and read 16-bit value(s) from
- * it.
- *
- * dts entry of array should be like:
- * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;``
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u16 value can be decoded.
- */
-static inline int of_property_read_u16_array(const struct device_node *np,
- const char *propname,
- u16 *out_values, size_t sz)
-{
- int ret = of_property_read_variable_u16_array(np, propname, out_values,
- sz, 0);
- if (ret >= 0)
- return 0;
- else
- return ret;
-}
-
-/**
- * of_property_read_u32_array - Find and read an array of 32 bit integers
- * from a property.
- *
- * @np: device node from which the property value is to be read.
- * @propname: name of the property to be searched.
- * @out_values: pointer to return value, modified only if return value is 0.
- * @sz: number of array elements to read
- *
- * Search for a property in a device node and read 32-bit value(s) from
- * it.
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u32 value can be decoded.
- */
-static inline int of_property_read_u32_array(const struct device_node *np,
- const char *propname,
- u32 *out_values, size_t sz)
-{
- int ret = of_property_read_variable_u32_array(np, propname, out_values,
- sz, 0);
- if (ret >= 0)
- return 0;
- else
- return ret;
-}
-
-/**
- * of_property_read_u64_array - Find and read an array of 64 bit integers
- * from a property.
- *
- * @np: device node from which the property value is to be read.
- * @propname: name of the property to be searched.
- * @out_values: pointer to return value, modified only if return value is 0.
- * @sz: number of array elements to read
- *
- * Search for a property in a device node and read 64-bit value(s) from
- * it.
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u64 value can be decoded.
- */
-static inline int of_property_read_u64_array(const struct device_node *np,
- const char *propname,
- u64 *out_values, size_t sz)
-{
- int ret = of_property_read_variable_u64_array(np, propname, out_values,
- sz, 0);
- if (ret >= 0)
- return 0;
- else
- return ret;
-}
-
/*
* struct property *prop;
* const __be32 *p;
@@ -734,32 +604,6 @@ static inline int of_property_count_elems_of_size(const struct device_node *np,
return -ENOSYS;
}
-static inline int of_property_read_u8_array(const struct device_node *np,
- const char *propname, u8 *out_values, size_t sz)
-{
- return -ENOSYS;
-}
-
-static inline int of_property_read_u16_array(const struct device_node *np,
- const char *propname, u16 *out_values, size_t sz)
-{
- return -ENOSYS;
-}
-
-static inline int of_property_read_u32_array(const struct device_node *np,
- const char *propname,
- u32 *out_values, size_t sz)
-{
- return -ENOSYS;
-}
-
-static inline int of_property_read_u64_array(const struct device_node *np,
- const char *propname,
- u64 *out_values, size_t sz)
-{
- return -ENOSYS;
-}
-
static inline int of_property_read_u32_index(const struct device_node *np,
const char *propname, u32 index, u32 *out_value)
{
@@ -865,18 +709,12 @@ static inline int of_property_read_string_helper(const struct device_node *np,
return -ENOSYS;
}
-static inline struct device_node *of_parse_phandle(const struct device_node *np,
- const char *phandle_name,
- int index)
-{
- return NULL;
-}
-
-static inline int of_parse_phandle_with_args(const struct device_node *np,
- const char *list_name,
- const char *cells_name,
- int index,
- struct of_phandle_args *out_args)
+static inline int __of_parse_phandle_with_args(const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int cell_count,
+ int index,
+ struct of_phandle_args *out_args)
{
return -ENOSYS;
}
@@ -890,13 +728,6 @@ static inline int of_parse_phandle_with_args_map(const struct device_node *np,
return -ENOSYS;
}
-static inline int of_parse_phandle_with_fixed_args(const struct device_node *np,
- const char *list_name, int cells_count, int index,
- struct of_phandle_args *out_args)
-{
- return -ENOSYS;
-}
-
static inline int of_count_phandle_with_args(const struct device_node *np,
const char *list_name,
const char *cells_name)
@@ -1078,6 +909,117 @@ static inline bool of_node_is_type(const struct device_node *np, const char *typ
}
/**
+ * of_parse_phandle - Resolve a phandle property to a device_node pointer
+ * @np: Pointer to device node holding phandle property
+ * @phandle_name: Name of property holding a phandle value
+ * @index: For properties holding a table of phandles, this is the index into
+ * the table
+ *
+ * Return: The device_node pointer with refcount incremented. Use
+ * of_node_put() on it when done.
+ */
+static inline struct device_node *of_parse_phandle(const struct device_node *np,
+ const char *phandle_name,
+ int index)
+{
+ struct of_phandle_args args;
+
+ if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0,
+ index, &args))
+ return NULL;
+
+ return args.np;
+}
+
+/**
+ * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
+ * @np: pointer to a device tree node containing a list
+ * @list_name: property name that contains a list
+ * @cells_name: property name that specifies phandles' arguments count
+ * @index: index of a phandle to parse out
+ * @out_args: optional pointer to output arguments structure (will be filled)
+ *
+ * This function is useful to parse lists of phandles and their arguments.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->np
+ * pointer.
+ *
+ * Example::
+ *
+ * phandle1: node1 {
+ * #list-cells = <2>;
+ * };
+ *
+ * phandle2: node2 {
+ * #list-cells = <1>;
+ * };
+ *
+ * node3 {
+ * list = <&phandle1 1 2 &phandle2 3>;
+ * };
+ *
+ * To get a device_node of the ``node2`` node you may call this:
+ * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
+ */
+static inline int of_parse_phandle_with_args(const struct device_node *np,
+ const char *list_name,
+ const char *cells_name,
+ int index,
+ struct of_phandle_args *out_args)
+{
+ int cell_count = -1;
+
+ /* If cells_name is NULL we assume a cell count of 0 */
+ if (!cells_name)
+ cell_count = 0;
+
+ return __of_parse_phandle_with_args(np, list_name, cells_name,
+ cell_count, index, out_args);
+}
+
+/**
+ * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list
+ * @np: pointer to a device tree node containing a list
+ * @list_name: property name that contains a list
+ * @cell_count: number of argument cells following the phandle
+ * @index: index of a phandle to parse out
+ * @out_args: optional pointer to output arguments structure (will be filled)
+ *
+ * This function is useful to parse lists of phandles and their arguments.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->np
+ * pointer.
+ *
+ * Example::
+ *
+ * phandle1: node1 {
+ * };
+ *
+ * phandle2: node2 {
+ * };
+ *
+ * node3 {
+ * list = <&phandle1 0 2 &phandle2 2 3>;
+ * };
+ *
+ * To get a device_node of the ``node2`` node you may call this:
+ * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args);
+ */
+static inline int of_parse_phandle_with_fixed_args(const struct device_node *np,
+ const char *list_name,
+ int cell_count,
+ int index,
+ struct of_phandle_args *out_args)
+{
+ return __of_parse_phandle_with_args(np, list_name, NULL, cell_count,
+ index, out_args);
+}
+
+/**
* of_property_count_u8_elems - Count the number of u8 elements in a property
*
* @np: device node from which the property value is to be read.
@@ -1236,6 +1178,130 @@ static inline bool of_property_read_bool(const struct device_node *np,
return prop ? true : false;
}
+/**
+ * of_property_read_u8_array - Find and read an array of u8 from a property.
+ *
+ * @np: device node from which the property value is to be read.
+ * @propname: name of the property to be searched.
+ * @out_values: pointer to return value, modified only if return value is 0.
+ * @sz: number of array elements to read
+ *
+ * Search for a property in a device node and read 8-bit value(s) from
+ * it.
+ *
+ * dts entry of array should be like:
+ * ``property = /bits/ 8 <0x50 0x60 0x70>;``
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u8 value can be decoded.
+ */
+static inline int of_property_read_u8_array(const struct device_node *np,
+ const char *propname,
+ u8 *out_values, size_t sz)
+{
+ int ret = of_property_read_variable_u8_array(np, propname, out_values,
+ sz, 0);
+ if (ret >= 0)
+ return 0;
+ else
+ return ret;
+}
+
+/**
+ * of_property_read_u16_array - Find and read an array of u16 from a property.
+ *
+ * @np: device node from which the property value is to be read.
+ * @propname: name of the property to be searched.
+ * @out_values: pointer to return value, modified only if return value is 0.
+ * @sz: number of array elements to read
+ *
+ * Search for a property in a device node and read 16-bit value(s) from
+ * it.
+ *
+ * dts entry of array should be like:
+ * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;``
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u16 value can be decoded.
+ */
+static inline int of_property_read_u16_array(const struct device_node *np,
+ const char *propname,
+ u16 *out_values, size_t sz)
+{
+ int ret = of_property_read_variable_u16_array(np, propname, out_values,
+ sz, 0);
+ if (ret >= 0)
+ return 0;
+ else
+ return ret;
+}
+
+/**
+ * of_property_read_u32_array - Find and read an array of 32 bit integers
+ * from a property.
+ *
+ * @np: device node from which the property value is to be read.
+ * @propname: name of the property to be searched.
+ * @out_values: pointer to return value, modified only if return value is 0.
+ * @sz: number of array elements to read
+ *
+ * Search for a property in a device node and read 32-bit value(s) from
+ * it.
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u32 value can be decoded.
+ */
+static inline int of_property_read_u32_array(const struct device_node *np,
+ const char *propname,
+ u32 *out_values, size_t sz)
+{
+ int ret = of_property_read_variable_u32_array(np, propname, out_values,
+ sz, 0);
+ if (ret >= 0)
+ return 0;
+ else
+ return ret;
+}
+
+/**
+ * of_property_read_u64_array - Find and read an array of 64 bit integers
+ * from a property.
+ *
+ * @np: device node from which the property value is to be read.
+ * @propname: name of the property to be searched.
+ * @out_values: pointer to return value, modified only if return value is 0.
+ * @sz: number of array elements to read
+ *
+ * Search for a property in a device node and read 64-bit value(s) from
+ * it.
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u64 value can be decoded.
+ */
+static inline int of_property_read_u64_array(const struct device_node *np,
+ const char *propname,
+ u64 *out_values, size_t sz)
+{
+ int ret = of_property_read_variable_u64_array(np, propname, out_values,
+ sz, 0);
+ if (ret >= 0)
+ return 0;
+ else
+ return ret;
+}
+
static inline int of_property_read_u8(const struct device_node *np,
const char *propname,
u8 *out_value)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b3d353d537e2..1c3b6e5c8bfd 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -380,7 +380,7 @@ static __always_inline int TestClearPage##uname(struct page *page) \
TESTCLEARFLAG(uname, lname, policy)
#define TESTPAGEFLAG_FALSE(uname, lname) \
-static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \
+static inline bool folio_test_##lname(const struct folio *folio) { return false; } \
static inline int Page##uname(const struct page *page) { return 0; }
#define SETPAGEFLAG_NOOP(uname, lname) \
@@ -519,7 +519,11 @@ PAGEFLAG_FALSE(Uncached, uncached)
PAGEFLAG(HWPoison, hwpoison, PF_ANY)
TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
+#define MAGIC_HWPOISON 0x48575053U /* HWPS */
+extern void SetPageHWPoisonTakenOff(struct page *page);
+extern void ClearPageHWPoisonTakenOff(struct page *page);
extern bool take_page_off_buddy(struct page *page);
+extern bool put_page_back_buddy(struct page *page);
#else
PAGEFLAG_FALSE(HWPoison, hwpoison)
#define __PG_HWPOISON 0
@@ -916,43 +920,6 @@ extern bool is_free_buddy_page(struct page *page);
__PAGEFLAG(Isolated, isolated, PF_ANY);
-/*
- * If network-based swap is enabled, sl*b must keep track of whether pages
- * were allocated from pfmemalloc reserves.
- */
-static inline int PageSlabPfmemalloc(struct page *page)
-{
- VM_BUG_ON_PAGE(!PageSlab(page), page);
- return PageActive(page);
-}
-
-/*
- * A version of PageSlabPfmemalloc() for opportunistic checks where the page
- * might have been freed under us and not be a PageSlab anymore.
- */
-static inline int __PageSlabPfmemalloc(struct page *page)
-{
- return PageActive(page);
-}
-
-static inline void SetPageSlabPfmemalloc(struct page *page)
-{
- VM_BUG_ON_PAGE(!PageSlab(page), page);
- SetPageActive(page);
-}
-
-static inline void __ClearPageSlabPfmemalloc(struct page *page)
-{
- VM_BUG_ON_PAGE(!PageSlab(page), page);
- __ClearPageActive(page);
-}
-
-static inline void ClearPageSlabPfmemalloc(struct page *page)
-{
- VM_BUG_ON_PAGE(!PageSlab(page), page);
- ClearPageActive(page);
-}
-
#ifdef CONFIG_MMU
#define __PG_MLOCKED (1UL << PG_mlocked)
#else
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index 83abf95e9fa7..4663dfed1293 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -13,7 +13,6 @@
* If there is not enough space to store Idle and Young bits in page flags, use
* page ext flags instead.
*/
-extern struct page_ext_operations page_idle_ops;
static inline bool folio_test_young(struct folio *folio)
{
diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
new file mode 100644
index 000000000000..38cace1da7b6
--- /dev/null
+++ b/include/linux/page_table_check.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2021, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+#ifndef __LINUX_PAGE_TABLE_CHECK_H
+#define __LINUX_PAGE_TABLE_CHECK_H
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+#include <linux/jump_label.h>
+
+extern struct static_key_true page_table_check_disabled;
+extern struct page_ext_operations page_table_check_ops;
+
+void __page_table_check_zero(struct page *page, unsigned int order);
+void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t pte);
+void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd);
+void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
+ pud_t pud);
+void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud);
+
+static inline void page_table_check_alloc(struct page *page, unsigned int order)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_zero(page, order);
+}
+
+static inline void page_table_check_free(struct page *page, unsigned int order)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_zero(page, order);
+}
+
+static inline void page_table_check_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t pte)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pte_clear(mm, addr, pte);
+}
+
+static inline void page_table_check_pmd_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t pmd)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pmd_clear(mm, addr, pmd);
+}
+
+static inline void page_table_check_pud_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t pud)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pud_clear(mm, addr, pud);
+}
+
+static inline void page_table_check_pte_set(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pte_set(mm, addr, ptep, pte);
+}
+
+static inline void page_table_check_pmd_set(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t pmd)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pmd_set(mm, addr, pmdp, pmd);
+}
+
+static inline void page_table_check_pud_set(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp,
+ pud_t pud)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pud_set(mm, addr, pudp, pud);
+}
+
+#else
+
+static inline void page_table_check_alloc(struct page *page, unsigned int order)
+{
+}
+
+static inline void page_table_check_free(struct page *page, unsigned int order)
+{
+}
+
+static inline void page_table_check_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t pte)
+{
+}
+
+static inline void page_table_check_pmd_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t pmd)
+{
+}
+
+static inline void page_table_check_pud_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t pud)
+{
+}
+
+static inline void page_table_check_pte_set(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte)
+{
+}
+
+static inline void page_table_check_pmd_set(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t pmd)
+{
+}
+
+static inline void page_table_check_pud_set(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp,
+ pud_t pud)
+{
+}
+
+#endif /* CONFIG_PAGE_TABLE_CHECK */
+#endif /* __LINUX_PAGE_TABLE_CHECK_H */
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index dda8d5868c81..67b1246f136b 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -111,6 +111,7 @@ static_assert(offsetof(struct pagevec, pages) ==
static inline void folio_batch_init(struct folio_batch *fbatch)
{
fbatch->nr = 0;
+ fbatch->percpu_pvec_drained = false;
}
static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0a7b6b2f163b..8253a5413d7c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -155,6 +155,15 @@ enum pci_interrupt_pin {
#define PCI_NUM_INTX 4
/*
+ * Reading from a device that doesn't respond typically returns ~0. A
+ * successful read from a device may also return ~0, so you need additional
+ * information to reliably identify errors.
+ */
+#define PCI_ERROR_RESPONSE (~0ULL)
+#define PCI_SET_ERROR_RESPONSE(val) (*(val) = ((typeof(*(val))) PCI_ERROR_RESPONSE))
+#define PCI_POSSIBLE_ERROR(val) ((val) == ((typeof(val)) PCI_ERROR_RESPONSE))
+
+/*
* pci_power_t values must match the bits in the Capabilities PME_Support
* and Control/Status PowerState fields in the Power Management capability.
*/
@@ -456,6 +465,7 @@ struct pci_dev {
unsigned int link_active_reporting:1;/* Device capable of reporting link active */
unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */
unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */
+ unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
@@ -1777,7 +1787,10 @@ static inline struct pci_dev *pci_get_class(unsigned int class,
struct pci_dev *from)
{ return NULL; }
-#define pci_dev_present(ids) (0)
+
+static inline int pci_dev_present(const struct pci_device_id *ids)
+{ return 0; }
+
#define no_pci_devices() (1)
#define pci_dev_put(dev) do { } while (0)
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 86678588d191..aad54c666407 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2618,8 +2618,8 @@
#define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320
#define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321
#define PCI_DEVICE_ID_INTEL_PXH_0 0x0329
-#define PCI_DEVICE_ID_INTEL_PXH_1 0x032A
-#define PCI_DEVICE_ID_INTEL_PXHV 0x032C
+#define PCI_DEVICE_ID_INTEL_PXH_1 0x032a
+#define PCI_DEVICE_ID_INTEL_PXHV 0x032c
#define PCI_DEVICE_ID_INTEL_80332_0 0x0330
#define PCI_DEVICE_ID_INTEL_80332_1 0x0332
#define PCI_DEVICE_ID_INTEL_80333_0 0x0370
@@ -2637,14 +2637,14 @@
#define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822
#define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823
#define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824
-#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F
-#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095E
+#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084f
+#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095e
#define PCI_DEVICE_ID_INTEL_I960 0x0960
#define PCI_DEVICE_ID_INTEL_I960RM 0x0962
#define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60
#define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062
#define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085
-#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108F
+#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108f
#define PCI_DEVICE_ID_INTEL_82815_MC 0x1130
#define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132
#define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221
@@ -2738,12 +2738,6 @@
#define PCI_DEVICE_ID_INTEL_82801EB_11 0x24db
#define PCI_DEVICE_ID_INTEL_82801EB_12 0x24dc
#define PCI_DEVICE_ID_INTEL_82801EB_13 0x24dd
-#define PCI_DEVICE_ID_INTEL_ESB_1 0x25a1
-#define PCI_DEVICE_ID_INTEL_ESB_2 0x25a2
-#define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4
-#define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6
-#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab
-#define PCI_DEVICE_ID_INTEL_ESB_10 0x25ac
#define PCI_DEVICE_ID_INTEL_82820_HB 0x2500
#define PCI_DEVICE_ID_INTEL_82820_UP_HB 0x2501
#define PCI_DEVICE_ID_INTEL_82850_HB 0x2530
@@ -2758,14 +2752,15 @@
#define PCI_DEVICE_ID_INTEL_82915G_IG 0x2582
#define PCI_DEVICE_ID_INTEL_82915GM_HB 0x2590
#define PCI_DEVICE_ID_INTEL_82915GM_IG 0x2592
-#define PCI_DEVICE_ID_INTEL_5000_ERR 0x25F0
-#define PCI_DEVICE_ID_INTEL_5000_FBD0 0x25F5
-#define PCI_DEVICE_ID_INTEL_5000_FBD1 0x25F6
-#define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770
-#define PCI_DEVICE_ID_INTEL_82945G_IG 0x2772
-#define PCI_DEVICE_ID_INTEL_3000_HB 0x2778
-#define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27A0
-#define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27A2
+#define PCI_DEVICE_ID_INTEL_ESB_1 0x25a1
+#define PCI_DEVICE_ID_INTEL_ESB_2 0x25a2
+#define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4
+#define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6
+#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab
+#define PCI_DEVICE_ID_INTEL_ESB_10 0x25ac
+#define PCI_DEVICE_ID_INTEL_5000_ERR 0x25f0
+#define PCI_DEVICE_ID_INTEL_5000_FBD0 0x25f5
+#define PCI_DEVICE_ID_INTEL_5000_FBD1 0x25f6
#define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640
#define PCI_DEVICE_ID_INTEL_ICH6_1 0x2641
#define PCI_DEVICE_ID_INTEL_ICH6_2 0x2642
@@ -2777,6 +2772,11 @@
#define PCI_DEVICE_ID_INTEL_ESB2_14 0x2698
#define PCI_DEVICE_ID_INTEL_ESB2_17 0x269b
#define PCI_DEVICE_ID_INTEL_ESB2_18 0x269e
+#define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770
+#define PCI_DEVICE_ID_INTEL_82945G_IG 0x2772
+#define PCI_DEVICE_ID_INTEL_3000_HB 0x2778
+#define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27a0
+#define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27a2
#define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8
#define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9
#define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0
@@ -2829,7 +2829,7 @@
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_PHY0 0x2c91
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR 0x2c98
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD 0x2c99
-#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST 0x2c9C
+#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST 0x2c9c
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL 0x2ca0
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR 0x2ca1
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK 0x2ca2
@@ -2941,16 +2941,16 @@
#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */
#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f
-#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0
-#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3
-#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5
-#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6
#define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030
#define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035
#define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036
-#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff
#define PCI_DEVICE_ID_INTEL_EP80579_0 0x5031
#define PCI_DEVICE_ID_INTEL_EP80579_1 0x5032
+#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3
+#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5
+#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6
+#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff
#define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000
#define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010
#define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index ae4004e7957e..f1ec5ad1351c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -94,10 +94,7 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
extern enum pcpu_fc pcpu_chosen_fc;
-typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
- size_t align);
-typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
-typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
+typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
@@ -111,15 +108,13 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn);
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
#endif
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+void __init pcpu_populate_pte(unsigned long addr);
extern int __init pcpu_page_first_chunk(size_t reserved_size,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn,
- pcpu_fc_populate_pte_fn_t populate_pte_fn);
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
#endif
extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 117f230bcdfd..733649184b27 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -693,18 +693,6 @@ struct perf_event {
u64 total_time_running;
u64 tstamp;
- /*
- * timestamp shadows the actual context timing but it can
- * be safely used in NMI interrupt context. It reflects the
- * context time as it was when the event was last scheduled in,
- * or when ctx_sched_in failed to schedule the event because we
- * run out of PMC.
- *
- * ctx_time already accounts for ctx->timestamp. Therefore to
- * compute ctx_time for a sample, simply add perf_clock().
- */
- u64 shadow_ctx_time;
-
struct perf_event_attr attr;
u16 header_size;
u16 id_header_size;
@@ -852,6 +840,7 @@ struct perf_event_context {
*/
u64 time;
u64 timestamp;
+ u64 timeoffset;
/*
* These fields let us detect when two contexts have both
@@ -934,6 +923,8 @@ struct bpf_perf_event_data_kern {
struct perf_cgroup_info {
u64 time;
u64 timestamp;
+ u64 timeoffset;
+ int active;
};
struct perf_cgroup {
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e24d2c992b11..bc8713a76e03 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -258,6 +258,14 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef __HAVE_ARCH_PTEP_CLEAR
+static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pte_clear(mm, addr, ptep);
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index fc5642431b92..c00c618ef290 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -238,10 +238,6 @@ void pipe_lock(struct pipe_inode_info *);
void pipe_unlock(struct pipe_inode_info *);
void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
-extern unsigned int pipe_max_size;
-extern unsigned long pipe_user_pages_hard;
-extern unsigned long pipe_user_pages_soft;
-
/* Wait for a pipe to be readable/writable while dropping the pipe lock */
void pipe_wait_readable(struct pipe_inode_info *);
void pipe_wait_writable(struct pipe_inode_info *);
diff --git a/include/linux/platform_data/ad5755.h b/include/linux/platform_data/ad5755.h
deleted file mode 100644
index e371e08f04bc..000000000000
--- a/include/linux/platform_data/ad5755.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2012 Analog Devices Inc.
- */
-#ifndef __LINUX_PLATFORM_DATA_AD5755_H__
-#define __LINUX_PLATFORM_DATA_AD5755_H__
-
-enum ad5755_mode {
- AD5755_MODE_VOLTAGE_0V_5V = 0,
- AD5755_MODE_VOLTAGE_0V_10V = 1,
- AD5755_MODE_VOLTAGE_PLUSMINUS_5V = 2,
- AD5755_MODE_VOLTAGE_PLUSMINUS_10V = 3,
- AD5755_MODE_CURRENT_4mA_20mA = 4,
- AD5755_MODE_CURRENT_0mA_20mA = 5,
- AD5755_MODE_CURRENT_0mA_24mA = 6,
-};
-
-enum ad5755_dc_dc_phase {
- AD5755_DC_DC_PHASE_ALL_SAME_EDGE = 0,
- AD5755_DC_DC_PHASE_A_B_SAME_EDGE_C_D_OPP_EDGE = 1,
- AD5755_DC_DC_PHASE_A_C_SAME_EDGE_B_D_OPP_EDGE = 2,
- AD5755_DC_DC_PHASE_90_DEGREE = 3,
-};
-
-enum ad5755_dc_dc_freq {
- AD5755_DC_DC_FREQ_250kHZ = 0,
- AD5755_DC_DC_FREQ_410kHZ = 1,
- AD5755_DC_DC_FREQ_650kHZ = 2,
-};
-
-enum ad5755_dc_dc_maxv {
- AD5755_DC_DC_MAXV_23V = 0,
- AD5755_DC_DC_MAXV_24V5 = 1,
- AD5755_DC_DC_MAXV_27V = 2,
- AD5755_DC_DC_MAXV_29V5 = 3,
-};
-
-enum ad5755_slew_rate {
- AD5755_SLEW_RATE_64k = 0,
- AD5755_SLEW_RATE_32k = 1,
- AD5755_SLEW_RATE_16k = 2,
- AD5755_SLEW_RATE_8k = 3,
- AD5755_SLEW_RATE_4k = 4,
- AD5755_SLEW_RATE_2k = 5,
- AD5755_SLEW_RATE_1k = 6,
- AD5755_SLEW_RATE_500 = 7,
- AD5755_SLEW_RATE_250 = 8,
- AD5755_SLEW_RATE_125 = 9,
- AD5755_SLEW_RATE_64 = 10,
- AD5755_SLEW_RATE_32 = 11,
- AD5755_SLEW_RATE_16 = 12,
- AD5755_SLEW_RATE_8 = 13,
- AD5755_SLEW_RATE_4 = 14,
- AD5755_SLEW_RATE_0_5 = 15,
-};
-
-enum ad5755_slew_step_size {
- AD5755_SLEW_STEP_SIZE_1 = 0,
- AD5755_SLEW_STEP_SIZE_2 = 1,
- AD5755_SLEW_STEP_SIZE_4 = 2,
- AD5755_SLEW_STEP_SIZE_8 = 3,
- AD5755_SLEW_STEP_SIZE_16 = 4,
- AD5755_SLEW_STEP_SIZE_32 = 5,
- AD5755_SLEW_STEP_SIZE_64 = 6,
- AD5755_SLEW_STEP_SIZE_128 = 7,
- AD5755_SLEW_STEP_SIZE_256 = 8,
-};
-
-/**
- * struct ad5755_platform_data - AD5755 DAC driver platform data
- * @ext_dc_dc_compenstation_resistor: Whether an external DC-DC converter
- * compensation register is used.
- * @dc_dc_phase: DC-DC converter phase.
- * @dc_dc_freq: DC-DC converter frequency.
- * @dc_dc_maxv: DC-DC maximum allowed boost voltage.
- * @dac.mode: The mode to be used for the DAC output.
- * @dac.ext_current_sense_resistor: Whether an external current sense resistor
- * is used.
- * @dac.enable_voltage_overrange: Whether to enable 20% voltage output overrange.
- * @dac.slew.enable: Whether to enable digital slew.
- * @dac.slew.rate: Slew rate of the digital slew.
- * @dac.slew.step_size: Slew step size of the digital slew.
- **/
-struct ad5755_platform_data {
- bool ext_dc_dc_compenstation_resistor;
- enum ad5755_dc_dc_phase dc_dc_phase;
- enum ad5755_dc_dc_freq dc_dc_freq;
- enum ad5755_dc_dc_maxv dc_dc_maxv;
-
- struct {
- enum ad5755_mode mode;
- bool ext_current_sense_resistor;
- bool enable_voltage_overrange;
- struct {
- bool enable;
- enum ad5755_slew_rate rate;
- enum ad5755_slew_step_size step_size;
- } slew;
- } dac[4];
-};
-
-#endif
diff --git a/include/linux/platform_data/bcm7038_wdt.h b/include/linux/platform_data/bcm7038_wdt.h
new file mode 100644
index 000000000000..e18cfd9ec8f9
--- /dev/null
+++ b/include/linux/platform_data/bcm7038_wdt.h
@@ -0,0 +1,8 @@
+#ifndef __BCM7038_WDT_PDATA_H
+#define __BCM7038_WDT_PDATA_H
+
+struct bcm7038_wdt_platform_data {
+ const char *clk_name;
+};
+
+#endif /* __BCM7038_WDT_PDATA_H */
diff --git a/include/linux/pm.h b/include/linux/pm.h
index e1e9402180b9..f7d2be686359 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -8,6 +8,7 @@
#ifndef _LINUX_PM_H
#define _LINUX_PM_H
+#include <linux/export.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
@@ -357,13 +358,47 @@ struct dev_pm_ops {
#define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn)
#endif
+#define _DEFINE_DEV_PM_OPS(name, \
+ suspend_fn, resume_fn, \
+ runtime_suspend_fn, runtime_resume_fn, idle_fn) \
+const struct dev_pm_ops name = { \
+ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+ RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \
+}
+
+#ifdef CONFIG_PM
+#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
+ runtime_resume_fn, idle_fn, sec) \
+ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
+ runtime_resume_fn, idle_fn); \
+ _EXPORT_SYMBOL(name, sec)
+#else
+#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
+ runtime_resume_fn, idle_fn, sec) \
+static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \
+ resume_fn, runtime_suspend_fn, \
+ runtime_resume_fn, idle_fn)
+#endif
+
/*
* Use this if you want to use the same suspend and resume callbacks for suspend
* to RAM and hibernation.
+ *
+ * If the underlying dev_pm_ops struct symbol has to be exported, use
+ * EXPORT_SIMPLE_DEV_PM_OPS() or EXPORT_GPL_SIMPLE_DEV_PM_OPS() instead.
*/
#define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-static const struct dev_pm_ops name = { \
- SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL)
+
+#define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "")
+#define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl")
+
+/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
+#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+const struct dev_pm_ops __maybe_unused name = { \
+ SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
}
/*
@@ -378,20 +413,10 @@ static const struct dev_pm_ops name = { \
* suspend and "early" resume callback pointers, .suspend_late() and
* .resume_early(), to the same routines as .runtime_suspend() and
* .runtime_resume(), respectively (and analogously for hibernation).
+ *
+ * Deprecated. You most likely don't want this macro. Use
+ * DEFINE_RUNTIME_DEV_PM_OPS() instead.
*/
-#define DEFINE_UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
-static const struct dev_pm_ops name = { \
- SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
- RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
-}
-
-/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
-#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-const struct dev_pm_ops __maybe_unused name = { \
- SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
-}
-
-/* Deprecated. Use DEFINE_UNIVERSAL_DEV_PM_OPS() instead. */
#define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
const struct dev_pm_ops __maybe_unused name = { \
SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 016de5776b6d..9f09601c465a 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -22,6 +22,30 @@
usage_count */
#define RPM_AUTO 0x08 /* Use autosuspend_delay */
+/*
+ * Use this for defining a set of PM operations to be used in all situations
+ * (system suspend, hibernation or runtime PM).
+ *
+ * Note that the behaviour differs from the deprecated UNIVERSAL_DEV_PM_OPS()
+ * macro, which uses the provided callbacks for both runtime PM and system
+ * sleep, while DEFINE_RUNTIME_DEV_PM_OPS() uses pm_runtime_force_suspend()
+ * and pm_runtime_force_resume() for its system sleep callbacks.
+ *
+ * If the underlying dev_pm_ops struct symbol has to be exported, use
+ * EXPORT_RUNTIME_DEV_PM_OPS() or EXPORT_GPL_RUNTIME_DEV_PM_OPS() instead.
+ */
+#define DEFINE_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
+ _DEFINE_DEV_PM_OPS(name, pm_runtime_force_suspend, \
+ pm_runtime_force_resume, suspend_fn, \
+ resume_fn, idle_fn)
+
+#define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
+ _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
+ suspend_fn, resume_fn, idle_fn, "")
+#define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
+ _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
+ suspend_fn, resume_fn, idle_fn, "_gpl")
+
#ifdef CONFIG_PM
extern struct workqueue_struct *pm_wq;
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 1cdc32b1f1b0..a9e0e1c2d1f2 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -8,12 +8,10 @@
#include <linux/wait.h>
#include <linux/string.h>
#include <linux/fs.h>
-#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <uapi/linux/poll.h>
#include <uapi/linux/eventpoll.h>
-extern struct ctl_table epoll_table[]; /* for sysctl */
/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
additional memory. */
#ifdef __clang__
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9497f6b98339..1522df223c0f 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -183,10 +183,6 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
extern int printk_delay_msec;
extern int dmesg_restrict;
-extern int
-devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf,
- size_t *lenp, loff_t *ppos);
-
extern void wake_up_klogd(void);
char *log_buf_addr_get(void);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 069c7fd95396..81d6e4ec2294 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -110,7 +110,16 @@ extern struct proc_dir_entry *proc_create_data(const char *, umode_t,
struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops);
extern void proc_set_size(struct proc_dir_entry *, loff_t);
extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t);
-extern void *PDE_DATA(const struct inode *);
+
+/*
+ * Obtain the private data passed by user through proc_create_data() or
+ * related.
+ */
+static inline void *pde_data(const struct inode *inode)
+{
+ return inode->i_private;
+}
+
extern void *proc_get_parent_data(const struct inode *);
extern void proc_remove(struct proc_dir_entry *);
extern void remove_proc_entry(const char *, struct proc_dir_entry *);
@@ -178,12 +187,20 @@ static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
#define proc_create_seq(name, mode, parent, ops) ({NULL;})
#define proc_create_single(name, mode, parent, show) ({NULL;})
#define proc_create_single_data(name, mode, parent, show, data) ({NULL;})
-#define proc_create(name, mode, parent, proc_ops) ({NULL;})
-#define proc_create_data(name, mode, parent, proc_ops, data) ({NULL;})
+
+static inline struct proc_dir_entry *
+proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent,
+ const struct proc_ops *proc_ops)
+{ return NULL; }
+
+static inline struct proc_dir_entry *
+proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent,
+ const struct proc_ops *proc_ops, void *data)
+{ return NULL; }
static inline void proc_set_size(struct proc_dir_entry *de, loff_t size) {}
static inline void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) {}
-static inline void *PDE_DATA(const struct inode *inode) {BUG(); return NULL;}
+static inline void *pde_data(const struct inode *inode) {BUG(); return NULL;}
static inline void *proc_get_parent_data(const struct inode *inode) { BUG(); return NULL; }
static inline void proc_remove(struct proc_dir_entry *de) {}
diff --git a/include/linux/profile.h b/include/linux/profile.h
index fd18ca96f557..11db1ec516e2 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -31,11 +31,6 @@ static inline int create_proc_profile(void)
}
#endif
-enum profile_type {
- PROFILE_TASK_EXIT,
- PROFILE_MUNMAP
-};
-
#ifdef CONFIG_PROFILING
extern int prof_on __read_mostly;
@@ -66,23 +61,6 @@ static inline void profile_hit(int type, void *ip)
struct task_struct;
struct mm_struct;
-/* task is in do_exit() */
-void profile_task_exit(struct task_struct * task);
-
-/* task is dead, free task struct ? Returns 1 if
- * the task was taken, 0 if the task should be freed.
- */
-int profile_handoff_task(struct task_struct * task);
-
-/* sys_munmap */
-void profile_munmap(unsigned long addr);
-
-int task_handoff_register(struct notifier_block * n);
-int task_handoff_unregister(struct notifier_block * n);
-
-int profile_event_register(enum profile_type, struct notifier_block * n);
-int profile_event_unregister(enum profile_type, struct notifier_block * n);
-
#else
#define prof_on 0
@@ -107,29 +85,6 @@ static inline void profile_hit(int type, void *ip)
return;
}
-static inline int task_handoff_register(struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-static inline int task_handoff_unregister(struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-static inline int profile_event_register(enum profile_type t, struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n)
-{
- return -ENOSYS;
-}
-
-#define profile_task_exit(a) do { } while (0)
-#define profile_handoff_task(a) (0)
-#define profile_munmap(a) do { } while (0)
#endif /* CONFIG_PROFILING */
diff --git a/include/linux/property.h b/include/linux/property.h
index 8355f99ebd47..7399a0b45f98 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -122,6 +122,8 @@ void fwnode_handle_put(struct fwnode_handle *fwnode);
int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index);
+void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index);
+
unsigned int device_get_child_node_count(struct device *dev);
static inline bool device_property_read_bool(struct device *dev,
diff --git a/include/linux/psi.h b/include/linux/psi.h
index a70ca833c6d7..f8ce53bfdb2a 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to);
struct psi_trigger *psi_trigger_create(struct psi_group *group,
char *buf, size_t nbytes, enum psi_res res);
-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
+void psi_trigger_destroy(struct psi_trigger *t);
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
poll_table *wait);
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index 516c0fe836fd..1a3cef26d129 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -141,9 +141,6 @@ struct psi_trigger {
* events to one per window
*/
u64 last_event_time;
-
- /* Refcounting to prevent premature destruction */
- struct kref refcount;
};
struct psi_group {
diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
index c11c9db5825c..60f3453be23e 100644
--- a/include/linux/ref_tracker.h
+++ b/include/linux/ref_tracker.h
@@ -4,6 +4,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/spinlock.h>
+#include <linux/stackdepot.h>
struct ref_tracker;
@@ -26,6 +27,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
spin_lock_init(&dir->lock);
dir->quarantine_avail = quarantine_count;
refcount_set(&dir->untracked, 1);
+ stack_depot_init();
}
void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h
index 4846f72759b2..c7e2f21dd5c1 100644
--- a/include/linux/rio_ids.h
+++ b/include/linux/rio_ids.h
@@ -9,18 +9,6 @@
#ifndef LINUX_RIO_IDS_H
#define LINUX_RIO_IDS_H
-#define RIO_VID_FREESCALE 0x0002
-#define RIO_DID_MPC8560 0x0003
-
-#define RIO_VID_TUNDRA 0x000d
-#define RIO_DID_TSI500 0x0500
-#define RIO_DID_TSI568 0x0568
-#define RIO_DID_TSI572 0x0572
-#define RIO_DID_TSI574 0x0574
-#define RIO_DID_TSI576 0x0578 /* Same ID as Tsi578 */
-#define RIO_DID_TSI577 0x0577
-#define RIO_DID_TSI578 0x0578
-
#define RIO_VID_IDT 0x0038
#define RIO_DID_IDT70K200 0x0310
#define RIO_DID_IDTCPS8 0x035c
@@ -33,7 +21,6 @@
#define RIO_DID_IDTCPS1616 0x0379
#define RIO_DID_IDTVPS1616 0x0377
#define RIO_DID_IDTSPS1616 0x0378
-#define RIO_DID_TSI721 0x80ab
#define RIO_DID_IDTRXS1632 0x80e5
#define RIO_DID_IDTRXS2448 0x80e6
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 2c0ad417ce3c..8f416c5e929e 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -55,6 +55,12 @@ do { \
#define write_lock(lock) _raw_write_lock(lock)
#define read_lock(lock) _raw_read_lock(lock)
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define write_lock_nested(lock, subclass) _raw_write_lock_nested(lock, subclass)
+#else
+#define write_lock_nested(lock, subclass) _raw_write_lock(lock)
+#endif
+
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
#define read_lock_irqsave(lock, flags) \
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index f1db6f17c4fb..dceb0a59b692 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -17,6 +17,7 @@
void __lockfunc _raw_read_lock(rwlock_t *lock) __acquires(lock);
void __lockfunc _raw_write_lock(rwlock_t *lock) __acquires(lock);
+void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass) __acquires(lock);
void __lockfunc _raw_read_lock_bh(rwlock_t *lock) __acquires(lock);
void __lockfunc _raw_write_lock_bh(rwlock_t *lock) __acquires(lock);
void __lockfunc _raw_read_lock_irq(rwlock_t *lock) __acquires(lock);
@@ -209,6 +210,13 @@ static inline void __raw_write_lock(rwlock_t *lock)
LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
}
+static inline void __raw_write_lock_nested(rwlock_t *lock, int subclass)
+{
+ preempt_disable();
+ rwlock_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
+}
+
#endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */
static inline void __raw_write_unlock(rwlock_t *lock)
diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
index 49c1f3842ed5..8544ff05e594 100644
--- a/include/linux/rwlock_rt.h
+++ b/include/linux/rwlock_rt.h
@@ -28,6 +28,7 @@ extern void rt_read_lock(rwlock_t *rwlock);
extern int rt_read_trylock(rwlock_t *rwlock);
extern void rt_read_unlock(rwlock_t *rwlock);
extern void rt_write_lock(rwlock_t *rwlock);
+extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass);
extern int rt_write_trylock(rwlock_t *rwlock);
extern void rt_write_unlock(rwlock_t *rwlock);
@@ -83,6 +84,15 @@ static __always_inline void write_lock(rwlock_t *rwlock)
rt_write_lock(rwlock);
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static __always_inline void write_lock_nested(rwlock_t *rwlock, int subclass)
+{
+ rt_write_lock_nested(rwlock, subclass);
+}
+#else
+#define write_lock_nested(lock, subclass) rt_write_lock(((void)(subclass), (lock)))
+#endif
+
static __always_inline void write_lock_bh(rwlock_t *rwlock)
{
local_bh_disable();
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index fc0357a6e19b..95df357ec009 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -416,6 +416,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
}
/**
+ * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
+ * @sbq: Bitmap queue to recalculate wake batch.
+ * @users: Number of shares.
+ *
+ * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
+ * by depth. This interface is for HCTX shared tags or queue shared tags.
+ */
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users);
+
+/**
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
* @sbq: Bitmap queue to resize.
* @depth: New number of bits to resize to.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7a1f16df66e3..f5b2be39a78c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -274,8 +274,13 @@ struct task_group;
#define get_current_state() READ_ONCE(current->__state)
-/* Task command name length: */
-#define TASK_COMM_LEN 16
+/*
+ * Define the task command name length as enum, then it can be visible to
+ * BPF programs.
+ */
+enum {
+ TASK_COMM_LEN = 16,
+};
extern void scheduler_tick(void);
@@ -614,10 +619,6 @@ struct sched_dl_entity {
* task has to wait for a replenishment to be performed at the
* next firing of dl_timer.
*
- * @dl_boosted tells if we are boosted due to DI. If so we are
- * outside bandwidth enforcement mechanism (but only until we
- * exit the critical section);
- *
* @dl_yielded tells if task gave up the CPU before consuming
* all its available runtime during the last job.
*
@@ -991,8 +992,8 @@ struct task_struct {
/* CLONE_CHILD_CLEARTID: */
int __user *clear_child_tid;
- /* PF_IO_WORKER */
- void *pf_io_worker;
+ /* PF_KTHREAD | PF_IO_WORKER */
+ void *worker_private;
u64 utime;
u64 stime;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index aca874d33fe6..aa5f09ca5bcf 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -214,6 +214,32 @@ static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif
+/* Any memory-allocation retry loop should use
+ * memalloc_retry_wait(), and pass the flags for the most
+ * constrained allocation attempt that might have failed.
+ * This provides useful documentation of where loops are,
+ * and a central place to fine tune the waiting as the MM
+ * implementation changes.
+ */
+static inline void memalloc_retry_wait(gfp_t gfp_flags)
+{
+ /* We use io_schedule_timeout because waiting for memory
+ * typically included waiting for dirty pages to be
+ * written out, which requires IO.
+ */
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ gfp_flags = current_gfp_context(gfp_flags);
+ if (gfpflags_allow_blocking(gfp_flags) &&
+ !(gfp_flags & __GFP_NORETRY))
+ /* Probably waited already, no need for much more */
+ io_schedule_timeout(1);
+ else
+ /* Probably didn't wait, and has now released a lock,
+ * so now is a good time to wait
+ */
+ io_schedule_timeout(HZ/50);
+}
+
/**
* might_alloc - Mark possible allocation sites
* @gfp_mask: gfp_t flags that would be used to allocate
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 33a50642cf41..b6ecb9fc4cd2 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -109,13 +109,9 @@ struct signal_struct {
/* thread group exit support */
int group_exit_code;
- /* overloaded:
- * - notify group_exit_task when ->count is equal to notify_count
- * - everyone except group_exit_task is stopped during signal delivery
- * of fatal signals, group_exit_task processes the signal.
- */
+ /* notify group_exec_task when notify_count is less or equal to 0 */
int notify_count;
- struct task_struct *group_exit_task;
+ struct task_struct *group_exec_task;
/* thread group stop support, overloads group_exit_code too */
int group_stop_count;
@@ -256,7 +252,6 @@ struct signal_struct {
#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */
#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */
#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */
-#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */
/*
* Pending notifications to parent.
*/
@@ -272,31 +267,25 @@ struct signal_struct {
static inline void signal_set_stop_flags(struct signal_struct *sig,
unsigned int flags)
{
- WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
+ WARN_ON(sig->flags & SIGNAL_GROUP_EXIT);
sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
}
-/* If true, all threads except ->group_exit_task have pending SIGKILL */
-static inline int signal_group_exit(const struct signal_struct *sig)
-{
- return (sig->flags & SIGNAL_GROUP_EXIT) ||
- (sig->group_exit_task != NULL);
-}
-
extern void flush_signals(struct task_struct *);
extern void ignore_signals(struct task_struct *);
extern void flush_signal_handlers(struct task_struct *, int force_default);
-extern int dequeue_signal(struct task_struct *task,
- sigset_t *mask, kernel_siginfo_t *info);
+extern int dequeue_signal(struct task_struct *task, sigset_t *mask,
+ kernel_siginfo_t *info, enum pid_type *type);
static inline int kernel_dequeue_signal(void)
{
struct task_struct *task = current;
kernel_siginfo_t __info;
+ enum pid_type __type;
int ret;
spin_lock_irq(&task->sighand->siglock);
- ret = dequeue_signal(task, &task->blocked, &__info);
+ ret = dequeue_signal(task, &task->blocked, &__info, &__type);
spin_unlock_irq(&task->sighand->siglock);
return ret;
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 304f431178fd..c19dd5a2c05c 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -7,20 +7,8 @@
struct ctl_table;
#ifdef CONFIG_DETECT_HUNG_TASK
-
-#ifdef CONFIG_SMP
-extern unsigned int sysctl_hung_task_all_cpu_backtrace;
-#else
-#define sysctl_hung_task_all_cpu_backtrace 0
-#endif /* CONFIG_SMP */
-
-extern int sysctl_hung_task_check_count;
-extern unsigned int sysctl_hung_task_panic;
+/* used for hung_task and block/ */
extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
-extern int sysctl_hung_task_warnings;
-int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
#else
/* Avoid need for ifdefs elsewhere in the code */
enum { sysctl_hung_task_timeout_secs = 0 };
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 058d7f371e25..b9198a1b3a84 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -59,6 +59,7 @@ extern void sched_post_fork(struct task_struct *p,
extern void sched_dead(struct task_struct *p);
void __noreturn do_task_dead(void);
+void __noreturn make_task_dead(int signr);
extern void proc_caches_init(void);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 72dbb44a4573..88cc16444b43 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -209,7 +209,7 @@ static const struct file_operations __name ## _fops = { \
#define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
- return single_open(file, __name ## _show, PDE_DATA(inode)); \
+ return single_open(file, __name ## _show, pde_data(inode)); \
} \
\
static const struct proc_ops __name ## _proc_ops = { \
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 166158b6e917..e65b80ed09e7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -83,8 +83,7 @@ extern void shmem_unlock_mapping(struct address_space *mapping);
extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
-extern int shmem_unuse(unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse);
+int shmem_unuse(unsigned int type);
extern bool shmem_is_huge(struct vm_area_struct *vma,
struct inode *inode, pgoff_t index);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 367366f1d1ff..37bde99b74af 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -403,8 +403,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
if (size <= 16 * 1024 * 1024) return 24;
if (size <= 32 * 1024 * 1024) return 25;
- if ((IS_ENABLED(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 110000)
- && !IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
+ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
else
BUG();
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index d0d188861ad6..b8ba00ccccde 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -59,6 +59,7 @@
#define _raw_spin_lock_nested(lock, subclass) __LOCK(lock)
#define _raw_read_lock(lock) __LOCK(lock)
#define _raw_write_lock(lock) __LOCK(lock)
+#define _raw_write_lock_nested(lock, subclass) __LOCK(lock)
#define _raw_spin_lock_bh(lock) __LOCK_BH(lock)
#define _raw_read_lock_bh(lock) __LOCK_BH(lock)
#define _raw_write_lock_bh(lock) __LOCK_BH(lock)
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index c34b55a6e554..17f992fe6355 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -19,6 +19,22 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
unsigned int nr_entries,
gfp_t gfp_flags, bool can_alloc);
+/*
+ * Every user of stack depot has to call this during its own init when it's
+ * decided that it will be calling stack_depot_save() later.
+ *
+ * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
+ * enabled as part of mm_init(), for subsystems where it's known at compile time
+ * that stack depot will be used.
+ */
+int stack_depot_init(void);
+
+#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
+static inline int stack_depot_early_init(void) { return stack_depot_init(); }
+#else
+static inline int stack_depot_early_init(void) { return 0; }
+#endif
+
depot_stack_handle_t stack_depot_save(unsigned long *entries,
unsigned int nr_entries, gfp_t gfp_flags);
@@ -30,13 +46,4 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
void stack_depot_print(depot_stack_handle_t stack);
-#ifdef CONFIG_STACKDEPOT
-int stack_depot_init(void);
-#else
-static inline int stack_depot_init(void)
-{
- return 0;
-}
-#endif /* CONFIG_STACKDEPOT */
-
#endif
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index a59db2f08e76..ccaab2043fcd 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -23,11 +23,6 @@ static inline void stackleak_task_init(struct task_struct *t)
# endif
}
-#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
-int stack_erasing_sysctl(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
static inline void stackleak_task_init(struct task_struct *t) { }
#endif
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 0ae28ae6caf2..f35c22b3355f 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -64,10 +64,9 @@ struct svc_serv_ops {
/* queue up a transport for servicing */
void (*svo_enqueue_xprt)(struct svc_xprt *);
- /* set up thread (or whatever) execution context */
- int (*svo_setup)(struct svc_serv *, struct svc_pool *, int);
-
- /* optional module to count when adding threads (pooled svcs only) */
+ /* optional module to count when adding threads.
+ * Thread function must call module_put_and_kthread_exit() to exit.
+ */
struct module *svo_module;
};
@@ -85,6 +84,7 @@ struct svc_serv {
struct svc_program * sv_program; /* RPC program */
struct svc_stat * sv_stats; /* RPC statistics */
spinlock_t sv_lock;
+ struct kref sv_refcnt;
unsigned int sv_nrthreads; /* # of server threads */
unsigned int sv_maxconn; /* max connections allowed or
* '0' causing max to be based
@@ -114,15 +114,43 @@ struct svc_serv {
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};
-/*
- * We use sv_nrthreads as a reference count. svc_destroy() drops
- * this refcount, so we need to bump it up around operations that
- * change the number of threads. Horrible, but there it is.
- * Should be called with the "service mutex" held.
+/**
+ * svc_get() - increment reference count on a SUNRPC serv
+ * @serv: the svc_serv to have count incremented
+ *
+ * Returns: the svc_serv that was passed in.
+ */
+static inline struct svc_serv *svc_get(struct svc_serv *serv)
+{
+ kref_get(&serv->sv_refcnt);
+ return serv;
+}
+
+void svc_destroy(struct kref *);
+
+/**
+ * svc_put - decrement reference count on a SUNRPC serv
+ * @serv: the svc_serv to have count decremented
+ *
+ * When the reference count reaches zero, svc_destroy()
+ * is called to clean up and free the serv.
+ */
+static inline void svc_put(struct svc_serv *serv)
+{
+ kref_put(&serv->sv_refcnt, svc_destroy);
+}
+
+/**
+ * svc_put_not_last - decrement non-final reference count on SUNRPC serv
+ * @serv: the svc_serv to have count decremented
+ *
+ * Returns: %true is refcount was decremented.
+ *
+ * If the refcount is 1, it is not decremented and instead failure is reported.
*/
-static inline void svc_get(struct svc_serv *serv)
+static inline bool svc_put_not_last(struct svc_serv *serv)
{
- serv->sv_nrthreads++;
+ return refcount_dec_not_one(&serv->sv_refcnt.refcount);
}
/*
@@ -469,29 +497,6 @@ struct svc_procedure {
};
/*
- * Mode for mapping cpus to pools.
- */
-enum {
- SVC_POOL_AUTO = -1, /* choose one of the others */
- SVC_POOL_GLOBAL, /* no mapping, just a single global pool
- * (legacy & UP mode) */
- SVC_POOL_PERCPU, /* one pool per cpu */
- SVC_POOL_PERNODE /* one pool per numa node */
-};
-
-struct svc_pool_map {
- int count; /* How many svc_servs use us */
- int mode; /* Note: int not enum to avoid
- * warnings about "enumeration value
- * not handled in switch" */
- unsigned int npools;
- unsigned int *pool_to; /* maps pool id to cpu or node */
- unsigned int *to_pool; /* maps cpu or node to pool id */
-};
-
-extern struct svc_pool_map svc_pool_map;
-
-/*
* Function prototypes.
*/
int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
@@ -501,20 +506,14 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int,
const struct svc_serv_ops *);
struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
struct svc_pool *pool, int node);
-struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
- struct svc_pool *pool, int node);
void svc_rqst_replace_page(struct svc_rqst *rqstp,
struct page *page);
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
-unsigned int svc_pool_map_get(void);
-void svc_pool_map_put(void);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
const struct svc_serv_ops *);
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
-int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
-void svc_destroy(struct svc_serv *);
void svc_shutdown_net(struct svc_serv *, struct net *);
int svc_process(struct svc_rqst *);
int bc_svc_process(struct svc_serv *, struct rpc_rqst *,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d1ea44b31f19..1d38d9475c4d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -514,7 +514,7 @@ extern int __swp_swapcount(swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *);
extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
-extern bool reuse_swap_page(struct page *, int *);
+extern bool reuse_swap_page(struct page *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
@@ -680,8 +680,8 @@ static inline int swp_swapcount(swp_entry_t entry)
return 0;
}
-#define reuse_swap_page(page, total_map_swapcount) \
- (page_trans_huge_mapcount(page, total_map_swapcount) == 1)
+#define reuse_swap_page(page) \
+ (page_trans_huge_mapcount(page) == 1)
static inline int try_to_free_swap(struct page *page)
{
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index e06febf62978..54078542134c 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -6,10 +6,7 @@
* these were static in swapfile.c but frontswap.c needs them and we don't
* want to expose them to the dozens of source files that include swap.h
*/
-extern spinlock_t swap_lock;
-extern struct plist_head swap_active_head;
extern struct swap_info_struct *swap_info[];
-extern int try_to_unuse(unsigned int, bool, unsigned long);
extern unsigned long generic_max_swapfile_size(void);
extern unsigned long max_swapfile_size(void);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 569272871375..f6c3638255d5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force;
* @end: The end address of the swiotlb memory pool. Used to do a quick
* range check to see if the memory was in fact allocated by this
* API.
+ * @vaddr: The vaddr of the swiotlb memory pool. The swiotlb memory pool
+ * may be remapped in the memory encrypted case and store virtual
+ * address for bounce buffer operation.
* @nslabs: The number of IO TLB blocks (in groups of 64) between @start and
* @end. For default swiotlb, this is command line adjustable via
* setup_io_tlb_npages.
@@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force;
struct io_tlb_mem {
phys_addr_t start;
phys_addr_t end;
+ void *vaddr;
unsigned long nslabs;
unsigned long used;
unsigned int index;
@@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
}
#endif /* CONFIG_DMA_RESTRICTED_POOL */
+extern phys_addr_t swiotlb_unencrypted_base;
+
#endif /* __LINUX_SWIOTLB_H */
diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index be24056ac00f..48fabe36509e 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -337,8 +337,6 @@ enum {
NTB_CTRL_REQ_ID_EN = 1 << 0,
NTB_CTRL_LUT_EN = 1 << 0,
-
- NTB_PART_CTRL_ID_PROT_DIS = 1 << 0,
};
struct ntb_ctrl_regs {
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 528a478dbda8..819c0cb00b6d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1057,6 +1057,9 @@ asmlinkage long sys_landlock_add_rule(int ruleset_fd, enum landlock_rule_type ru
const void __user *rule_attr, __u32 flags);
asmlinkage long sys_landlock_restrict_self(int ruleset_fd, __u32 flags);
asmlinkage long sys_memfd_secret(unsigned int flags);
+asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long len,
+ unsigned long home_node,
+ unsigned long flags);
/*
* Architecture-specific system calls
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1fa2b69c6fc3..180adf7da785 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -38,12 +38,28 @@ struct ctl_table_header;
struct ctl_dir;
/* Keep the same order as in fs/proc/proc_sysctl.c */
-#define SYSCTL_ZERO ((void *)&sysctl_vals[0])
-#define SYSCTL_ONE ((void *)&sysctl_vals[1])
-#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
+#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0])
+#define SYSCTL_ZERO ((void *)&sysctl_vals[1])
+#define SYSCTL_ONE ((void *)&sysctl_vals[2])
+#define SYSCTL_TWO ((void *)&sysctl_vals[3])
+#define SYSCTL_FOUR ((void *)&sysctl_vals[4])
+#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5])
+#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6])
+#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7])
+#define SYSCTL_THREE_THOUSAND ((void *)&sysctl_vals[8])
+#define SYSCTL_INT_MAX ((void *)&sysctl_vals[9])
+
+/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
+#define SYSCTL_MAXOLDUID ((void *)&sysctl_vals[10])
extern const int sysctl_vals[];
+#define SYSCTL_LONG_ZERO ((void *)&sysctl_long_vals[0])
+#define SYSCTL_LONG_ONE ((void *)&sysctl_long_vals[1])
+#define SYSCTL_LONG_MAX ((void *)&sysctl_long_vals[2])
+
+extern const unsigned long sysctl_long_vals[];
+
typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos);
@@ -178,6 +194,20 @@ struct ctl_path {
#ifdef CONFIG_SYSCTL
+#define DECLARE_SYSCTL_BASE(_name, _table) \
+static struct ctl_table _name##_base_table[] = { \
+ { \
+ .procname = #_name, \
+ .mode = 0555, \
+ .child = _table, \
+ }, \
+ { }, \
+}
+
+extern int __register_sysctl_base(struct ctl_table *base_table);
+
+#define register_sysctl_base(_name) __register_sysctl_base(_name##_base_table)
+
void proc_sys_poll_notify(struct ctl_table_poll *poll);
extern void setup_sysctl_set(struct ctl_table_set *p,
@@ -198,8 +228,19 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
void unregister_sysctl_table(struct ctl_table_header * table);
-extern int sysctl_init(void);
+extern int sysctl_init_bases(void);
+extern void __register_sysctl_init(const char *path, struct ctl_table *table,
+ const char *table_name);
+#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
+extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
+
void do_sysctl_args(void);
+int do_proc_douintvec(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data);
extern int pwrsw_enabled;
extern int unaligned_enabled;
@@ -207,16 +248,28 @@ extern int unaligned_dump_stack;
extern int no_unaligned_warning;
extern struct ctl_table sysctl_mount_point[];
-extern struct ctl_table random_table[];
-extern struct ctl_table firmware_config_table[];
-extern struct ctl_table epoll_table[];
#else /* CONFIG_SYSCTL */
+
+#define DECLARE_SYSCTL_BASE(_name, _table)
+
+static inline int __register_sysctl_base(struct ctl_table *base_table)
+{
+ return 0;
+}
+
+#define register_sysctl_base(table) __register_sysctl_base(table)
+
static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{
return NULL;
}
+static inline struct sysctl_header *register_sysctl_mount_point(const char *path)
+{
+ return NULL;
+}
+
static inline struct ctl_table_header *register_sysctl_paths(
const struct ctl_path *path, struct ctl_table *table)
{
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2d167ac3452c..70c069aef02c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -172,6 +172,7 @@ enum trace_flag_type {
TRACE_FLAG_SOFTIRQ = 0x10,
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
TRACE_FLAG_NMI = 0x40,
+ TRACE_FLAG_BH_OFF = 0x80,
};
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
@@ -782,6 +783,7 @@ enum {
FILTER_OTHER = 0,
FILTER_STATIC_STRING,
FILTER_DYN_STRING,
+ FILTER_RDYN_STRING,
FILTER_PTR_STRING,
FILTER_TRACE_FN,
FILTER_COMM,
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 2564b7434b4d..88c007ab5ebc 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -54,8 +54,7 @@ struct linux_binprm;
/*
* ptrace report for syscall entry and exit looks identical.
*/
-static inline int ptrace_report_syscall(struct pt_regs *regs,
- unsigned long message)
+static inline int ptrace_report_syscall(unsigned long message)
{
int ptrace = current->ptrace;
@@ -102,7 +101,7 @@ static inline int ptrace_report_syscall(struct pt_regs *regs,
static inline __must_check int tracehook_report_syscall_entry(
struct pt_regs *regs)
{
- return ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_ENTRY);
+ return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY);
}
/**
@@ -127,7 +126,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
if (step)
user_single_step_report(regs);
else
- ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_EXIT);
+ ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT);
}
/**
diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h
index c0d817de4df2..f4c8eaf4d012 100644
--- a/include/linux/unaligned/packed_struct.h
+++ b/include/linux/unaligned/packed_struct.h
@@ -1,7 +1,7 @@
#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H
#define _LINUX_UNALIGNED_PACKED_STRUCT_H
-#include <linux/kernel.h>
+#include <linux/types.h>
struct __una_u16 { u16 x; } __packed;
struct __una_u32 { u32 x; } __packed;
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 74484d44c755..4d39e6e11a95 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -5,9 +5,52 @@
#include <linux/init.h>
#include <linux/dcache.h>
+struct utf8data;
+struct utf8data_table;
+
+#define UNICODE_MAJ_SHIFT 16
+#define UNICODE_MIN_SHIFT 8
+
+#define UNICODE_AGE(MAJ, MIN, REV) \
+ (((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \
+ ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
+ ((unsigned int)(REV)))
+
+static inline u8 unicode_major(unsigned int age)
+{
+ return (age >> UNICODE_MAJ_SHIFT) & 0xff;
+}
+
+static inline u8 unicode_minor(unsigned int age)
+{
+ return (age >> UNICODE_MIN_SHIFT) & 0xff;
+}
+
+static inline u8 unicode_rev(unsigned int age)
+{
+ return age & 0xff;
+}
+
+/*
+ * Two normalization forms are supported:
+ * 1) NFDI
+ * - Apply unicode normalization form NFD.
+ * - Remove any Default_Ignorable_Code_Point.
+ * 2) NFDICF
+ * - Apply unicode normalization form NFD.
+ * - Remove any Default_Ignorable_Code_Point.
+ * - Apply a full casefold (C + F).
+ */
+enum utf8_normalization {
+ UTF8_NFDI = 0,
+ UTF8_NFDICF,
+ UTF8_NMAX,
+};
+
struct unicode_map {
- const char *charset;
- int version;
+ unsigned int version;
+ const struct utf8data *ntab[UTF8_NMAX];
+ const struct utf8data_table *tables;
};
int utf8_validate(const struct unicode_map *um, const struct qstr *str);
@@ -30,7 +73,7 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
struct qstr *str);
-struct unicode_map *utf8_load(const char *version);
+struct unicode_map *utf8_load(unsigned int version);
void utf8_unload(struct unicode_map *um);
#endif /* _LINUX_UNICODE_H */
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index c3011ccda430..2de442ececae 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -64,6 +64,7 @@ struct vdpa_mgmt_dev;
* struct vdpa_device - representation of a vDPA device
* @dev: underlying device
* @dma_dev: the actual device that is performing DMA
+ * @driver_override: driver name to force a match
* @config: the configuration ops for this device.
* @cf_mutex: Protects get and set access to configuration layout.
* @index: device index
@@ -76,6 +77,7 @@ struct vdpa_mgmt_dev;
struct vdpa_device {
struct device dev;
struct device *dma_dev;
+ const char *driver_override;
const struct vdpa_config_ops *config;
struct mutex cf_mutex; /* Protects get/set config */
unsigned int index;
@@ -99,6 +101,7 @@ struct vdpa_dev_set_config {
struct {
u8 mac[ETH_ALEN];
u16 mtu;
+ u16 max_vq_pairs;
} net;
u64 mask;
};
@@ -155,7 +158,7 @@ struct vdpa_map_file {
* @vdev: vdpa device
* @idx: virtqueue index
* @state: pointer to returned state (last_avail_idx)
- * @get_vq_notification: Get the notification area for a virtqueue
+ * @get_vq_notification: Get the notification area for a virtqueue (optional)
* @vdev: vdpa device
* @idx: virtqueue index
* Returns the notifcation area
@@ -169,14 +172,17 @@ struct vdpa_map_file {
* for the device
* @vdev: vdpa device
* Returns virtqueue algin requirement
- * @get_features: Get virtio features supported by the device
+ * @get_device_features: Get virtio features supported by the device
* @vdev: vdpa device
* Returns the virtio features support by the
* device
- * @set_features: Set virtio features supported by the driver
+ * @set_driver_features: Set virtio features supported by the driver
* @vdev: vdpa device
* @features: feature support by the driver
* Returns integer: success (0) or error (< 0)
+ * @get_driver_features: Get the virtio driver features in action
+ * @vdev: vdpa device
+ * Returns the virtio features accepted
* @set_config_cb: Set the config interrupt callback
* @vdev: vdpa device
* @cb: virtio-vdev interrupt callback structure
@@ -276,8 +282,9 @@ struct vdpa_config_ops {
/* Device ops */
u32 (*get_vq_align)(struct vdpa_device *vdev);
- u64 (*get_features)(struct vdpa_device *vdev);
- int (*set_features)(struct vdpa_device *vdev, u64 features);
+ u64 (*get_device_features)(struct vdpa_device *vdev);
+ int (*set_driver_features)(struct vdpa_device *vdev, u64 features);
+ u64 (*get_driver_features)(struct vdpa_device *vdev);
void (*set_config_cb)(struct vdpa_device *vdev,
struct vdpa_callback *cb);
u16 (*get_vq_num_max)(struct vdpa_device *vdev);
@@ -385,23 +392,37 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
static inline int vdpa_reset(struct vdpa_device *vdev)
{
const struct vdpa_config_ops *ops = vdev->config;
+ int ret;
+ mutex_lock(&vdev->cf_mutex);
vdev->features_valid = false;
- return ops->reset(vdev);
+ ret = ops->reset(vdev);
+ mutex_unlock(&vdev->cf_mutex);
+ return ret;
}
-static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
+static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked)
{
const struct vdpa_config_ops *ops = vdev->config;
+ int ret;
+
+ if (!locked)
+ mutex_lock(&vdev->cf_mutex);
vdev->features_valid = true;
- return ops->set_features(vdev, features);
+ ret = ops->set_driver_features(vdev, features);
+ if (!locked)
+ mutex_unlock(&vdev->cf_mutex);
+
+ return ret;
}
void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
const void *buf, unsigned int length);
+void vdpa_set_status(struct vdpa_device *vdev, u8 status);
+
/**
* struct vdpa_mgmtdev_ops - vdpa device ops
* @dev_add: Add a vdpa device using alloc and register
@@ -438,6 +459,8 @@ struct vdpa_mgmt_dev {
const struct virtio_device_id *id_table;
u64 config_attr_mask;
struct list_head list;
+ u64 supported_features;
+ u32 max_supported_vqs;
};
int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 41edbc01ffa4..72292a62cd90 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -138,6 +138,7 @@ int virtio_finalize_features(struct virtio_device *dev);
int virtio_device_freeze(struct virtio_device *dev);
int virtio_device_restore(struct virtio_device *dev);
#endif
+void virtio_reset_device(struct virtio_device *dev);
size_t virtio_max_dma_size(struct virtio_device *vdev);
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index a185cc75ff52..7b2363388bfa 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -98,6 +98,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
THP_SPLIT_PAGE_FAILED,
THP_DEFERRED_SPLIT_PAGE,
THP_SPLIT_PMD,
+ THP_SCAN_EXCEED_NONE_PTE,
+ THP_SCAN_EXCEED_SWAP_PTE,
+ THP_SCAN_EXCEED_SHARED_PTE,
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
THP_SPLIT_PUD,
#endif
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 6e022cc712e6..880227b9f044 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -28,6 +28,13 @@ struct notifier_block; /* in notifier.h */
#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
#define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */
+#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
+ !defined(CONFIG_KASAN_VMALLOC)
+#define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */
+#else
+#define VM_DEFER_KMEMLEAK 0
+#endif
+
/*
* VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC.
*
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 9c6ec78e47a5..24a509f559ee 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -551,6 +551,4 @@ struct p9_fcall {
int p9_errstr2errno(char *errstr, int len);
int p9_error_init(void);
-int p9_trans_fd_init(void);
-void p9_trans_fd_exit(void);
#endif /* NET_9P_H */
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 15a4e6a9dbf7..ff842f963071 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -54,7 +54,7 @@ struct p9_trans_module {
void v9fs_register_trans(struct p9_trans_module *m);
void v9fs_unregister_trans(struct p9_trans_module *m);
-struct p9_trans_module *v9fs_get_trans_by_name(char *s);
+struct p9_trans_module *v9fs_get_trans_by_name(const char *s);
struct p9_trans_module *v9fs_get_default_trans(void);
void v9fs_put_trans(struct p9_trans_module *m);
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 48cc5795ceda..63540be0fc34 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
static inline void fqdir_pre_exit(struct fqdir *fqdir)
{
- fqdir->high_thresh = 0; /* prevent creation of new frags */
- fqdir->dead = true;
+ /* Prevent creation of new frags.
+ * Pairs with READ_ONCE() in inet_frag_find().
+ */
+ WRITE_ONCE(fqdir->high_thresh, 0);
+
+ /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
+ * and ip6frag_expire_frag_queue().
+ */
+ WRITE_ONCE(fqdir->dead, true);
}
void fqdir_exit(struct fqdir *fqdir);
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 851029ecff13..0a4779175a52 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
struct sk_buff *head;
rcu_read_lock();
- if (fq->q.fqdir->dead)
+ /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(fq->q.fqdir->dead))
goto out_rcu_unlock;
spin_lock(&fq->q.lock);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ebef45e821af..676cb8ea9e15 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -218,8 +218,10 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
#ifdef CONFIG_NET_CLS_ACT
exts->type = 0;
exts->nr_actions = 0;
+ /* Note: we do not own yet a reference on net.
+ * This reference might be taken later from tcf_exts_get_net().
+ */
exts->net = net;
- netns_tracker_alloc(net, &exts->ns_tracker, GFP_KERNEL);
exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
GFP_KERNEL);
if (!exts->actions)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c11dbac5abb2..472843eedbae 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1244,6 +1244,7 @@ struct psched_ratecfg {
u64 rate_bytes_ps; /* bytes per second */
u32 mult;
u16 overhead;
+ u16 mpu;
u8 linklayer;
u8 shift;
};
@@ -1253,6 +1254,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
{
len += r->overhead;
+ if (len < r->mpu)
+ len = r->mpu;
+
if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
@@ -1275,6 +1279,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
res->rate = min_t(u64, r->rate_bytes_ps, ~0U);
res->overhead = r->overhead;
+ res->mpu = r->mpu;
res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
}
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 0bdbc0d17d2f..d0337a41141c 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -358,7 +358,6 @@ TRACE_EVENT(aer_event,
EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \
EM ( MF_MSG_SLAB, "kernel slab page" ) \
EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \
- EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \
EM ( MF_MSG_HUGE, "huge page" ) \
EM ( MF_MSG_FREE_HUGE, "free huge page" ) \
EM ( MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page" ) \
@@ -373,7 +372,6 @@ TRACE_EVENT(aer_event,
EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \
EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \
EM ( MF_MSG_BUDDY, "free buddy page" ) \
- EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \
EM ( MF_MSG_DAX, "dax page" ) \
EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \
EMe ( MF_MSG_UNKNOWN, "unknown page" )
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index ab7557d84f75..647c53b26105 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -415,9 +415,8 @@ extern int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
int retries, struct scsi_mode_data *data,
struct scsi_sense_hdr *);
extern int scsi_mode_select(struct scsi_device *sdev, int pf, int sp,
- int modepage, unsigned char *buffer, int len,
- int timeout, int retries,
- struct scsi_mode_data *data,
+ unsigned char *buffer, int len, int timeout,
+ int retries, struct scsi_mode_data *data,
struct scsi_sense_hdr *);
extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout,
int retries, struct scsi_sense_hdr *sshdr);
diff --git a/include/scsi/sg.h b/include/scsi/sg.h
index 843cefb8efce..068e35d36557 100644
--- a/include/scsi/sg.h
+++ b/include/scsi/sg.h
@@ -29,10 +29,6 @@
* For utility and test programs see: http://sg.danny.cz/sg/sg3_utils.html
*/
-#ifdef __KERNEL__
-extern int sg_big_buff; /* for sysctl */
-#endif
-
typedef struct sg_iovec /* same structure as used by readv() Linux system */
{ /* call. It defines one scatter-gather element. */
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index a8e97f84b652..7660a7846586 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -21,6 +21,22 @@
#undef __get_bitmask
#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+#undef __get_rel_dynamic_array
+#define __get_rel_dynamic_array(field) \
+ ((void *)(&__entry->__rel_loc_##field) + \
+ sizeof(__entry->__rel_loc_##field) + \
+ (__entry->__rel_loc_##field & 0xffff))
+
+#undef __get_rel_dynamic_array_len
+#define __get_rel_dynamic_array_len(field) \
+ ((__entry->__rel_loc_##field >> 16) & 0xffff)
+
+#undef __get_rel_str
+#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field))
+
+#undef __get_rel_bitmask
+#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
+
#undef __perf_count
#define __perf_count(c) (c)
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 1172529b5b49..c6f5aa74db89 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -233,25 +233,48 @@ TRACE_EVENT(cachefiles_ref,
TRACE_EVENT(cachefiles_lookup,
TP_PROTO(struct cachefiles_object *obj,
+ struct dentry *dir,
struct dentry *de),
- TP_ARGS(obj, de),
+ TP_ARGS(obj, dir, de),
TP_STRUCT__entry(
__field(unsigned int, obj )
__field(short, error )
+ __field(unsigned long, dino )
__field(unsigned long, ino )
),
TP_fast_assign(
- __entry->obj = obj->debug_id;
+ __entry->obj = obj ? obj->debug_id : 0;
+ __entry->dino = d_backing_inode(dir)->i_ino;
__entry->ino = (!IS_ERR(de) && d_backing_inode(de) ?
d_backing_inode(de)->i_ino : 0);
__entry->error = IS_ERR(de) ? PTR_ERR(de) : 0;
),
- TP_printk("o=%08x i=%lx e=%d",
- __entry->obj, __entry->ino, __entry->error)
+ TP_printk("o=%08x dB=%lx B=%lx e=%d",
+ __entry->obj, __entry->dino, __entry->ino, __entry->error)
+ );
+
+TRACE_EVENT(cachefiles_mkdir,
+ TP_PROTO(struct dentry *dir, struct dentry *subdir),
+
+ TP_ARGS(dir, subdir),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, dir )
+ __field(unsigned int, subdir )
+ ),
+
+ TP_fast_assign(
+ __entry->dir = d_backing_inode(dir)->i_ino;
+ __entry->subdir = d_backing_inode(subdir)->i_ino;
+ ),
+
+ TP_printk("dB=%x sB=%x",
+ __entry->dir,
+ __entry->subdir)
);
TRACE_EVENT(cachefiles_tmpfile,
@@ -269,7 +292,7 @@ TRACE_EVENT(cachefiles_tmpfile,
__entry->backer = backer->i_ino;
),
- TP_printk("o=%08x b=%08x",
+ TP_printk("o=%08x B=%x",
__entry->obj,
__entry->backer)
);
@@ -289,61 +312,58 @@ TRACE_EVENT(cachefiles_link,
__entry->backer = backer->i_ino;
),
- TP_printk("o=%08x b=%08x",
+ TP_printk("o=%08x B=%x",
__entry->obj,
__entry->backer)
);
TRACE_EVENT(cachefiles_unlink,
TP_PROTO(struct cachefiles_object *obj,
- struct dentry *de,
+ ino_t ino,
enum fscache_why_object_killed why),
- TP_ARGS(obj, de, why),
+ TP_ARGS(obj, ino, why),
/* Note that obj may be NULL */
TP_STRUCT__entry(
__field(unsigned int, obj )
- __field(struct dentry *, de )
+ __field(unsigned int, ino )
__field(enum fscache_why_object_killed, why )
),
TP_fast_assign(
__entry->obj = obj ? obj->debug_id : UINT_MAX;
- __entry->de = de;
+ __entry->ino = ino;
__entry->why = why;
),
- TP_printk("o=%08x d=%p w=%s",
- __entry->obj, __entry->de,
+ TP_printk("o=%08x B=%x w=%s",
+ __entry->obj, __entry->ino,
__print_symbolic(__entry->why, cachefiles_obj_kill_traces))
);
TRACE_EVENT(cachefiles_rename,
TP_PROTO(struct cachefiles_object *obj,
- struct dentry *de,
- struct dentry *to,
+ ino_t ino,
enum fscache_why_object_killed why),
- TP_ARGS(obj, de, to, why),
+ TP_ARGS(obj, ino, why),
/* Note that obj may be NULL */
TP_STRUCT__entry(
__field(unsigned int, obj )
- __field(struct dentry *, de )
- __field(struct dentry *, to )
+ __field(unsigned int, ino )
__field(enum fscache_why_object_killed, why )
),
TP_fast_assign(
__entry->obj = obj ? obj->debug_id : UINT_MAX;
- __entry->de = de;
- __entry->to = to;
+ __entry->ino = ino;
__entry->why = why;
),
- TP_printk("o=%08x d=%p t=%p w=%s",
- __entry->obj, __entry->de, __entry->to,
+ TP_printk("o=%08x B=%x w=%s",
+ __entry->obj, __entry->ino,
__print_symbolic(__entry->why, cachefiles_obj_kill_traces))
);
@@ -370,7 +390,7 @@ TRACE_EVENT(cachefiles_coherency,
__entry->ino = ino;
),
- TP_printk("o=%08x %s i=%llx c=%u",
+ TP_printk("o=%08x %s B=%llx c=%u",
__entry->obj,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino,
@@ -397,7 +417,7 @@ TRACE_EVENT(cachefiles_vol_coherency,
__entry->ino = ino;
),
- TP_printk("V=%08x %s i=%llx",
+ TP_printk("V=%08x %s B=%llx",
__entry->vol,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino)
@@ -435,7 +455,7 @@ TRACE_EVENT(cachefiles_prep_read,
__entry->cache_inode = cache_inode;
),
- TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x b=%x",
+ TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x B=%x",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->why, cachefiles_prepare_read_traces),
@@ -466,7 +486,7 @@ TRACE_EVENT(cachefiles_read,
__entry->len = len;
),
- TP_printk("o=%08x b=%08x s=%llx l=%zx",
+ TP_printk("o=%08x B=%x s=%llx l=%zx",
__entry->obj,
__entry->backer,
__entry->start,
@@ -495,7 +515,7 @@ TRACE_EVENT(cachefiles_write,
__entry->len = len;
),
- TP_printk("o=%08x b=%08x s=%llx l=%zx",
+ TP_printk("o=%08x B=%x s=%llx l=%zx",
__entry->obj,
__entry->backer,
__entry->start,
@@ -524,7 +544,7 @@ TRACE_EVENT(cachefiles_trunc,
__entry->why = why;
),
- TP_printk("o=%08x b=%08x %s l=%llx->%llx",
+ TP_printk("o=%08x B=%x %s l=%llx->%llx",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->why, cachefiles_trunc_traces),
@@ -549,7 +569,28 @@ TRACE_EVENT(cachefiles_mark_active,
__entry->inode = inode->i_ino;
),
- TP_printk("o=%08x i=%lx",
+ TP_printk("o=%08x B=%lx",
+ __entry->obj, __entry->inode)
+ );
+
+TRACE_EVENT(cachefiles_mark_failed,
+ TP_PROTO(struct cachefiles_object *obj,
+ struct inode *inode),
+
+ TP_ARGS(obj, inode),
+
+ /* Note that obj may be NULL */
+ TP_STRUCT__entry(
+ __field(unsigned int, obj )
+ __field(ino_t, inode )
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj ? obj->debug_id : 0;
+ __entry->inode = inode->i_ino;
+ ),
+
+ TP_printk("o=%08x B=%lx",
__entry->obj, __entry->inode)
);
@@ -570,7 +611,7 @@ TRACE_EVENT(cachefiles_mark_inactive,
__entry->inode = inode->i_ino;
),
- TP_printk("o=%08x i=%lx",
+ TP_printk("o=%08x B=%lx",
__entry->obj, __entry->inode)
);
@@ -594,7 +635,7 @@ TRACE_EVENT(cachefiles_vfs_error,
__entry->where = where;
),
- TP_printk("o=%08x b=%08x %s e=%d",
+ TP_printk("o=%08x B=%x %s e=%d",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->where, cachefiles_error_traces),
@@ -621,7 +662,7 @@ TRACE_EVENT(cachefiles_io_error,
__entry->where = where;
),
- TP_printk("o=%08x b=%08x %s e=%d",
+ TP_printk("o=%08x B=%x %s e=%d",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->where, cachefiles_error_traces),
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 54e5bf081171..7d48e7079e48 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -68,10 +68,9 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages,
TRACE_EVENT(mm_compaction_migratepages,
TP_PROTO(unsigned long nr_all,
- int migrate_rc,
- struct list_head *migratepages),
+ unsigned int nr_succeeded),
- TP_ARGS(nr_all, migrate_rc, migratepages),
+ TP_ARGS(nr_all, nr_succeeded),
TP_STRUCT__entry(
__field(unsigned long, nr_migrated)
@@ -79,23 +78,8 @@ TRACE_EVENT(mm_compaction_migratepages,
),
TP_fast_assign(
- unsigned long nr_failed = 0;
- struct list_head *page_lru;
-
- /*
- * migrate_pages() returns either a non-negative number
- * with the number of pages that failed migration, or an
- * error code, in which case we need to count the remaining
- * pages manually
- */
- if (migrate_rc >= 0)
- nr_failed = migrate_rc;
- else
- list_for_each(page_lru, migratepages)
- nr_failed++;
-
- __entry->nr_migrated = nr_all - nr_failed;
- __entry->nr_failed = nr_failed;
+ __entry->nr_migrated = nr_succeeded;
+ __entry->nr_failed = nr_all - nr_succeeded;
),
TP_printk("nr_migrated=%lu nr_failed=%lu",
diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
index 2f422f4f1fb9..c79f1d4c39af 100644
--- a/include/trace/events/damon.h
+++ b/include/trace/events/damon.h
@@ -11,10 +11,10 @@
TRACE_EVENT(damon_aggregated,
- TP_PROTO(struct damon_target *t, struct damon_region *r,
- unsigned int nr_regions),
+ TP_PROTO(struct damon_target *t, unsigned int target_id,
+ struct damon_region *r, unsigned int nr_regions),
- TP_ARGS(t, r, nr_regions),
+ TP_ARGS(t, target_id, r, nr_regions),
TP_STRUCT__entry(
__field(unsigned long, target_id)
@@ -22,19 +22,22 @@ TRACE_EVENT(damon_aggregated,
__field(unsigned long, start)
__field(unsigned long, end)
__field(unsigned int, nr_accesses)
+ __field(unsigned int, age)
),
TP_fast_assign(
- __entry->target_id = t->id;
+ __entry->target_id = target_id;
__entry->nr_regions = nr_regions;
__entry->start = r->ar.start;
__entry->end = r->ar.end;
__entry->nr_accesses = r->nr_accesses;
+ __entry->age = r->age;
),
- TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u",
+ TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u %u",
__entry->target_id, __entry->nr_regions,
- __entry->start, __entry->end, __entry->nr_accesses)
+ __entry->start, __entry->end,
+ __entry->nr_accesses, __entry->age)
);
#endif /* _TRACE_DAMON_H */
diff --git a/include/trace/events/error_report.h b/include/trace/events/error_report.h
index 96f64bf218b2..a1922a800e6f 100644
--- a/include/trace/events/error_report.h
+++ b/include/trace/events/error_report.h
@@ -17,14 +17,16 @@
enum error_detector {
ERROR_DETECTOR_KFENCE,
- ERROR_DETECTOR_KASAN
+ ERROR_DETECTOR_KASAN,
+ ERROR_DETECTOR_WARN,
};
#endif /* __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY */
-#define error_detector_list \
+#define error_detector_list \
EM(ERROR_DETECTOR_KFENCE, "kfence") \
- EMe(ERROR_DETECTOR_KASAN, "kasan")
+ EM(ERROR_DETECTOR_KASAN, "kasan") \
+ EMe(ERROR_DETECTOR_WARN, "warning")
/* Always end the list with an EMe. */
#undef EM
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index f8cb916f3595..f701bb23f83c 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -540,17 +540,17 @@ TRACE_EVENT(f2fs_truncate_partial_nodes,
TRACE_EVENT(f2fs_file_write_iter,
- TP_PROTO(struct inode *inode, unsigned long offset,
- unsigned long length, int ret),
+ TP_PROTO(struct inode *inode, loff_t offset, size_t length,
+ ssize_t ret),
TP_ARGS(inode, offset, length, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
- __field(unsigned long, offset)
- __field(unsigned long, length)
- __field(int, ret)
+ __field(loff_t, offset)
+ __field(size_t, length)
+ __field(ssize_t, ret)
),
TP_fast_assign(
@@ -562,7 +562,7 @@ TRACE_EVENT(f2fs_file_write_iter,
),
TP_printk("dev = (%d,%d), ino = %lu, "
- "offset = %lu, length = %lu, written(err) = %d",
+ "offset = %lld, length = %zu, written(err) = %zd",
show_dev_ino(__entry),
__entry->offset,
__entry->length,
@@ -936,14 +936,14 @@ TRACE_EVENT(f2fs_fallocate,
TRACE_EVENT(f2fs_direct_IO_enter,
- TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+ TP_PROTO(struct inode *inode, struct kiocb *iocb, long len, int rw),
- TP_ARGS(inode, offset, len, rw),
+ TP_ARGS(inode, iocb, len, rw),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
- __field(loff_t, pos)
+ __field(struct kiocb *, iocb)
__field(unsigned long, len)
__field(int, rw)
),
@@ -951,15 +951,18 @@ TRACE_EVENT(f2fs_direct_IO_enter,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->pos = offset;
+ __entry->iocb = iocb;
__entry->len = len;
__entry->rw = rw;
),
- TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d",
+ TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_hint = %x ki_ioprio = %x rw = %d",
show_dev_ino(__entry),
- __entry->pos,
+ __entry->iocb->ki_pos,
__entry->len,
+ __entry->iocb->ki_flags,
+ __entry->iocb->ki_hint,
+ __entry->iocb->ki_ioprio,
__entry->rw)
);
diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h
index ab69434e2329..d4e631aa976f 100644
--- a/include/trace/events/libata.h
+++ b/include/trace/events/libata.h
@@ -132,9 +132,37 @@
ata_protocol_name(ATAPI_PROT_PIO), \
ata_protocol_name(ATAPI_PROT_DMA))
+#define ata_class_name(class) { class, #class }
+#define show_class_name(val) \
+ __print_symbolic(val, \
+ ata_class_name(ATA_DEV_UNKNOWN), \
+ ata_class_name(ATA_DEV_ATA), \
+ ata_class_name(ATA_DEV_ATA_UNSUP), \
+ ata_class_name(ATA_DEV_ATAPI), \
+ ata_class_name(ATA_DEV_ATAPI_UNSUP), \
+ ata_class_name(ATA_DEV_PMP), \
+ ata_class_name(ATA_DEV_PMP_UNSUP), \
+ ata_class_name(ATA_DEV_SEMB), \
+ ata_class_name(ATA_DEV_SEMB_UNSUP), \
+ ata_class_name(ATA_DEV_ZAC), \
+ ata_class_name(ATA_DEV_ZAC_UNSUP), \
+ ata_class_name(ATA_DEV_NONE))
+
+#define ata_sff_hsm_state_name(state) { state, #state }
+#define show_sff_hsm_state_name(val) \
+ __print_symbolic(val, \
+ ata_sff_hsm_state_name(HSM_ST_IDLE), \
+ ata_sff_hsm_state_name(HSM_ST_FIRST), \
+ ata_sff_hsm_state_name(HSM_ST), \
+ ata_sff_hsm_state_name(HSM_ST_LAST), \
+ ata_sff_hsm_state_name(HSM_ST_ERR))
+
const char *libata_trace_parse_status(struct trace_seq*, unsigned char);
#define __parse_status(s) libata_trace_parse_status(p, s)
+const char *libata_trace_parse_host_stat(struct trace_seq *, unsigned char);
+#define __parse_host_stat(s) libata_trace_parse_host_stat(p, s)
+
const char *libata_trace_parse_eh_action(struct trace_seq *, unsigned int);
#define __parse_eh_action(a) libata_trace_parse_eh_action(p, a)
@@ -144,11 +172,14 @@ const char *libata_trace_parse_eh_err_mask(struct trace_seq *, unsigned int);
const char *libata_trace_parse_qc_flags(struct trace_seq *, unsigned int);
#define __parse_qc_flags(f) libata_trace_parse_qc_flags(p, f)
+const char *libata_trace_parse_tf_flags(struct trace_seq *, unsigned int);
+#define __parse_tf_flags(f) libata_trace_parse_tf_flags(p, f)
+
const char *libata_trace_parse_subcmd(struct trace_seq *, unsigned char,
unsigned char, unsigned char);
#define __parse_subcmd(c,f,h) libata_trace_parse_subcmd(p, c, f, h)
-TRACE_EVENT(ata_qc_issue,
+DECLARE_EVENT_CLASS(ata_qc_issue_template,
TP_PROTO(struct ata_queued_cmd *qc),
@@ -207,6 +238,14 @@ TRACE_EVENT(ata_qc_issue,
__entry->dev)
);
+DEFINE_EVENT(ata_qc_issue_template, ata_qc_prep,
+ TP_PROTO(struct ata_queued_cmd *qc),
+ TP_ARGS(qc));
+
+DEFINE_EVENT(ata_qc_issue_template, ata_qc_issue,
+ TP_PROTO(struct ata_queued_cmd *qc),
+ TP_ARGS(qc));
+
DECLARE_EVENT_CLASS(ata_qc_complete_template,
TP_PROTO(struct ata_queued_cmd *qc),
@@ -275,6 +314,128 @@ DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_done,
TP_PROTO(struct ata_queued_cmd *qc),
TP_ARGS(qc));
+TRACE_EVENT(ata_tf_load,
+
+ TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf),
+
+ TP_ARGS(ap, tf),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ __field( unsigned char, cmd )
+ __field( unsigned char, dev )
+ __field( unsigned char, lbal )
+ __field( unsigned char, lbam )
+ __field( unsigned char, lbah )
+ __field( unsigned char, nsect )
+ __field( unsigned char, feature )
+ __field( unsigned char, hob_lbal )
+ __field( unsigned char, hob_lbam )
+ __field( unsigned char, hob_lbah )
+ __field( unsigned char, hob_nsect )
+ __field( unsigned char, hob_feature )
+ __field( unsigned char, proto )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = ap->print_id;
+ __entry->proto = tf->protocol;
+ __entry->cmd = tf->command;
+ __entry->dev = tf->device;
+ __entry->lbal = tf->lbal;
+ __entry->lbam = tf->lbam;
+ __entry->lbah = tf->lbah;
+ __entry->hob_lbal = tf->hob_lbal;
+ __entry->hob_lbam = tf->hob_lbam;
+ __entry->hob_lbah = tf->hob_lbah;
+ __entry->feature = tf->feature;
+ __entry->hob_feature = tf->hob_feature;
+ __entry->nsect = tf->nsect;
+ __entry->hob_nsect = tf->hob_nsect;
+ ),
+
+ TP_printk("ata_port=%u proto=%s cmd=%s%s " \
+ " tf=(%02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x)",
+ __entry->ata_port,
+ show_protocol_name(__entry->proto),
+ show_opcode_name(__entry->cmd),
+ __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect),
+ __entry->cmd, __entry->feature, __entry->nsect,
+ __entry->lbal, __entry->lbam, __entry->lbah,
+ __entry->hob_feature, __entry->hob_nsect,
+ __entry->hob_lbal, __entry->hob_lbam, __entry->hob_lbah,
+ __entry->dev)
+);
+
+DECLARE_EVENT_CLASS(ata_exec_command_template,
+
+ TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+
+ TP_ARGS(ap, tf, tag),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ __field( unsigned int, tag )
+ __field( unsigned char, cmd )
+ __field( unsigned char, feature )
+ __field( unsigned char, hob_nsect )
+ __field( unsigned char, proto )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = ap->print_id;
+ __entry->tag = tag;
+ __entry->proto = tf->protocol;
+ __entry->cmd = tf->command;
+ __entry->feature = tf->feature;
+ __entry->hob_nsect = tf->hob_nsect;
+ ),
+
+ TP_printk("ata_port=%u tag=%d proto=%s cmd=%s%s",
+ __entry->ata_port, __entry->tag,
+ show_protocol_name(__entry->proto),
+ show_opcode_name(__entry->cmd),
+ __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect))
+);
+
+DEFINE_EVENT(ata_exec_command_template, ata_exec_command,
+ TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+ TP_ARGS(ap, tf, tag));
+
+DEFINE_EVENT(ata_exec_command_template, ata_bmdma_setup,
+ TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+ TP_ARGS(ap, tf, tag));
+
+DEFINE_EVENT(ata_exec_command_template, ata_bmdma_start,
+ TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+ TP_ARGS(ap, tf, tag));
+
+DEFINE_EVENT(ata_exec_command_template, ata_bmdma_stop,
+ TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+ TP_ARGS(ap, tf, tag));
+
+TRACE_EVENT(ata_bmdma_status,
+
+ TP_PROTO(struct ata_port *ap, unsigned int host_stat),
+
+ TP_ARGS(ap, host_stat),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ __field( unsigned int, tag )
+ __field( unsigned char, host_stat )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = ap->print_id;
+ __entry->host_stat = host_stat;
+ ),
+
+ TP_printk("ata_port=%u host_stat=%s",
+ __entry->ata_port,
+ __parse_host_stat(__entry->host_stat))
+);
+
TRACE_EVENT(ata_eh_link_autopsy,
TP_PROTO(struct ata_device *dev, unsigned int eh_action, unsigned int eh_err_mask),
@@ -329,6 +490,259 @@ TRACE_EVENT(ata_eh_link_autopsy_qc,
__parse_eh_err_mask(__entry->eh_err_mask))
);
+DECLARE_EVENT_CLASS(ata_eh_action_template,
+
+ TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action),
+
+ TP_ARGS(link, devno, eh_action),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ __field( unsigned int, ata_dev )
+ __field( unsigned int, eh_action )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = link->ap->print_id;
+ __entry->ata_dev = link->pmp + devno;
+ __entry->eh_action = eh_action;
+ ),
+
+ TP_printk("ata_port=%u ata_dev=%u eh_action=%s",
+ __entry->ata_port, __entry->ata_dev,
+ __parse_eh_action(__entry->eh_action))
+);
+
+DEFINE_EVENT(ata_eh_action_template, ata_eh_about_to_do,
+ TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action),
+ TP_ARGS(link, devno, eh_action));
+
+DEFINE_EVENT(ata_eh_action_template, ata_eh_done,
+ TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action),
+ TP_ARGS(link, devno, eh_action));
+
+DECLARE_EVENT_CLASS(ata_link_reset_begin_template,
+
+ TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline),
+
+ TP_ARGS(link, class, deadline),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ __array( unsigned int, class, 2 )
+ __field( unsigned long, deadline )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = link->ap->print_id;
+ memcpy(__entry->class, class, 2);
+ __entry->deadline = deadline;
+ ),
+
+ TP_printk("ata_port=%u deadline=%lu classes=[%s,%s]",
+ __entry->ata_port, __entry->deadline,
+ show_class_name(__entry->class[0]),
+ show_class_name(__entry->class[1]))
+);
+
+DEFINE_EVENT(ata_link_reset_begin_template, ata_link_hardreset_begin,
+ TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline),
+ TP_ARGS(link, class, deadline));
+
+DEFINE_EVENT(ata_link_reset_begin_template, ata_slave_hardreset_begin,
+ TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline),
+ TP_ARGS(link, class, deadline));
+
+DEFINE_EVENT(ata_link_reset_begin_template, ata_link_softreset_begin,
+ TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline),
+ TP_ARGS(link, class, deadline));
+
+DECLARE_EVENT_CLASS(ata_link_reset_end_template,
+
+ TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+
+ TP_ARGS(link, class, rc),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ __array( unsigned int, class, 2 )
+ __field( int, rc )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = link->ap->print_id;
+ memcpy(__entry->class, class, 2);
+ __entry->rc = rc;
+ ),
+
+ TP_printk("ata_port=%u rc=%d class=[%s,%s]",
+ __entry->ata_port, __entry->rc,
+ show_class_name(__entry->class[0]),
+ show_class_name(__entry->class[1]))
+);
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_link_hardreset_end,
+ TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+ TP_ARGS(link, class, rc));
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_slave_hardreset_end,
+ TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+ TP_ARGS(link, class, rc));
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_link_softreset_end,
+ TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+ TP_ARGS(link, class, rc));
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_link_postreset,
+ TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+ TP_ARGS(link, class, rc));
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset,
+ TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+ TP_ARGS(link, class, rc));
+
+DECLARE_EVENT_CLASS(ata_port_eh_begin_template,
+
+ TP_PROTO(struct ata_port *ap),
+
+ TP_ARGS(ap),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = ap->print_id;
+ ),
+
+ TP_printk("ata_port=%u", __entry->ata_port)
+);
+
+DEFINE_EVENT(ata_port_eh_begin_template, ata_std_sched_eh,
+ TP_PROTO(struct ata_port *ap),
+ TP_ARGS(ap));
+
+DEFINE_EVENT(ata_port_eh_begin_template, ata_port_freeze,
+ TP_PROTO(struct ata_port *ap),
+ TP_ARGS(ap));
+
+DEFINE_EVENT(ata_port_eh_begin_template, ata_port_thaw,
+ TP_PROTO(struct ata_port *ap),
+ TP_ARGS(ap));
+
+DECLARE_EVENT_CLASS(ata_sff_hsm_template,
+
+ TP_PROTO(struct ata_queued_cmd *qc, unsigned char status),
+
+ TP_ARGS(qc, status),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ __field( unsigned int, ata_dev )
+ __field( unsigned int, tag )
+ __field( unsigned int, qc_flags )
+ __field( unsigned int, protocol )
+ __field( unsigned int, hsm_state )
+ __field( unsigned char, dev_state )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = qc->ap->print_id;
+ __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno;
+ __entry->tag = qc->tag;
+ __entry->qc_flags = qc->flags;
+ __entry->protocol = qc->tf.protocol;
+ __entry->hsm_state = qc->ap->hsm_task_state;
+ __entry->dev_state = status;
+ ),
+
+ TP_printk("ata_port=%u ata_dev=%u tag=%d proto=%s flags=%s task_state=%s dev_stat=0x%X",
+ __entry->ata_port, __entry->ata_dev, __entry->tag,
+ show_protocol_name(__entry->protocol),
+ __parse_qc_flags(__entry->qc_flags),
+ show_sff_hsm_state_name(__entry->hsm_state),
+ __entry->dev_state)
+);
+
+DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_state,
+ TP_PROTO(struct ata_queued_cmd *qc, unsigned char state),
+ TP_ARGS(qc, state));
+
+DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_command_complete,
+ TP_PROTO(struct ata_queued_cmd *qc, unsigned char state),
+ TP_ARGS(qc, state));
+
+DEFINE_EVENT(ata_sff_hsm_template, ata_sff_port_intr,
+ TP_PROTO(struct ata_queued_cmd *qc, unsigned char state),
+ TP_ARGS(qc, state));
+
+DECLARE_EVENT_CLASS(ata_transfer_data_template,
+
+ TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count),
+
+ TP_ARGS(qc, offset, count),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ __field( unsigned int, ata_dev )
+ __field( unsigned int, tag )
+ __field( unsigned int, flags )
+ __field( unsigned int, offset )
+ __field( unsigned int, bytes )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = qc->ap->print_id;
+ __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno;
+ __entry->tag = qc->tag;
+ __entry->flags = qc->tf.flags;
+ __entry->offset = offset;
+ __entry->bytes = count;
+ ),
+
+ TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s offset=%u bytes=%u",
+ __entry->ata_port, __entry->ata_dev, __entry->tag,
+ __parse_tf_flags(__entry->flags),
+ __entry->offset, __entry->bytes)
+);
+
+DEFINE_EVENT(ata_transfer_data_template, ata_sff_pio_transfer_data,
+ TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count),
+ TP_ARGS(qc, offset, count));
+
+DEFINE_EVENT(ata_transfer_data_template, atapi_pio_transfer_data,
+ TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count),
+ TP_ARGS(qc, offset, count));
+
+DEFINE_EVENT(ata_transfer_data_template, atapi_send_cdb,
+ TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count),
+ TP_ARGS(qc, offset, count));
+
+DECLARE_EVENT_CLASS(ata_sff_template,
+
+ TP_PROTO(struct ata_port *ap),
+
+ TP_ARGS(ap),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ata_port )
+ __field( unsigned char, hsm_state )
+ ),
+
+ TP_fast_assign(
+ __entry->ata_port = ap->print_id;
+ __entry->hsm_state = ap->hsm_task_state;
+ ),
+
+ TP_printk("ata_port=%u task_state=%s",
+ __entry->ata_port,
+ show_sff_hsm_state_name(__entry->hsm_state))
+);
+
+DEFINE_EVENT(ata_sff_template, ata_sff_flush_pio_task,
+ TP_PROTO(struct ata_port *ap),
+ TP_ARGS(ap));
+
#endif /* _TRACE_LIBATA_H */
/* This part must be outside protection */
diff --git a/include/trace/events/random.h b/include/trace/events/random.h
index 3d7b432ca5f3..a2d9aa16a5d7 100644
--- a/include/trace/events/random.h
+++ b/include/trace/events/random.h
@@ -28,80 +28,71 @@ TRACE_EVENT(add_device_randomness,
);
DECLARE_EVENT_CLASS(random__mix_pool_bytes,
- TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+ TP_PROTO(int bytes, unsigned long IP),
- TP_ARGS(pool_name, bytes, IP),
+ TP_ARGS(bytes, IP),
TP_STRUCT__entry(
- __field( const char *, pool_name )
__field( int, bytes )
__field(unsigned long, IP )
),
TP_fast_assign(
- __entry->pool_name = pool_name;
__entry->bytes = bytes;
__entry->IP = IP;
),
- TP_printk("%s pool: bytes %d caller %pS",
- __entry->pool_name, __entry->bytes, (void *)__entry->IP)
+ TP_printk("input pool: bytes %d caller %pS",
+ __entry->bytes, (void *)__entry->IP)
);
DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes,
- TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+ TP_PROTO(int bytes, unsigned long IP),
- TP_ARGS(pool_name, bytes, IP)
+ TP_ARGS(bytes, IP)
);
DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock,
- TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+ TP_PROTO(int bytes, unsigned long IP),
- TP_ARGS(pool_name, bytes, IP)
+ TP_ARGS(bytes, IP)
);
TRACE_EVENT(credit_entropy_bits,
- TP_PROTO(const char *pool_name, int bits, int entropy_count,
- unsigned long IP),
+ TP_PROTO(int bits, int entropy_count, unsigned long IP),
- TP_ARGS(pool_name, bits, entropy_count, IP),
+ TP_ARGS(bits, entropy_count, IP),
TP_STRUCT__entry(
- __field( const char *, pool_name )
__field( int, bits )
__field( int, entropy_count )
__field(unsigned long, IP )
),
TP_fast_assign(
- __entry->pool_name = pool_name;
__entry->bits = bits;
__entry->entropy_count = entropy_count;
__entry->IP = IP;
),
- TP_printk("%s pool: bits %d entropy_count %d caller %pS",
- __entry->pool_name, __entry->bits,
- __entry->entropy_count, (void *)__entry->IP)
+ TP_printk("input pool: bits %d entropy_count %d caller %pS",
+ __entry->bits, __entry->entropy_count, (void *)__entry->IP)
);
TRACE_EVENT(debit_entropy,
- TP_PROTO(const char *pool_name, int debit_bits),
+ TP_PROTO(int debit_bits),
- TP_ARGS(pool_name, debit_bits),
+ TP_ARGS( debit_bits),
TP_STRUCT__entry(
- __field( const char *, pool_name )
__field( int, debit_bits )
),
TP_fast_assign(
- __entry->pool_name = pool_name;
__entry->debit_bits = debit_bits;
),
- TP_printk("%s: debit_bits %d", __entry->pool_name,
- __entry->debit_bits)
+ TP_printk("input pool: debit_bits %d", __entry->debit_bits)
);
TRACE_EVENT(add_input_randomness,
@@ -170,36 +161,31 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch,
);
DECLARE_EVENT_CLASS(random__extract_entropy,
- TP_PROTO(const char *pool_name, int nbytes, int entropy_count,
- unsigned long IP),
+ TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
- TP_ARGS(pool_name, nbytes, entropy_count, IP),
+ TP_ARGS(nbytes, entropy_count, IP),
TP_STRUCT__entry(
- __field( const char *, pool_name )
__field( int, nbytes )
__field( int, entropy_count )
__field(unsigned long, IP )
),
TP_fast_assign(
- __entry->pool_name = pool_name;
__entry->nbytes = nbytes;
__entry->entropy_count = entropy_count;
__entry->IP = IP;
),
- TP_printk("%s pool: nbytes %d entropy_count %d caller %pS",
- __entry->pool_name, __entry->nbytes, __entry->entropy_count,
- (void *)__entry->IP)
+ TP_printk("input pool: nbytes %d entropy_count %d caller %pS",
+ __entry->nbytes, __entry->entropy_count, (void *)__entry->IP)
);
DEFINE_EVENT(random__extract_entropy, extract_entropy,
- TP_PROTO(const char *pool_name, int nbytes, int entropy_count,
- unsigned long IP),
+ TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
- TP_ARGS(pool_name, nbytes, entropy_count, IP)
+ TP_ARGS(nbytes, entropy_count, IP)
);
TRACE_EVENT(urandom_read,
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 3a99358c262b..1e566ac4b812 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1744,10 +1744,11 @@ TRACE_EVENT(svc_xprt_create_err,
const char *program,
const char *protocol,
struct sockaddr *sap,
+ size_t salen,
const struct svc_xprt *xprt
),
- TP_ARGS(program, protocol, sap, xprt),
+ TP_ARGS(program, protocol, sap, salen, xprt),
TP_STRUCT__entry(
__field(long, error)
@@ -1760,7 +1761,7 @@ TRACE_EVENT(svc_xprt_create_err,
__entry->error = PTR_ERR(xprt);
__assign_str(program, program);
__assign_str(protocol, protocol);
- memcpy(__entry->addr, sap, sizeof(__entry->addr));
+ memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr)));
),
TP_printk("addr=%pISpc program=%s protocol=%s error=%ld",
@@ -1768,7 +1769,7 @@ TRACE_EVENT(svc_xprt_create_err,
__entry->error)
);
-TRACE_EVENT(svc_xprt_do_enqueue,
+TRACE_EVENT(svc_xprt_enqueue,
TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst),
TP_ARGS(xprt, rqst),
@@ -1815,7 +1816,6 @@ DECLARE_EVENT_CLASS(svc_xprt_event,
), \
TP_ARGS(xprt))
-DEFINE_SVC_XPRT_EVENT(received);
DEFINE_SVC_XPRT_EVENT(no_write_space);
DEFINE_SVC_XPRT_EVENT(close);
DEFINE_SVC_XPRT_EVENT(detach);
@@ -1902,27 +1902,6 @@ TRACE_EVENT(svc_alloc_arg_err,
TP_printk("pages=%u", __entry->pages)
);
-TRACE_EVENT(svc_handle_xprt,
- TP_PROTO(struct svc_xprt *xprt, int len),
-
- TP_ARGS(xprt, len),
-
- TP_STRUCT__entry(
- __field(int, len)
- __field(unsigned long, flags)
- __string(addr, xprt->xpt_remotebuf)
- ),
-
- TP_fast_assign(
- __entry->len = len;
- __entry->flags = xprt->xpt_flags;
- __assign_str(addr, xprt->xpt_remotebuf);
- ),
-
- TP_printk("addr=%s len=%d flags=%s", __get_str(addr),
- __entry->len, show_svc_xprt_flags(__entry->flags))
-);
-
TRACE_EVENT(svc_stats_latency,
TP_PROTO(const struct svc_rqst *rqst),
@@ -2146,17 +2125,17 @@ DECLARE_EVENT_CLASS(svcsock_accept_class,
TP_STRUCT__entry(
__field(long, status)
__string(service, service)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __field(unsigned int, netns_ino)
),
TP_fast_assign(
__entry->status = status;
__assign_str(service, service);
- memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr));
+ __entry->netns_ino = xprt->xpt_net->ns.inum;
),
- TP_printk("listener=%pISpc service=%s status=%ld",
- __entry->addr, __get_str(service), __entry->status
+ TP_printk("addr=listener service=%s status=%ld",
+ __get_str(service), __entry->status
)
);
diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h
index d7fbbe551841..ca3f2767828a 100644
--- a/include/trace/events/thp.h
+++ b/include/trace/events/thp.h
@@ -8,24 +8,6 @@
#include <linux/types.h>
#include <linux/tracepoint.h>
-TRACE_EVENT(hugepage_invalidate,
-
- TP_PROTO(unsigned long addr, unsigned long pte),
- TP_ARGS(addr, pte),
- TP_STRUCT__entry(
- __field(unsigned long, addr)
- __field(unsigned long, pte)
- ),
-
- TP_fast_assign(
- __entry->addr = addr;
- __entry->pte = pte;
- ),
-
- TP_printk("hugepage invalidate at addr 0x%lx and pte = 0x%lx",
- __entry->addr, __entry->pte)
-);
-
TRACE_EVENT(hugepage_set_pmd,
TP_PROTO(unsigned long addr, unsigned long pmd),
@@ -65,23 +47,6 @@ TRACE_EVENT(hugepage_update,
TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set)
);
-TRACE_EVENT(hugepage_splitting,
-
- TP_PROTO(unsigned long addr, unsigned long pte),
- TP_ARGS(addr, pte),
- TP_STRUCT__entry(
- __field(unsigned long, addr)
- __field(unsigned long, pte)
- ),
-
- TP_fast_assign(
- __entry->addr = addr;
- __entry->pte = pte;
- ),
-
- TP_printk("hugepage splitting at addr 0x%lx and pte = 0x%lx",
- __entry->addr, __entry->pte)
-);
#endif /* _TRACE_THP_H */
diff --git a/include/trace/perf.h b/include/trace/perf.h
index dbc6c74defc3..ea4405de175a 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -21,6 +21,22 @@
#undef __get_bitmask
#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+#undef __get_rel_dynamic_array
+#define __get_rel_dynamic_array(field) \
+ ((void *)(&__entry->__rel_loc_##field) + \
+ sizeof(__entry->__rel_loc_##field) + \
+ (__entry->__rel_loc_##field & 0xffff))
+
+#undef __get_rel_dynamic_array_len
+#define __get_rel_dynamic_array_len(field) \
+ ((__entry->__rel_loc_##field >> 16) & 0xffff)
+
+#undef __get_rel_str
+#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field))
+
+#undef __get_rel_bitmask
+#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
+
#undef __perf_count
#define __perf_count(c) (__count = (c))
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 08810a463880..8c6f7c433518 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -108,6 +108,18 @@ TRACE_MAKE_SYSTEM_STR();
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item;
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1)
+
#undef TP_STRUCT__entry
#define TP_STRUCT__entry(args...) args
@@ -200,11 +212,23 @@ TRACE_MAKE_SYSTEM_STR();
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
#undef __string_len
#define __string_len(item, src, len) __dynamic_array(char, item, -1)
-#undef __bitmask
-#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item, len) u32 item;
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@@ -293,6 +317,19 @@ TRACE_MAKE_SYSTEM_STR();
#undef __get_str
#define __get_str(field) ((char *)__get_dynamic_array(field))
+#undef __get_rel_dynamic_array
+#define __get_rel_dynamic_array(field) \
+ ((void *)(&__entry->__rel_loc_##field) + \
+ sizeof(__entry->__rel_loc_##field) + \
+ (__entry->__rel_loc_##field & 0xffff))
+
+#undef __get_rel_dynamic_array_len
+#define __get_rel_dynamic_array_len(field) \
+ ((__entry->__rel_loc_##field >> 16) & 0xffff)
+
+#undef __get_rel_str
+#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field))
+
#undef __get_bitmask
#define __get_bitmask(field) \
({ \
@@ -302,6 +339,15 @@ TRACE_MAKE_SYSTEM_STR();
trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \
})
+#undef __get_rel_bitmask
+#define __get_rel_bitmask(field) \
+ ({ \
+ void *__bitmask = __get_rel_dynamic_array(field); \
+ unsigned int __bitmask_size; \
+ __bitmask_size = __get_rel_dynamic_array_len(field); \
+ trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \
+ })
+
#undef __print_flags
#define __print_flags(flag, delim, flag_array...) \
({ \
@@ -471,6 +517,21 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(_type, _item, _len) { \
+ .type = "__rel_loc " #_type "[]", .name = #_item, \
+ .size = 4, .align = 4, \
+ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
+
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
static struct trace_event_fields trace_event_fields_##call[] = { \
@@ -519,6 +580,22 @@ static struct trace_event_fields trace_event_fields_##call[] = { \
#undef __string_len
#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1)
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item, len) \
+ __item_length = (len) * sizeof(type); \
+ __data_offsets->item = __data_size + \
+ offsetof(typeof(*entry), __data) - \
+ offsetof(typeof(*entry), __rel_loc_##item) - \
+ sizeof(u32); \
+ __data_offsets->item |= __item_length << 16; \
+ __data_size += __item_length;
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item, \
+ strlen((src) ? (const char *)(src) : "(null)") + 1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1)
/*
* __bitmask_size_in_bytes_raw is the number of bytes needed to hold
* num_possible_cpus().
@@ -542,6 +619,10 @@ static struct trace_event_fields trace_event_fields_##call[] = { \
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \
__bitmask_size_in_longs(nr_bits))
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \
+ __bitmask_size_in_longs(nr_bits))
+
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
static inline notrace int trace_event_get_offsets_##call( \
@@ -706,6 +787,37 @@ static inline notrace int trace_event_get_offsets_##call( \
#define __assign_bitmask(dst, src, nr_bits) \
memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item, len) \
+ __entry->__rel_loc_##item = __data_offsets.item;
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
+
+#undef __assign_rel_str
+#define __assign_rel_str(dst, src) \
+ strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)");
+
+#undef __assign_rel_str_len
+#define __assign_rel_str_len(dst, src, len) \
+ do { \
+ memcpy(__get_rel_str(dst), (src), (len)); \
+ __get_rel_str(dst)[len] = '\0'; \
+ } while (0)
+
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
+
+#undef __get_rel_bitmask
+#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
+
+#undef __assign_rel_bitmask
+#define __assign_rel_bitmask(dst, src, nr_bits) \
+ memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+
#undef TP_fast_assign
#define TP_fast_assign(args...) args
@@ -770,6 +882,10 @@ static inline void ftrace_test_probe_##call(void) \
#undef __get_dynamic_array_len
#undef __get_str
#undef __get_bitmask
+#undef __get_rel_dynamic_array
+#undef __get_rel_dynamic_array_len
+#undef __get_rel_str
+#undef __get_rel_bitmask
#undef __print_array
#undef __print_hex_dump
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 4557a8b6086f..1c48b0ae3ba3 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
#define __NR_futex_waitv 449
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+#define __NR_set_mempolicy_home_node 450
+__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+
#undef __NR_syscalls
-#define __NR_syscalls 450
+#define __NR_syscalls 451
/*
* 32 bit systems traditionally used different
diff --git a/drivers/comedi/comedi.h b/include/uapi/linux/comedi.h
index b5d00a006dbb..7314e5ee0a1e 100644
--- a/drivers/comedi/comedi.h
+++ b/include/uapi/linux/comedi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: LGPL-2.0+ */
+/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */
/*
* comedi.h
* header file for COMEDI user API
diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index c750eac09fc9..a8f0ff75c430 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -28,6 +28,7 @@ enum idxd_scmd_stat {
IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000,
IDXD_SCMD_WQ_NO_SIZE = 0x800e0000,
IDXD_SCMD_WQ_NO_PRIV = 0x800f0000,
+ IDXD_SCMD_WQ_IRQ_ERR = 0x80100000,
};
#define IDXD_SCMD_SOFTERR_MASK 0x80000000
diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h
index e1fb78b4bf09..3e330f368917 100644
--- a/include/uapi/linux/kfd_sysfs.h
+++ b/include/uapi/linux/kfd_sysfs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT WITH Linux-syscall-note */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1daa45268de2..9563d294f181 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1131,6 +1131,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
+#define KVM_CAP_VM_GPA_BITS 207
+#define KVM_CAP_XSAVE2 208
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1162,11 +1164,20 @@ struct kvm_irq_routing_hv_sint {
__u32 sint;
};
+struct kvm_irq_routing_xen_evtchn {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+};
+
+#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1))
+
/* gsi routing entry types */
#define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
+#define KVM_IRQ_ROUTING_XEN_EVTCHN 5
struct kvm_irq_routing_entry {
__u32 gsi;
@@ -1178,6 +1189,7 @@ struct kvm_irq_routing_entry {
struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint;
+ struct kvm_irq_routing_xen_evtchn xen_evtchn;
__u32 pad[8];
} u;
};
@@ -1208,6 +1220,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
struct kvm_xen_hvm_config {
__u32 flags;
@@ -1610,6 +1623,9 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
+
struct kvm_s390_pv_sec_parm {
__u64 origin;
__u64 length;
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 35687dcb1a42..0425cd79af9a 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -6,6 +6,7 @@
#define AFFS_SUPER_MAGIC 0xadff
#define AFS_SUPER_MAGIC 0x5346414F
#define AUTOFS_SUPER_MAGIC 0x0187
+#define CEPH_SUPER_MAGIC 0x00c36400
#define CODA_SUPER_MAGIC 0x73757245
#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */
#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */
@@ -43,6 +44,7 @@
#define MINIX3_SUPER_MAGIC 0x4d5a /* minix v3 fs, 60 char names */
#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */
+#define EXFAT_SUPER_MAGIC 0x2011BAB0
#define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */
#define NFS_SUPER_MAGIC 0x6969
#define OCFS2_SUPER_MAGIC 0x7461636f
@@ -51,6 +53,7 @@
#define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */
#define AFS_FS_MAGIC 0x6B414653
+
#define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */
/* used by file system utilities that
look at the superblock, etc. */
@@ -59,6 +62,9 @@
#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
#define SMB_SUPER_MAGIC 0x517B
+#define CIFS_SUPER_MAGIC 0xFF534D42 /* the first four bytes of SMB PDUs */
+#define SMB2_SUPER_MAGIC 0xFE534D42
+
#define CGROUP_SUPER_MAGIC 0x27e0eb
#define CGROUP2_SUPER_MAGIC 0x63677270
diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h
index 50d98ec5e866..03a33ffffcba 100644
--- a/include/uapi/linux/module.h
+++ b/include/uapi/linux/module.h
@@ -5,5 +5,6 @@
/* Flags for sys_finit_module: */
#define MODULE_INIT_IGNORE_MODVERSIONS 1
#define MODULE_INIT_IGNORE_VERMAGIC 2
+#define MODULE_INIT_COMPRESSED_FILE 4
#endif /* _UAPI_LINUX_MODULE_H */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 318f3f1f9e92..bee1a9ed6e66 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -301,23 +301,23 @@
#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */
#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */
-/* Message Signalled Interrupt registers */
+/* Message Signaled Interrupt registers */
-#define PCI_MSI_FLAGS 2 /* Message Control */
+#define PCI_MSI_FLAGS 0x02 /* Message Control */
#define PCI_MSI_FLAGS_ENABLE 0x0001 /* MSI feature enabled */
#define PCI_MSI_FLAGS_QMASK 0x000e /* Maximum queue size available */
#define PCI_MSI_FLAGS_QSIZE 0x0070 /* Message queue size configured */
#define PCI_MSI_FLAGS_64BIT 0x0080 /* 64-bit addresses allowed */
#define PCI_MSI_FLAGS_MASKBIT 0x0100 /* Per-vector masking capable */
#define PCI_MSI_RFU 3 /* Rest of capability flags */
-#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */
-#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
-#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */
-#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */
-#define PCI_MSI_PENDING_32 16 /* Pending intrs for 32-bit devices */
-#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */
-#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */
-#define PCI_MSI_PENDING_64 20 /* Pending intrs for 64-bit devices */
+#define PCI_MSI_ADDRESS_LO 0x04 /* Lower 32 bits */
+#define PCI_MSI_ADDRESS_HI 0x08 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
+#define PCI_MSI_DATA_32 0x08 /* 16 bits of data for 32-bit devices */
+#define PCI_MSI_MASK_32 0x0c /* Mask bits register for 32-bit devices */
+#define PCI_MSI_PENDING_32 0x10 /* Pending intrs for 32-bit devices */
+#define PCI_MSI_DATA_64 0x0c /* 16 bits of data for 64-bit devices */
+#define PCI_MSI_MASK_64 0x10 /* Mask bits register for 64-bit devices */
+#define PCI_MSI_PENDING_64 0x14 /* Pending intrs for 64-bit devices */
/* MSI-X registers (in MSI-X capability) */
#define PCI_MSIX_FLAGS 2 /* Message Control */
@@ -335,10 +335,10 @@
/* MSI-X Table entry format (in memory mapped by a BAR) */
#define PCI_MSIX_ENTRY_SIZE 16
-#define PCI_MSIX_ENTRY_LOWER_ADDR 0 /* Message Address */
-#define PCI_MSIX_ENTRY_UPPER_ADDR 4 /* Message Upper Address */
-#define PCI_MSIX_ENTRY_DATA 8 /* Message Data */
-#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 /* Vector Control */
+#define PCI_MSIX_ENTRY_LOWER_ADDR 0x0 /* Message Address */
+#define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */
+#define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */
+#define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */
#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001
/* CompactPCI Hotswap Register */
@@ -470,7 +470,7 @@
/* PCI Express capability registers */
-#define PCI_EXP_FLAGS 2 /* Capabilities register */
+#define PCI_EXP_FLAGS 0x02 /* Capabilities register */
#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */
#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */
#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */
@@ -484,7 +484,7 @@
#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */
#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */
-#define PCI_EXP_DEVCAP 4 /* Device capabilities */
+#define PCI_EXP_DEVCAP 0x04 /* Device capabilities */
#define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */
#define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */
#define PCI_EXP_DEVCAP_EXT_TAG 0x00000020 /* Extended tags */
@@ -497,7 +497,7 @@
#define PCI_EXP_DEVCAP_PWR_VAL 0x03fc0000 /* Slot Power Limit Value */
#define PCI_EXP_DEVCAP_PWR_SCL 0x0c000000 /* Slot Power Limit Scale */
#define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */
-#define PCI_EXP_DEVCTL 8 /* Device Control */
+#define PCI_EXP_DEVCTL 0x08 /* Device Control */
#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */
#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */
#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */
@@ -522,7 +522,7 @@
#define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */
#define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */
#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */
-#define PCI_EXP_DEVSTA 10 /* Device Status */
+#define PCI_EXP_DEVSTA 0x0a /* Device Status */
#define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */
#define PCI_EXP_DEVSTA_NFED 0x0002 /* Non-Fatal Error Detected */
#define PCI_EXP_DEVSTA_FED 0x0004 /* Fatal Error Detected */
@@ -530,7 +530,7 @@
#define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */
#define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */
#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */
-#define PCI_EXP_LNKCAP 12 /* Link Capabilities */
+#define PCI_EXP_LNKCAP 0x0c /* Link Capabilities */
#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */
#define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */
#define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */
@@ -549,7 +549,7 @@
#define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */
#define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */
#define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */
-#define PCI_EXP_LNKCTL 16 /* Link Control */
+#define PCI_EXP_LNKCTL 0x10 /* Link Control */
#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */
#define PCI_EXP_LNKCTL_ASPM_L0S 0x0001 /* L0s Enable */
#define PCI_EXP_LNKCTL_ASPM_L1 0x0002 /* L1 Enable */
@@ -562,7 +562,7 @@
#define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */
#define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */
#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */
-#define PCI_EXP_LNKSTA 18 /* Link Status */
+#define PCI_EXP_LNKSTA 0x12 /* Link Status */
#define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */
#define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */
#define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */
@@ -582,7 +582,7 @@
#define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */
#define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */
#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints with link end here */
-#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */
+#define PCI_EXP_SLTCAP 0x14 /* Slot Capabilities */
#define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */
#define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */
#define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */
@@ -595,7 +595,7 @@
#define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */
#define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */
#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */
-#define PCI_EXP_SLTCTL 24 /* Slot Control */
+#define PCI_EXP_SLTCTL 0x18 /* Slot Control */
#define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */
#define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */
#define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */
@@ -617,7 +617,7 @@
#define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */
#define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */
#define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */
-#define PCI_EXP_SLTSTA 26 /* Slot Status */
+#define PCI_EXP_SLTSTA 0x1a /* Slot Status */
#define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */
#define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */
#define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */
@@ -627,15 +627,15 @@
#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */
#define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */
#define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */
-#define PCI_EXP_RTCTL 28 /* Root Control */
+#define PCI_EXP_RTCTL 0x1c /* Root Control */
#define PCI_EXP_RTCTL_SECEE 0x0001 /* System Error on Correctable Error */
#define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */
#define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */
#define PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */
#define PCI_EXP_RTCTL_CRSSVE 0x0010 /* CRS Software Visibility Enable */
-#define PCI_EXP_RTCAP 30 /* Root Capabilities */
+#define PCI_EXP_RTCAP 0x1e /* Root Capabilities */
#define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */
-#define PCI_EXP_RTSTA 32 /* Root Status */
+#define PCI_EXP_RTSTA 0x20 /* Root Status */
#define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */
#define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */
/*
@@ -646,7 +646,7 @@
* Use pcie_capability_read_word() and similar interfaces to use them
* safely.
*/
-#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */
+#define PCI_EXP_DEVCAP2 0x24 /* Device Capabilities 2 */
#define PCI_EXP_DEVCAP2_COMP_TMOUT_DIS 0x00000010 /* Completion Timeout Disable supported */
#define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */
#define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */
@@ -658,7 +658,7 @@
#define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */
#define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */
#define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */
-#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */
+#define PCI_EXP_DEVCTL2 0x28 /* Device Control 2 */
#define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */
#define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */
#define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */
@@ -670,9 +670,9 @@
#define PCI_EXP_DEVCTL2_OBFF_MSGA_EN 0x2000 /* Enable OBFF Message type A */
#define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */
#define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */
-#define PCI_EXP_DEVSTA2 42 /* Device Status 2 */
-#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */
-#define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */
+#define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */
+#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */
+#define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */
#define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */
#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */
#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */
@@ -680,7 +680,7 @@
#define PCI_EXP_LNKCAP2_SLS_32_0GB 0x00000020 /* Supported Speed 32GT/s */
#define PCI_EXP_LNKCAP2_SLS_64_0GB 0x00000040 /* Supported Speed 64GT/s */
#define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */
-#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */
+#define PCI_EXP_LNKCTL2 0x30 /* Link Control 2 */
#define PCI_EXP_LNKCTL2_TLS 0x000f
#define PCI_EXP_LNKCTL2_TLS_2_5GT 0x0001 /* Supported Speed 2.5GT/s */
#define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */
@@ -691,12 +691,12 @@
#define PCI_EXP_LNKCTL2_ENTER_COMP 0x0010 /* Enter Compliance */
#define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */
#define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */
-#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */
-#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */
+#define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */
+#define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */
#define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */
-#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */
-#define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */
+#define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */
+#define PCI_EXP_SLTSTA2 0x3a /* Slot Status 2 */
/* Extended Capabilities (PCI-X 2.0 and Express) */
#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff)
@@ -742,7 +742,7 @@
#define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
/* Advanced Error Reporting */
-#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
+#define PCI_ERR_UNCOR_STATUS 0x04 /* Uncorrectable Error Status */
#define PCI_ERR_UNC_UND 0x00000001 /* Undefined */
#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */
#define PCI_ERR_UNC_SURPDN 0x00000020 /* Surprise Down */
@@ -760,11 +760,11 @@
#define PCI_ERR_UNC_MCBTLP 0x00800000 /* MC blocked TLP */
#define PCI_ERR_UNC_ATOMEG 0x01000000 /* Atomic egress blocked */
#define PCI_ERR_UNC_TLPPRE 0x02000000 /* TLP prefix blocked */
-#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */
+#define PCI_ERR_UNCOR_MASK 0x08 /* Uncorrectable Error Mask */
/* Same bits as above */
-#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */
+#define PCI_ERR_UNCOR_SEVER 0x0c /* Uncorrectable Error Severity */
/* Same bits as above */
-#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */
+#define PCI_ERR_COR_STATUS 0x10 /* Correctable Error Status */
#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */
#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */
#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */
@@ -773,20 +773,20 @@
#define PCI_ERR_COR_ADV_NFAT 0x00002000 /* Advisory Non-Fatal */
#define PCI_ERR_COR_INTERNAL 0x00004000 /* Corrected Internal */
#define PCI_ERR_COR_LOG_OVER 0x00008000 /* Header Log Overflow */
-#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */
+#define PCI_ERR_COR_MASK 0x14 /* Correctable Error Mask */
/* Same bits as above */
-#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */
-#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */
+#define PCI_ERR_CAP 0x18 /* Advanced Error Capabilities & Ctrl*/
+#define PCI_ERR_CAP_FEP(x) ((x) & 0x1f) /* First Error Pointer */
#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */
#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */
#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */
#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */
-#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */
-#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */
+#define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */
+#define PCI_ERR_ROOT_COMMAND 0x2c /* Root Error Command */
#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */
#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */
#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* Fatal Err Reporting Enable */
-#define PCI_ERR_ROOT_STATUS 48
+#define PCI_ERR_ROOT_STATUS 0x30
#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */
#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */
#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */
@@ -795,52 +795,52 @@
#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */
#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */
#define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */
-#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */
+#define PCI_ERR_ROOT_ERR_SRC 0x34 /* Error Source Identification */
/* Virtual Channel */
-#define PCI_VC_PORT_CAP1 4
+#define PCI_VC_PORT_CAP1 0x04
#define PCI_VC_CAP1_EVCC 0x00000007 /* extended VC count */
#define PCI_VC_CAP1_LPEVCC 0x00000070 /* low prio extended VC count */
#define PCI_VC_CAP1_ARB_SIZE 0x00000c00
-#define PCI_VC_PORT_CAP2 8
+#define PCI_VC_PORT_CAP2 0x08
#define PCI_VC_CAP2_32_PHASE 0x00000002
#define PCI_VC_CAP2_64_PHASE 0x00000004
#define PCI_VC_CAP2_128_PHASE 0x00000008
#define PCI_VC_CAP2_ARB_OFF 0xff000000
-#define PCI_VC_PORT_CTRL 12
+#define PCI_VC_PORT_CTRL 0x0c
#define PCI_VC_PORT_CTRL_LOAD_TABLE 0x00000001
-#define PCI_VC_PORT_STATUS 14
+#define PCI_VC_PORT_STATUS 0x0e
#define PCI_VC_PORT_STATUS_TABLE 0x00000001
-#define PCI_VC_RES_CAP 16
+#define PCI_VC_RES_CAP 0x10
#define PCI_VC_RES_CAP_32_PHASE 0x00000002
#define PCI_VC_RES_CAP_64_PHASE 0x00000004
#define PCI_VC_RES_CAP_128_PHASE 0x00000008
#define PCI_VC_RES_CAP_128_PHASE_TB 0x00000010
#define PCI_VC_RES_CAP_256_PHASE 0x00000020
#define PCI_VC_RES_CAP_ARB_OFF 0xff000000
-#define PCI_VC_RES_CTRL 20
+#define PCI_VC_RES_CTRL 0x14
#define PCI_VC_RES_CTRL_LOAD_TABLE 0x00010000
#define PCI_VC_RES_CTRL_ARB_SELECT 0x000e0000
#define PCI_VC_RES_CTRL_ID 0x07000000
#define PCI_VC_RES_CTRL_ENABLE 0x80000000
-#define PCI_VC_RES_STATUS 26
+#define PCI_VC_RES_STATUS 0x1a
#define PCI_VC_RES_STATUS_TABLE 0x00000001
#define PCI_VC_RES_STATUS_NEGO 0x00000002
#define PCI_CAP_VC_BASE_SIZEOF 0x10
-#define PCI_CAP_VC_PER_VC_SIZEOF 0x0C
+#define PCI_CAP_VC_PER_VC_SIZEOF 0x0c
/* Power Budgeting */
-#define PCI_PWR_DSR 4 /* Data Select Register */
-#define PCI_PWR_DATA 8 /* Data Register */
+#define PCI_PWR_DSR 0x04 /* Data Select Register */
+#define PCI_PWR_DATA 0x08 /* Data Register */
#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */
#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */
#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */
#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */
#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */
-#define PCI_PWR_CAP 12 /* Capability */
+#define PCI_PWR_CAP 0x0c /* Capability */
#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */
-#define PCI_EXT_CAP_PWR_SIZEOF 16
+#define PCI_EXT_CAP_PWR_SIZEOF 0x10
/* Root Complex Event Collector Endpoint Association */
#define PCI_RCEC_RCIEP_BITMAP 4 /* Associated Bitmap for RCiEPs */
@@ -964,7 +964,7 @@
#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */
#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */
#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */
-#define PCI_EXT_CAP_SRIOV_SIZEOF 64
+#define PCI_EXT_CAP_SRIOV_SIZEOF 0x40
#define PCI_LTR_MAX_SNOOP_LAT 0x4
#define PCI_LTR_MAX_NOSNOOP_LAT 0x6
@@ -1017,12 +1017,12 @@
#define PCI_TPH_LOC_NONE 0x000 /* no location */
#define PCI_TPH_LOC_CAP 0x200 /* in capability */
#define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */
-#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* st table mask */
-#define PCI_TPH_CAP_ST_SHIFT 16 /* st table shift */
-#define PCI_TPH_BASE_SIZEOF 12 /* size with no st table */
+#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */
+#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */
+#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */
/* Downstream Port Containment */
-#define PCI_EXP_DPC_CAP 4 /* DPC Capability */
+#define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */
#define PCI_EXP_DPC_IRQ 0x001F /* Interrupt Message Number */
#define PCI_EXP_DPC_CAP_RP_EXT 0x0020 /* Root Port Extensions */
#define PCI_EXP_DPC_CAP_POISONED_TLP 0x0040 /* Poisoned TLP Egress Blocking Supported */
@@ -1030,19 +1030,19 @@
#define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size */
#define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */
-#define PCI_EXP_DPC_CTL 6 /* DPC control */
+#define PCI_EXP_DPC_CTL 0x06 /* DPC control */
#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */
#define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */
#define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */
-#define PCI_EXP_DPC_STATUS 8 /* DPC Status */
+#define PCI_EXP_DPC_STATUS 0x08 /* DPC Status */
#define PCI_EXP_DPC_STATUS_TRIGGER 0x0001 /* Trigger Status */
#define PCI_EXP_DPC_STATUS_TRIGGER_RSN 0x0006 /* Trigger Reason */
#define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */
#define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */
#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */
-#define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */
+#define PCI_EXP_DPC_SOURCE_ID 0x0A /* DPC Source Identifier */
#define PCI_EXP_DPC_RP_PIO_STATUS 0x0C /* RP PIO Status */
#define PCI_EXP_DPC_RP_PIO_MASK 0x10 /* RP PIO Mask */
diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h
new file mode 100644
index 000000000000..42fa15f8310d
--- /dev/null
+++ b/include/uapi/linux/pfrut.h
@@ -0,0 +1,262 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Platform Firmware Runtime Update header
+ *
+ * Copyright(c) 2021 Intel Corporation. All rights reserved.
+ */
+#ifndef __PFRUT_H__
+#define __PFRUT_H__
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define PFRUT_IOCTL_MAGIC 0xEE
+
+/**
+ * PFRU_IOC_SET_REV - _IOW(PFRUT_IOCTL_MAGIC, 0x01, unsigned int)
+ *
+ * Return:
+ * * 0 - success
+ * * -EFAULT - fail to read the revision id
+ * * -EINVAL - user provides an invalid revision id
+ *
+ * Set the Revision ID for Platform Firmware Runtime Update.
+ */
+#define PFRU_IOC_SET_REV _IOW(PFRUT_IOCTL_MAGIC, 0x01, unsigned int)
+
+/**
+ * PFRU_IOC_STAGE - _IOW(PFRUT_IOCTL_MAGIC, 0x02, unsigned int)
+ *
+ * Return:
+ * * 0 - success
+ * * -EINVAL - stage phase returns invalid result
+ *
+ * Stage a capsule image from communication buffer and perform authentication.
+ */
+#define PFRU_IOC_STAGE _IOW(PFRUT_IOCTL_MAGIC, 0x02, unsigned int)
+
+/**
+ * PFRU_IOC_ACTIVATE - _IOW(PFRUT_IOCTL_MAGIC, 0x03, unsigned int)
+ *
+ * Return:
+ * * 0 - success
+ * * -EINVAL - activate phase returns invalid result
+ *
+ * Activate a previously staged capsule image.
+ */
+#define PFRU_IOC_ACTIVATE _IOW(PFRUT_IOCTL_MAGIC, 0x03, unsigned int)
+
+/**
+ * PFRU_IOC_STAGE_ACTIVATE - _IOW(PFRUT_IOCTL_MAGIC, 0x04, unsigned int)
+ *
+ * Return:
+ * * 0 - success
+ * * -EINVAL - stage/activate phase returns invalid result.
+ *
+ * Perform both stage and activation action.
+ */
+#define PFRU_IOC_STAGE_ACTIVATE _IOW(PFRUT_IOCTL_MAGIC, 0x04, unsigned int)
+
+/**
+ * PFRU_IOC_QUERY_CAP - _IOR(PFRUT_IOCTL_MAGIC, 0x05,
+ * struct pfru_update_cap_info)
+ *
+ * Return:
+ * * 0 - success
+ * * -EINVAL - query phase returns invalid result
+ * * -EFAULT - the result fails to be copied to userspace
+ *
+ * Retrieve information on the Platform Firmware Runtime Update capability.
+ * The information is a struct pfru_update_cap_info.
+ */
+#define PFRU_IOC_QUERY_CAP _IOR(PFRUT_IOCTL_MAGIC, 0x05, struct pfru_update_cap_info)
+
+/**
+ * struct pfru_payload_hdr - Capsule file payload header.
+ *
+ * @sig: Signature of this capsule file.
+ * @hdr_version: Revision of this header structure.
+ * @hdr_size: Size of this header, including the OemHeader bytes.
+ * @hw_ver: The supported firmware version.
+ * @rt_ver: Version of the code injection image.
+ * @platform_id: A platform specific GUID to specify the platform what
+ * this capsule image support.
+ */
+struct pfru_payload_hdr {
+ __u32 sig;
+ __u32 hdr_version;
+ __u32 hdr_size;
+ __u32 hw_ver;
+ __u32 rt_ver;
+ __u8 platform_id[16];
+};
+
+enum pfru_dsm_status {
+ DSM_SUCCEED = 0,
+ DSM_FUNC_NOT_SUPPORT = 1,
+ DSM_INVAL_INPUT = 2,
+ DSM_HARDWARE_ERR = 3,
+ DSM_RETRY_SUGGESTED = 4,
+ DSM_UNKNOWN = 5,
+ DSM_FUNC_SPEC_ERR = 6,
+};
+
+/**
+ * struct pfru_update_cap_info - Runtime update capability information.
+ *
+ * @status: Indicator of whether this query succeed.
+ * @update_cap: Bitmap to indicate whether the feature is supported.
+ * @code_type: A buffer containing an image type GUID.
+ * @fw_version: Platform firmware version.
+ * @code_rt_version: Code injection runtime version for anti-rollback.
+ * @drv_type: A buffer containing an image type GUID.
+ * @drv_rt_version: The version of the driver update runtime code.
+ * @drv_svn: The secure version number(SVN) of the driver update runtime code.
+ * @platform_id: A buffer containing a platform ID GUID.
+ * @oem_id: A buffer containing an OEM ID GUID.
+ * @oem_info_len: Length of the buffer containing the vendor specific information.
+ */
+struct pfru_update_cap_info {
+ __u32 status;
+ __u32 update_cap;
+
+ __u8 code_type[16];
+ __u32 fw_version;
+ __u32 code_rt_version;
+
+ __u8 drv_type[16];
+ __u32 drv_rt_version;
+ __u32 drv_svn;
+
+ __u8 platform_id[16];
+ __u8 oem_id[16];
+
+ __u32 oem_info_len;
+};
+
+/**
+ * struct pfru_com_buf_info - Communication buffer information.
+ *
+ * @status: Indicator of whether this query succeed.
+ * @ext_status: Implementation specific query result.
+ * @addr_lo: Low 32bit physical address of the communication buffer to hold
+ * a runtime update package.
+ * @addr_hi: High 32bit physical address of the communication buffer to hold
+ * a runtime update package.
+ * @buf_size: Maximum size in bytes of the communication buffer.
+ */
+struct pfru_com_buf_info {
+ __u32 status;
+ __u32 ext_status;
+ __u64 addr_lo;
+ __u64 addr_hi;
+ __u32 buf_size;
+};
+
+/**
+ * struct pfru_updated_result - Platform firmware runtime update result information.
+ * @status: Indicator of whether this update succeed.
+ * @ext_status: Implementation specific update result.
+ * @low_auth_time: Low 32bit value of image authentication time in nanosecond.
+ * @high_auth_time: High 32bit value of image authentication time in nanosecond.
+ * @low_exec_time: Low 32bit value of image execution time in nanosecond.
+ * @high_exec_time: High 32bit value of image execution time in nanosecond.
+ */
+struct pfru_updated_result {
+ __u32 status;
+ __u32 ext_status;
+ __u64 low_auth_time;
+ __u64 high_auth_time;
+ __u64 low_exec_time;
+ __u64 high_exec_time;
+};
+
+/**
+ * struct pfrt_log_data_info - Log Data from telemetry service.
+ * @status: Indicator of whether this update succeed.
+ * @ext_status: Implementation specific update result.
+ * @chunk1_addr_lo: Low 32bit physical address of the telemetry data chunk1
+ * starting address.
+ * @chunk1_addr_hi: High 32bit physical address of the telemetry data chunk1
+ * starting address.
+ * @chunk2_addr_lo: Low 32bit physical address of the telemetry data chunk2
+ * starting address.
+ * @chunk2_addr_hi: High 32bit physical address of the telemetry data chunk2
+ * starting address.
+ * @max_data_size: Maximum supported size of data of all data chunks combined.
+ * @chunk1_size: Data size in bytes of the telemetry data chunk1 buffer.
+ * @chunk2_size: Data size in bytes of the telemetry data chunk2 buffer.
+ * @rollover_cnt: Number of times telemetry data buffer is overwritten
+ * since telemetry buffer reset.
+ * @reset_cnt: Number of times telemetry services resets that results in
+ * rollover count and data chunk buffers are reset.
+ */
+struct pfrt_log_data_info {
+ __u32 status;
+ __u32 ext_status;
+ __u64 chunk1_addr_lo;
+ __u64 chunk1_addr_hi;
+ __u64 chunk2_addr_lo;
+ __u64 chunk2_addr_hi;
+ __u32 max_data_size;
+ __u32 chunk1_size;
+ __u32 chunk2_size;
+ __u32 rollover_cnt;
+ __u32 reset_cnt;
+};
+
+/**
+ * struct pfrt_log_info - Telemetry log information.
+ * @log_level: The telemetry log level.
+ * @log_type: The telemetry log type(history and execution).
+ * @log_revid: The telemetry log revision id.
+ */
+struct pfrt_log_info {
+ __u32 log_level;
+ __u32 log_type;
+ __u32 log_revid;
+};
+
+/**
+ * PFRT_LOG_IOC_SET_INFO - _IOW(PFRUT_IOCTL_MAGIC, 0x06,
+ * struct pfrt_log_info)
+ *
+ * Return:
+ * * 0 - success
+ * * -EFAULT - fail to get the setting parameter
+ * * -EINVAL - fail to set the log level
+ *
+ * Set the PFRT log level and log type. The input information is
+ * a struct pfrt_log_info.
+ */
+#define PFRT_LOG_IOC_SET_INFO _IOW(PFRUT_IOCTL_MAGIC, 0x06, struct pfrt_log_info)
+
+/**
+ * PFRT_LOG_IOC_GET_INFO - _IOR(PFRUT_IOCTL_MAGIC, 0x07,
+ * struct pfrt_log_info)
+ *
+ * Return:
+ * * 0 - success
+ * * -EINVAL - fail to get the log level
+ * * -EFAULT - fail to copy the result back to userspace
+ *
+ * Retrieve log level and log type of the telemetry. The information is
+ * a struct pfrt_log_info.
+ */
+#define PFRT_LOG_IOC_GET_INFO _IOR(PFRUT_IOCTL_MAGIC, 0x07, struct pfrt_log_info)
+
+/**
+ * PFRT_LOG_IOC_GET_DATA_INFO - _IOR(PFRUT_IOCTL_MAGIC, 0x08,
+ * struct pfrt_log_data_info)
+ *
+ * Return:
+ * * 0 - success
+ * * -EINVAL - fail to get the log buffer information
+ * * -EFAULT - fail to copy the log buffer information to userspace
+ *
+ * Retrieve data information about the telemetry. The information
+ * is a struct pfrt_log_data_info.
+ */
+#define PFRT_LOG_IOC_GET_DATA_INFO _IOR(PFRUT_IOCTL_MAGIC, 0x08, struct pfrt_log_data_info)
+
+#endif /* __PFRUT_H__ */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index bb73e9a0b24f..e998764f0262 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -272,4 +272,7 @@ struct prctl_mm_map {
# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+#define PR_SET_VMA 0x53564d41
+# define PR_SET_VMA_ANON_NAME 0
+
#endif /* _LINUX_PRCTL_H */
diff --git a/include/uapi/linux/soundcard.h b/include/uapi/linux/soundcard.h
index f3b21f989872..ac1318793a86 100644
--- a/include/uapi/linux/soundcard.h
+++ b/include/uapi/linux/soundcard.h
@@ -1051,7 +1051,7 @@ typedef struct mixer_vol_table {
* the GPL version of OSS-4.x and build against that version
* of the header.
*
- * We redefine the extern keyword so that make headers_check
+ * We redefine the extern keyword so that usr/include/headers_check.pl
* does not complain about SEQ_USE_EXTBUF.
*/
#define SEQ_DECLAREBUF() SEQ_USE_EXTBUF()
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index ccbd08709321..12327d32378f 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@
*/
-#define TASKSTATS_VERSION 10
+#define TASKSTATS_VERSION 11
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -172,6 +172,10 @@ struct taskstats {
/* v10: 64-bit btime to avoid overflow */
__u64 ac_btime64; /* 64-bit begin time */
+
+ /* Delay waiting for memory compact */
+ __u64 compact_count;
+ __u64 compact_delay_total;
};
diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h
index e5a7eecef7c3..c0f4bd9b040e 100644
--- a/include/uapi/linux/uuid.h
+++ b/include/uapi/linux/uuid.h
@@ -1,18 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* DO NOT USE in new code! This is solely for MEI due to legacy reasons */
/*
* UUID/GUID definition
*
* Copyright (C) 2010, Intel Corp.
* Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation;
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#ifndef _UAPI_LINUX_UUID_H_
diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index a252f06f9dfd..1061d8d2d09d 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -23,6 +23,9 @@ enum vdpa_command {
enum vdpa_attr {
VDPA_ATTR_UNSPEC,
+ /* Pad attribute for 64b alignment */
+ VDPA_ATTR_PAD = VDPA_ATTR_UNSPEC,
+
/* bus name (optional) + dev name together make the parent device handle */
VDPA_ATTR_MGMTDEV_BUS_NAME, /* string */
VDPA_ATTR_MGMTDEV_DEV_NAME, /* string */
@@ -40,6 +43,9 @@ enum vdpa_attr {
VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */
VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */
+ VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */
+ VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, /* u32 */
+ VDPA_ATTR_DEV_SUPPORTED_FEATURES, /* u64 */
/* new attributes must be added above here */
VDPA_ATTR_MAX,
};
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 00b309590499..371dfc4243b3 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -333,7 +333,18 @@ enum hl_server_type {
* HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore
* HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption
* HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
+ * HL_INFO_POWER - Retrieve power information
* HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls
+ * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info
+ * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num
+ * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the device was opened
+ * and CS timeout or razwi error occurred.
+ * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS sequence number.
+ * HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi:
+ * Timestamp of razwi.
+ * The address which accessing it caused the razwi.
+ * Razwi initiator.
+ * Razwi cause, was it a page fault or MMU access error.
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@@ -353,8 +364,13 @@ enum hl_server_type {
#define HL_INFO_PLL_FREQUENCY 16
#define HL_INFO_POWER 17
#define HL_INFO_OPEN_STATS 18
+#define HL_INFO_DRAM_REPLACED_ROWS 21
+#define HL_INFO_DRAM_PENDING_ROWS 22
+#define HL_INFO_LAST_ERR_OPEN_DEV_TIME 23
+#define HL_INFO_CS_TIMEOUT_EVENT 24
+#define HL_INFO_RAZWI_EVENT 25
-#define HL_INFO_VERSION_MAX_LEN 128
+#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
/**
@@ -473,15 +489,27 @@ struct hl_info_pci_counters {
__u64 replay_cnt;
};
-#define HL_CLK_THROTTLE_POWER 0x1
-#define HL_CLK_THROTTLE_THERMAL 0x2
+enum hl_clk_throttling_type {
+ HL_CLK_THROTTLE_TYPE_POWER,
+ HL_CLK_THROTTLE_TYPE_THERMAL,
+ HL_CLK_THROTTLE_TYPE_MAX
+};
+
+/* clk_throttling_reason masks */
+#define HL_CLK_THROTTLE_POWER (1 << HL_CLK_THROTTLE_TYPE_POWER)
+#define HL_CLK_THROTTLE_THERMAL (1 << HL_CLK_THROTTLE_TYPE_THERMAL)
/**
* struct hl_info_clk_throttle - clock throttling reason
* @clk_throttling_reason: each bit represents a clk throttling reason
+ * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the start-event
+ * @clk_throttling_duration_ns: the clock throttle time in nanosec
*/
struct hl_info_clk_throttle {
__u32 clk_throttling_reason;
+ __u32 pad;
+ __u64 clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX];
+ __u64 clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX];
};
/**
@@ -559,6 +587,51 @@ struct hl_info_cs_counters {
__u64 ctx_validation_drop_cnt;
};
+/**
+ * struct hl_info_last_err_open_dev_time - last error boot information.
+ * @timestamp: timestamp of last time the device was opened and error occurred.
+ */
+struct hl_info_last_err_open_dev_time {
+ __s64 timestamp;
+};
+
+/**
+ * struct hl_info_cs_timeout_event - last CS timeout information.
+ * @timestamp: timestamp when last CS timeout event occurred.
+ * @seq: sequence number of last CS timeout event.
+ */
+struct hl_info_cs_timeout_event {
+ __s64 timestamp;
+ __u64 seq;
+};
+
+#define HL_RAZWI_PAGE_FAULT 0
+#define HL_RAZWI_MMU_ACCESS_ERROR 1
+
+/**
+ * struct hl_info_razwi_event - razwi information.
+ * @timestamp: timestamp of razwi.
+ * @addr: address which accessing it caused razwi.
+ * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does not
+ * have engine id it will be set to U16_MAX.
+ * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible
+ * engines which one them caused the razwi. In that case, it will contain the
+ * second possible engine id, otherwise it will be set to U16_MAX.
+ * @no_engine_id: if razwi initiator does not have engine id, this field will be set to 1,
+ * otherwise 0.
+ * @error_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX.
+ * @pad: padding to 64 bit.
+ */
+struct hl_info_razwi_event {
+ __s64 timestamp;
+ __u64 addr;
+ __u16 engine_id_1;
+ __u16 engine_id_2;
+ __u8 no_engine_id;
+ __u8 error_type;
+ __u8 pad[2];
+};
+
enum gaudi_dcores {
HL_GAUDI_WS_DCORE,
HL_GAUDI_WN_DCORE,
@@ -607,7 +680,10 @@ struct hl_info_args {
#define HL_MAX_CB_SIZE (0x200000 - 32)
/* Indicates whether the command buffer should be mapped to the device's MMU */
-#define HL_CB_FLAGS_MAP 0x1
+#define HL_CB_FLAGS_MAP 0x1
+
+/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */
+#define HL_CB_FLAGS_GET_DEVICE_VA 0x2
struct hl_cb_in {
/* Handle of CB or 0 if we want to create one */
@@ -629,11 +705,16 @@ struct hl_cb_out {
/* Handle of CB */
__u64 cb_handle;
- /* Information about CB */
- struct {
- /* Usage count of CB */
- __u32 usage_cnt;
- __u32 pad;
+ union {
+ /* Information about CB */
+ struct {
+ /* Usage count of CB */
+ __u32 usage_cnt;
+ __u32 pad;
+ };
+
+ /* CB mapped address to device MMU */
+ __u64 device_va;
};
};
};
@@ -856,9 +937,17 @@ struct hl_cs_out {
/*
* SOB base address offset
- * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
+ * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or HL_CS_FLAGS_SIGNAL is set
*/
__u32 sob_base_addr_offset;
+
+ /*
+ * Count of completed signals in SOB before current signal submission.
+ * Valid only when (HL_CS_FLAGS_ENCAP_SIGNALS & HL_CS_FLAGS_STAGED_SUBMISSION)
+ * or HL_CS_FLAGS_SIGNAL is set
+ */
+ __u16 sob_count_before_submission;
+ __u16 pad[3];
};
union hl_cs_args {
@@ -866,9 +955,10 @@ union hl_cs_args {
struct hl_cs_out out;
};
-#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2
-#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
-#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
+#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2
+#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
+#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
+#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10
#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32
@@ -888,14 +978,23 @@ struct hl_wait_cs_in {
};
struct {
- /* User address for completion comparison.
- * upon interrupt, driver will compare the value pointed
- * by this address with the supplied target value.
- * in order not to perform any comparison, set address
- * to all 1s.
- * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
- */
- __u64 addr;
+ union {
+ /* User address for completion comparison.
+ * upon interrupt, driver will compare the value pointed
+ * by this address with the supplied target value.
+ * in order not to perform any comparison, set address
+ * to all 1s.
+ * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
+ */
+ __u64 addr;
+
+ /* cq_counters_handle to a kernel mapped cb which contains
+ * cq counters.
+ * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
+ */
+ __u64 cq_counters_handle;
+ };
+
/* Target value for completion comparison */
__u64 target;
};
@@ -911,14 +1010,27 @@ struct hl_wait_cs_in {
*/
__u32 flags;
- /* Multi CS API info- valid entries in multi-CS array */
- __u8 seq_arr_len;
- __u8 pad[3];
+ union {
+ struct {
+ /* Multi CS API info- valid entries in multi-CS array */
+ __u8 seq_arr_len;
+ __u8 pad[7];
+ };
+
+ /* Absolute timeout to wait for an interrupt in microseconds.
+ * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
+ */
+ __u64 interrupt_timeout_us;
+ };
- /* Absolute timeout to wait for an interrupt in microseconds.
- * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
+ /*
+ * cq counter offset inside the counters cb pointed by cq_counters_handle above.
+ * upon interrupt, driver will compare the value pointed
+ * by this address (cq_counters_handle + cq_counters_offset)
+ * with the supplied target value.
+ * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
*/
- __u32 interrupt_timeout_us;
+ __u64 cq_counters_offset;
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
diff --git a/init/Kconfig b/init/Kconfig
index c5a8f80f0a1b..e9119bf54b1f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1414,7 +1414,6 @@ config LD_DEAD_CODE_DATA_ELIMINATION
config LD_ORPHAN_WARN
def_bool y
depends on ARCH_WANT_LD_ORPHAN_WARN
- depends on !LD_IS_LLD || LLD_VERSION >= 110000
depends on $(ld-option,--orphan-handling=warn)
config SYSCTL
@@ -2278,6 +2277,19 @@ config MODULE_COMPRESS_ZSTD
endchoice
+config MODULE_DECOMPRESS
+ bool "Support in-kernel module decompression"
+ depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ
+ select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
+ select XZ_DEC if MODULE_COMPRESS_XZ
+ help
+
+ Support for decompressing kernel modules by the kernel itself
+ instead of relying on userspace to perform this task. Useful when
+ load pinning security policy is enabled.
+
+ If unsure, say N.
+
config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
bool "Allow loading of modules with missing namespace imports"
help
diff --git a/init/Makefile b/init/Makefile
index 04eeee12c076..06326e304384 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -31,7 +31,7 @@ quiet_cmd_compile.h = CHK $@
cmd_compile.h = \
$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)" \
- "$(CONFIG_PREEMPT_RT)" $(CONFIG_CC_VERSION_TEXT) "$(LD)"
+ "$(CONFIG_PREEMPT_RT)" "$(CONFIG_CC_VERSION_TEXT)" "$(LD)"
include/generated/compile.h: FORCE
$(call cmd,compile.h)
diff --git a/init/main.c b/init/main.c
index bb984ed79de0..65fa2e41a9c0 100644
--- a/init/main.c
+++ b/init/main.c
@@ -834,12 +834,15 @@ static void __init mm_init(void)
init_mem_debugging_and_hardening();
kfence_alloc_pool();
report_meminit();
- stack_depot_init();
+ stack_depot_early_init();
mem_init();
mem_init_print_info();
- /* page_owner must be initialized after buddy is ready */
- page_ext_init_flatmem_late();
kmem_cache_init();
+ /*
+ * page_owner must be initialized after buddy is ready, and also after
+ * slab is ready so that stack_depot_init() works properly
+ */
+ page_ext_init_flatmem_late();
kmemleak_init();
pgtable_init();
debug_objects_mem_init();
diff --git a/ipc/util.c b/ipc/util.c
index fa2d86ef3fb8..a2208d0f26b2 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -894,7 +894,7 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file)
if (!iter)
return -ENOMEM;
- iter->iface = PDE_DATA(inode);
+ iter->iface = pde_data(inode);
iter->ns = get_ipc_ns(current->nsproxy->ipc_ns);
iter->pid_ns = get_pid_ns(task_active_pid_ns(current));
diff --git a/kernel/Makefile b/kernel/Makefile
index 186c49582f45..56f4ee97f328 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -67,6 +67,7 @@ obj-y += up.o
endif
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_MODULE_DECOMPRESS) += module_decompress.o
obj-$(CONFIG_MODULE_SIG) += module_signing.o
obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 33bb8ae4a804..e16dafeb2450 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5686,7 +5686,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
i, btf_type_str(t));
return -EINVAL;
}
- if (check_ctx_reg(env, reg, regno))
+ if (check_ptr_off_reg(env, reg, regno))
return -EINVAL;
} else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) {
const struct btf_type *reg_ref_t;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 80da1db47c68..5a8d9f7467bf 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -648,12 +648,22 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
int opt;
opt = fs_parse(fc, bpf_fs_parameters, param, &result);
- if (opt < 0)
+ if (opt < 0) {
/* We might like to report bad mount options here, but
* traditionally we've ignored all mount options, so we'd
* better continue to ignore non-existing options for bpf.
*/
- return opt == -ENOPARAM ? 0 : opt;
+ if (opt == -ENOPARAM) {
+ opt = vfs_parse_fs_param_source(fc, param);
+ if (opt != -ENOPARAM)
+ return opt;
+
+ return 0;
+ }
+
+ if (opt < 0)
+ return opt;
+ }
switch (opt) {
case OPT_MODE:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bfb45381fb3f..a39eedecc93a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -570,6 +570,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
if (type & MEM_RDONLY)
strncpy(prefix, "rdonly_", 16);
+ if (type & MEM_ALLOC)
+ strncpy(prefix, "alloc_", 16);
snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
prefix, str[base_type(type)], postfix);
@@ -616,7 +618,7 @@ static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
{
- env->scratched_stack_slots |= 1UL << spi;
+ env->scratched_stack_slots |= 1ULL << spi;
}
static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
@@ -637,14 +639,14 @@ static bool verifier_state_scratched(const struct bpf_verifier_env *env)
static void mark_verifier_state_clean(struct bpf_verifier_env *env)
{
env->scratched_regs = 0U;
- env->scratched_stack_slots = 0UL;
+ env->scratched_stack_slots = 0ULL;
}
/* Used for printing the entire verifier state. */
static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
{
env->scratched_regs = ~0U;
- env->scratched_stack_slots = ~0UL;
+ env->scratched_stack_slots = ~0ULL;
}
/* The reg state of a pointer or a bounded scalar was saved when
@@ -3969,16 +3971,17 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
}
#endif
-int check_ctx_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno)
+static int __check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno,
+ bool fixed_off_ok)
{
- /* Access to ctx or passing it to a helper is only allowed in
- * its original, unmodified form.
+ /* Access to this pointer-typed register or passing it to a helper
+ * is only allowed in its original, unmodified form.
*/
- if (reg->off) {
- verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
- regno, reg->off);
+ if (!fixed_off_ok && reg->off) {
+ verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
+ reg_type_str(env, reg->type), regno, reg->off);
return -EACCES;
}
@@ -3986,13 +3989,20 @@ int check_ctx_reg(struct bpf_verifier_env *env,
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
+ verbose(env, "variable %s access var_off=%s disallowed\n",
+ reg_type_str(env, reg->type), tn_buf);
return -EACCES;
}
return 0;
}
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno)
+{
+ return __check_ptr_off_reg(env, reg, regno, false);
+}
+
static int __check_buffer_access(struct bpf_verifier_env *env,
const char *buf_info,
const struct bpf_reg_state *reg,
@@ -4437,7 +4447,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return -EACCES;
}
- err = check_ctx_reg(env, reg, regno);
+ err = check_ptr_off_reg(env, reg, regno);
if (err < 0)
return err;
@@ -5127,6 +5137,7 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
PTR_TO_MEM,
+ PTR_TO_MEM | MEM_ALLOC,
PTR_TO_BUF,
},
};
@@ -5144,7 +5155,7 @@ static const struct bpf_reg_types int_ptr_types = {
static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
-static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
+static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
@@ -5244,12 +5255,6 @@ found:
kernel_type_name(btf_vmlinux, *arg_btf_id));
return -EACCES;
}
-
- if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
- verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
- regno);
- return -EACCES;
- }
}
return 0;
@@ -5304,10 +5309,33 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
if (err)
return err;
- if (type == PTR_TO_CTX) {
- err = check_ctx_reg(env, reg, regno);
+ switch ((u32)type) {
+ case SCALAR_VALUE:
+ /* Pointer types where reg offset is explicitly allowed: */
+ case PTR_TO_PACKET:
+ case PTR_TO_PACKET_META:
+ case PTR_TO_MAP_KEY:
+ case PTR_TO_MAP_VALUE:
+ case PTR_TO_MEM:
+ case PTR_TO_MEM | MEM_RDONLY:
+ case PTR_TO_MEM | MEM_ALLOC:
+ case PTR_TO_BUF:
+ case PTR_TO_BUF | MEM_RDONLY:
+ case PTR_TO_STACK:
+ /* Some of the argument types nevertheless require a
+ * zero register offset.
+ */
+ if (arg_type == ARG_PTR_TO_ALLOC_MEM)
+ goto force_off_check;
+ break;
+ /* All the rest must be rejected: */
+ default:
+force_off_check:
+ err = __check_ptr_off_reg(env, reg, regno,
+ type == PTR_TO_BTF_ID);
if (err < 0)
return err;
+ break;
}
skip_type_check:
@@ -9507,9 +9535,13 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
- if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
- mark_reg_known_zero(env, regs, insn->dst_reg);
+ /* All special src_reg cases are listed below. From this point onwards
+ * we either succeed and assign a corresponding dst_reg->type after
+ * zeroing the offset, or fail and reject the program.
+ */
+ mark_reg_known_zero(env, regs, insn->dst_reg);
+ if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
dst_reg->type = aux->btf_var.reg_type;
switch (base_type(dst_reg->type)) {
case PTR_TO_MEM:
@@ -9547,7 +9579,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
map = env->used_maps[aux->map_index];
- mark_reg_known_zero(env, regs, insn->dst_reg);
dst_reg->map_ptr = map;
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
@@ -9651,7 +9682,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return err;
}
- err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
+ err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
if (err < 0)
return err;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index b31e1465868a..9d05c3ca2d5e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3643,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
cgroup_get(cgrp);
cgroup_kn_unlock(of->kn);
+ /* Allow only one trigger per file descriptor */
+ if (ctx->psi.trigger) {
+ cgroup_put(cgrp);
+ return -EBUSY;
+ }
+
psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
new = psi_trigger_create(psi, buf, nbytes, res);
if (IS_ERR(new)) {
@@ -3650,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
return PTR_ERR(new);
}
- psi_trigger_replace(&ctx->psi.trigger, new);
-
+ smp_store_release(&ctx->psi.trigger, new);
cgroup_put(cgrp);
return nbytes;
@@ -3690,7 +3695,7 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
{
struct cgroup_file_ctx *ctx = of->priv;
- psi_trigger_replace(&ctx->psi.trigger, NULL);
+ psi_trigger_destroy(ctx->psi.trigger);
}
bool cgroup_psi_enabled(void)
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
new file mode 100644
index 000000000000..e9ffb0cc1eec
--- /dev/null
+++ b/kernel/configs/debug.config
@@ -0,0 +1,105 @@
+# The config is based on running daily CI for enterprise Linux distros to
+# seek regressions on linux-next builds on different bare-metal and virtual
+# platforms. It can be used for example,
+#
+# $ make ARCH=arm64 defconfig debug.config
+#
+# Keep alphabetically sorted inside each section.
+#
+# printk and dmesg options
+#
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_PRINTK_CALLER=y
+CONFIG_PRINTK_TIME=y
+CONFIG_SYMBOLIC_ERRNAME=y
+#
+# Compile-time checks and compiler options
+#
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_FRAME_WARN=2048
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+#
+# Generic Kernel Debugging Instruments
+#
+# CONFIG_UBSAN_ALIGNMENT is not set
+# CONFIG_UBSAN_DIV_ZERO is not set
+# CONFIG_UBSAN_TRAP is not set
+# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_FS_ALLOW_ALL=y
+CONFIG_DEBUG_IRQFLAGS=y
+CONFIG_UBSAN=y
+CONFIG_UBSAN_BOOL=y
+CONFIG_UBSAN_BOUNDS=y
+CONFIG_UBSAN_ENUM=y
+CONFIG_UBSAN_SHIFT=y
+CONFIG_UBSAN_UNREACHABLE=y
+#
+# Memory Debugging
+#
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF is not set
+# CONFIG_DEBUG_RODATA_TEST is not set
+# CONFIG_DEBUG_WX is not set
+# CONFIG_KFENCE is not set
+# CONFIG_PAGE_POISONING is not set
+# CONFIG_SLUB_STATS is not set
+CONFIG_PAGE_EXTENSION=y
+CONFIG_PAGE_OWNER=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VIRTUAL=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_VM_VMACACHE=y
+CONFIG_GENERIC_PTDUMP=y
+CONFIG_KASAN=y
+CONFIG_KASAN_GENERIC=y
+CONFIG_KASAN_INLINE=y
+CONFIG_KASAN_VMALLOC=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_SLUB_DEBUG_ON=y
+#
+# Debug Oops, Lockups and Hangs
+#
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PANIC_TIMEOUT=0
+CONFIG_SOFTLOCKUP_DETECTOR=y
+#
+# Lock Debugging (spinlocks, mutexes, etc...)
+#
+# CONFIG_PROVE_RAW_LOCK_NESTING is not set
+CONFIG_PROVE_LOCKING=y
+#
+# Debug kernel data structures
+#
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+#
+# RCU Debugging
+#
+CONFIG_PROVE_RCU=y
+CONFIG_PROVE_RCU_LIST=y
+#
+# Tracers
+#
+CONFIG_BRANCH_PROFILE_NONE=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 51530d5b15a8..c5e8cea9e05f 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -100,19 +100,10 @@ void __delayacct_blkio_start(void)
*/
void __delayacct_blkio_end(struct task_struct *p)
{
- struct task_delay_info *delays = p->delays;
- u64 *total;
- u32 *count;
-
- if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
- total = &delays->swapin_delay;
- count = &delays->swapin_count;
- } else {
- total = &delays->blkio_delay;
- count = &delays->blkio_count;
- }
-
- delayacct_end(&delays->lock, &delays->blkio_start, total, count);
+ delayacct_end(&p->delays->lock,
+ &p->delays->blkio_start,
+ &p->delays->blkio_delay,
+ &p->delays->blkio_count);
}
int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -164,10 +155,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
+ tmp = d->compact_delay_total + tsk->delays->compact_delay;
+ d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
d->freepages_count += tsk->delays->freepages_count;
d->thrashing_count += tsk->delays->thrashing_count;
+ d->compact_count += tsk->delays->compact_count;
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return 0;
@@ -179,8 +173,7 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
unsigned long flags;
raw_spin_lock_irqsave(&tsk->delays->lock, flags);
- ret = nsec_to_clock_t(tsk->delays->blkio_delay +
- tsk->delays->swapin_delay);
+ ret = nsec_to_clock_t(tsk->delays->blkio_delay);
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return ret;
}
@@ -210,3 +203,29 @@ void __delayacct_thrashing_end(void)
&current->delays->thrashing_delay,
&current->delays->thrashing_count);
}
+
+void __delayacct_swapin_start(void)
+{
+ current->delays->swapin_start = local_clock();
+}
+
+void __delayacct_swapin_end(void)
+{
+ delayacct_end(&current->delays->lock,
+ &current->delays->swapin_start,
+ &current->delays->swapin_delay,
+ &current->delays->swapin_count);
+}
+
+void __delayacct_compact_start(void)
+{
+ current->delays->compact_start = local_clock();
+}
+
+void __delayacct_compact_end(void)
+{
+ delayacct_end(&current->delays->lock,
+ &current->delays->compact_start,
+ &current->delays->compact_delay,
+ &current->delays->compact_count);
+}
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 5f84e6cdb78e..4d40dcce7604 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -203,7 +203,7 @@ static int __init dma_atomic_pool_init(void)
GFP_KERNEL);
if (!atomic_pool_kernel)
ret = -ENOMEM;
- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+ if (has_managed_dma()) {
atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
GFP_KERNEL | GFP_DMA);
if (!atomic_pool_dma)
@@ -226,7 +226,7 @@ static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp)
if (prev == NULL) {
if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
return atomic_pool_dma32;
- if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
+ if (atomic_pool_dma && (gfp & GFP_DMA))
return atomic_pool_dma;
return atomic_pool_kernel;
}
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8e840fbbed7c..f1e7ea160b43 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -50,6 +50,7 @@
#include <asm/io.h>
#include <asm/dma.h>
+#include <linux/io.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/iommu-helper.h>
@@ -72,6 +73,8 @@ enum swiotlb_force swiotlb_force;
struct io_tlb_mem io_tlb_default_mem;
+phys_addr_t swiotlb_unencrypted_base;
+
/*
* Max segment that we can provide which (if pages are contingous) will
* not be bounced (unless SWIOTLB_FORCE is set).
@@ -156,6 +159,34 @@ static inline unsigned long nr_slots(u64 val)
}
/*
+ * Remap swioltb memory in the unencrypted physical address space
+ * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
+ * Isolation VMs).
+ */
+#ifdef CONFIG_HAS_IOMEM
+static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+{
+ void *vaddr = NULL;
+
+ if (swiotlb_unencrypted_base) {
+ phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
+
+ vaddr = memremap(paddr, bytes, MEMREMAP_WB);
+ if (!vaddr)
+ pr_err("Failed to map the unencrypted memory %pa size %lx.\n",
+ &paddr, bytes);
+ }
+
+ return vaddr;
+}
+#else
+static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+{
+ return NULL;
+}
+#endif
+
+/*
* Early SWIOTLB allocation may be too early to allow an architecture to
* perform the desired operations. This function allows the architecture to
* call SWIOTLB when the operations are possible. It needs to be called
@@ -172,7 +203,12 @@ void __init swiotlb_update_mem_attributes(void)
vaddr = phys_to_virt(mem->start);
bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
- memset(vaddr, 0, bytes);
+
+ mem->vaddr = swiotlb_mem_remap(mem, bytes);
+ if (!mem->vaddr)
+ mem->vaddr = vaddr;
+
+ memset(mem->vaddr, 0, bytes);
}
static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
@@ -196,7 +232,17 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
}
+
+ /*
+ * If swiotlb_unencrypted_base is set, the bounce buffer memory will
+ * be remapped and cleared in swiotlb_update_mem_attributes.
+ */
+ if (swiotlb_unencrypted_base)
+ return;
+
memset(vaddr, 0, bytes);
+ mem->vaddr = vaddr;
+ return;
}
int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
@@ -371,7 +417,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
phys_addr_t orig_addr = mem->slots[index].orig_addr;
size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
- unsigned char *vaddr = phys_to_virt(tlb_addr);
+ unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
unsigned int tlb_offset, orig_addr_offset;
if (orig_addr == INVALID_PHYS_ADDR)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fc18664f49b0..479c9e672ec4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -674,6 +674,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state)
WRITE_ONCE(event->state, state);
}
+/*
+ * UP store-release, load-acquire
+ */
+
+#define __store_release(ptr, val) \
+do { \
+ barrier(); \
+ WRITE_ONCE(*(ptr), (val)); \
+} while (0)
+
+#define __load_acquire(ptr) \
+({ \
+ __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \
+ barrier(); \
+ ___p; \
+})
+
#ifdef CONFIG_CGROUP_PERF
static inline bool
@@ -719,34 +736,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
return t->time;
}
-static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
+static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
{
- struct perf_cgroup_info *info;
- u64 now;
-
- now = perf_clock();
+ struct perf_cgroup_info *t;
- info = this_cpu_ptr(cgrp->info);
+ t = per_cpu_ptr(event->cgrp->info, event->cpu);
+ if (!__load_acquire(&t->active))
+ return t->time;
+ now += READ_ONCE(t->timeoffset);
+ return now;
+}
- info->time += now - info->timestamp;
+static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv)
+{
+ if (adv)
+ info->time += now - info->timestamp;
info->timestamp = now;
+ /*
+ * see update_context_time()
+ */
+ WRITE_ONCE(info->timeoffset, info->time - info->timestamp);
}
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final)
{
struct perf_cgroup *cgrp = cpuctx->cgrp;
struct cgroup_subsys_state *css;
+ struct perf_cgroup_info *info;
if (cgrp) {
+ u64 now = perf_clock();
+
for (css = &cgrp->css; css; css = css->parent) {
cgrp = container_of(css, struct perf_cgroup, css);
- __update_cgrp_time(cgrp);
+ info = this_cpu_ptr(cgrp->info);
+
+ __update_cgrp_time(info, now, true);
+ if (final)
+ __store_release(&info->active, 0);
}
}
}
static inline void update_cgrp_time_from_event(struct perf_event *event)
{
+ struct perf_cgroup_info *info;
struct perf_cgroup *cgrp;
/*
@@ -760,8 +794,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
/*
* Do not update time when cgroup is not active
*/
- if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
- __update_cgrp_time(event->cgrp);
+ if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
+ info = this_cpu_ptr(event->cgrp->info);
+ __update_cgrp_time(info, perf_clock(), true);
+ }
}
static inline void
@@ -785,7 +821,8 @@ perf_cgroup_set_timestamp(struct task_struct *task,
for (css = &cgrp->css; css; css = css->parent) {
cgrp = container_of(css, struct perf_cgroup, css);
info = this_cpu_ptr(cgrp->info);
- info->timestamp = ctx->timestamp;
+ __update_cgrp_time(info, ctx->timestamp, false);
+ __store_release(&info->active, 1);
}
}
@@ -982,14 +1019,6 @@ out:
}
static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
-{
- struct perf_cgroup_info *t;
- t = per_cpu_ptr(event->cgrp->info, event->cpu);
- event->shadow_ctx_time = now - t->timestamp;
-}
-
-static inline void
perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
{
struct perf_cpu_context *cpuctx;
@@ -1066,7 +1095,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
{
}
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
+ bool final)
{
}
@@ -1098,12 +1128,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
{
}
-static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+static inline u64 perf_cgroup_event_time(struct perf_event *event)
{
+ return 0;
}
-static inline u64 perf_cgroup_event_time(struct perf_event *event)
+static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
{
return 0;
}
@@ -1525,22 +1555,59 @@ static void perf_unpin_context(struct perf_event_context *ctx)
/*
* Update the record of the current time in a context.
*/
-static void update_context_time(struct perf_event_context *ctx)
+static void __update_context_time(struct perf_event_context *ctx, bool adv)
{
u64 now = perf_clock();
- ctx->time += now - ctx->timestamp;
+ if (adv)
+ ctx->time += now - ctx->timestamp;
ctx->timestamp = now;
+
+ /*
+ * The above: time' = time + (now - timestamp), can be re-arranged
+ * into: time` = now + (time - timestamp), which gives a single value
+ * offset to compute future time without locks on.
+ *
+ * See perf_event_time_now(), which can be used from NMI context where
+ * it's (obviously) not possible to acquire ctx->lock in order to read
+ * both the above values in a consistent manner.
+ */
+ WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp);
+}
+
+static void update_context_time(struct perf_event_context *ctx)
+{
+ __update_context_time(ctx, true);
}
static u64 perf_event_time(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
+ if (unlikely(!ctx))
+ return 0;
+
if (is_cgroup_event(event))
return perf_cgroup_event_time(event);
- return ctx ? ctx->time : 0;
+ return ctx->time;
+}
+
+static u64 perf_event_time_now(struct perf_event *event, u64 now)
+{
+ struct perf_event_context *ctx = event->ctx;
+
+ if (unlikely(!ctx))
+ return 0;
+
+ if (is_cgroup_event(event))
+ return perf_cgroup_event_time_now(event, now);
+
+ if (!(__load_acquire(&ctx->is_active) & EVENT_TIME))
+ return ctx->time;
+
+ now += READ_ONCE(ctx->timeoffset);
+ return now;
}
static enum event_type_t get_event_type(struct perf_event *event)
@@ -2350,7 +2417,7 @@ __perf_remove_from_context(struct perf_event *event,
if (ctx->is_active & EVENT_TIME) {
update_context_time(ctx);
- update_cgrp_time_from_cpuctx(cpuctx);
+ update_cgrp_time_from_cpuctx(cpuctx, false);
}
event_sched_out(event, cpuctx, ctx);
@@ -2361,6 +2428,9 @@ __perf_remove_from_context(struct perf_event *event,
list_del_event(event, ctx);
if (!ctx->nr_events && ctx->is_active) {
+ if (ctx == &cpuctx->ctx)
+ update_cgrp_time_from_cpuctx(cpuctx, true);
+
ctx->is_active = 0;
ctx->rotate_necessary = 0;
if (ctx->task) {
@@ -2482,40 +2552,6 @@ void perf_event_disable_inatomic(struct perf_event *event)
irq_work_queue(&event->pending);
}
-static void perf_set_shadow_time(struct perf_event *event,
- struct perf_event_context *ctx)
-{
- /*
- * use the correct time source for the time snapshot
- *
- * We could get by without this by leveraging the
- * fact that to get to this function, the caller
- * has most likely already called update_context_time()
- * and update_cgrp_time_xx() and thus both timestamp
- * are identical (or very close). Given that tstamp is,
- * already adjusted for cgroup, we could say that:
- * tstamp - ctx->timestamp
- * is equivalent to
- * tstamp - cgrp->timestamp.
- *
- * Then, in perf_output_read(), the calculation would
- * work with no changes because:
- * - event is guaranteed scheduled in
- * - no scheduled out in between
- * - thus the timestamp would be the same
- *
- * But this is a bit hairy.
- *
- * So instead, we have an explicit cgroup call to remain
- * within the time source all along. We believe it
- * is cleaner and simpler to understand.
- */
- if (is_cgroup_event(event))
- perf_cgroup_set_shadow_time(event, event->tstamp);
- else
- event->shadow_ctx_time = event->tstamp - ctx->timestamp;
-}
-
#define MAX_INTERRUPTS (~0ULL)
static void perf_log_throttle(struct perf_event *event, int enable);
@@ -2556,8 +2592,6 @@ event_sched_in(struct perf_event *event,
perf_pmu_disable(event->pmu);
- perf_set_shadow_time(event, ctx);
-
perf_log_itrace_start(event);
if (event->pmu->add(event, PERF_EF_START)) {
@@ -3251,16 +3285,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
return;
}
- ctx->is_active &= ~event_type;
- if (!(ctx->is_active & EVENT_ALL))
- ctx->is_active = 0;
-
- if (ctx->task) {
- WARN_ON_ONCE(cpuctx->task_ctx != ctx);
- if (!ctx->is_active)
- cpuctx->task_ctx = NULL;
- }
-
/*
* Always update time if it was set; not only when it changes.
* Otherwise we can 'forget' to update time for any but the last
@@ -3274,7 +3298,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
if (is_active & EVENT_TIME) {
/* update (and stop) ctx time */
update_context_time(ctx);
- update_cgrp_time_from_cpuctx(cpuctx);
+ update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx);
+ /*
+ * CPU-release for the below ->is_active store,
+ * see __load_acquire() in perf_event_time_now()
+ */
+ barrier();
+ }
+
+ ctx->is_active &= ~event_type;
+ if (!(ctx->is_active & EVENT_ALL))
+ ctx->is_active = 0;
+
+ if (ctx->task) {
+ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+ if (!ctx->is_active)
+ cpuctx->task_ctx = NULL;
}
is_active ^= ctx->is_active; /* changed bits */
@@ -3711,13 +3750,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
return 0;
}
+/*
+ * Because the userpage is strictly per-event (there is no concept of context,
+ * so there cannot be a context indirection), every userpage must be updated
+ * when context time starts :-(
+ *
+ * IOW, we must not miss EVENT_TIME edges.
+ */
static inline bool event_update_userpage(struct perf_event *event)
{
if (likely(!atomic_read(&event->mmap_count)))
return false;
perf_event_update_time(event);
- perf_set_shadow_time(event, event->ctx);
perf_event_update_userpage(event);
return true;
@@ -3801,13 +3846,23 @@ ctx_sched_in(struct perf_event_context *ctx,
struct task_struct *task)
{
int is_active = ctx->is_active;
- u64 now;
lockdep_assert_held(&ctx->lock);
if (likely(!ctx->nr_events))
return;
+ if (is_active ^ EVENT_TIME) {
+ /* start ctx time */
+ __update_context_time(ctx, false);
+ perf_cgroup_set_timestamp(task, ctx);
+ /*
+ * CPU-release for the below ->is_active store,
+ * see __load_acquire() in perf_event_time_now()
+ */
+ barrier();
+ }
+
ctx->is_active |= (event_type | EVENT_TIME);
if (ctx->task) {
if (!is_active)
@@ -3818,13 +3873,6 @@ ctx_sched_in(struct perf_event_context *ctx,
is_active ^= ctx->is_active; /* changed bits */
- if (is_active & EVENT_TIME) {
- /* start ctx time */
- now = perf_clock();
- ctx->timestamp = now;
- perf_cgroup_set_timestamp(task, ctx);
- }
-
/*
* First go through the list and put on any pinned groups
* in order to give them the best chance of going on.
@@ -4418,6 +4466,18 @@ static inline u64 perf_event_count(struct perf_event *event)
return local64_read(&event->count) + atomic64_read(&event->child_count);
}
+static void calc_timer_values(struct perf_event *event,
+ u64 *now,
+ u64 *enabled,
+ u64 *running)
+{
+ u64 ctx_time;
+
+ *now = perf_clock();
+ ctx_time = perf_event_time_now(event, *now);
+ __perf_update_times(event, ctx_time, enabled, running);
+}
+
/*
* NMI-safe method to read a local event, that is an event that
* is:
@@ -4477,10 +4537,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
*value = local64_read(&event->count);
if (enabled || running) {
- u64 now = event->shadow_ctx_time + perf_clock();
- u64 __enabled, __running;
+ u64 __enabled, __running, __now;;
- __perf_update_times(event, now, &__enabled, &__running);
+ calc_timer_values(event, &__now, &__enabled, &__running);
if (enabled)
*enabled = __enabled;
if (running)
@@ -5802,18 +5861,6 @@ static int perf_event_index(struct perf_event *event)
return event->pmu->event_idx(event);
}
-static void calc_timer_values(struct perf_event *event,
- u64 *now,
- u64 *enabled,
- u64 *running)
-{
- u64 ctx_time;
-
- *now = perf_clock();
- ctx_time = event->shadow_ctx_time + *now;
- __perf_update_times(event, ctx_time, enabled, running);
-}
-
static void perf_event_init_userpage(struct perf_event *event)
{
struct perf_event_mmap_page *userpg;
@@ -6353,7 +6400,6 @@ accounting:
ring_buffer_attach(event, rb);
perf_event_update_time(event);
- perf_set_shadow_time(event, event->ctx);
perf_event_init_userpage(event);
perf_event_update_userpage(event);
} else {
diff --git a/kernel/exit.c b/kernel/exit.c
index f702a6a63686..b00a25bb4ab9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -116,7 +116,7 @@ static void __exit_signal(struct task_struct *tsk)
* then notify it:
*/
if (sig->notify_count > 0 && !--sig->notify_count)
- wake_up_process(sig->group_exit_task);
+ wake_up_process(sig->group_exec_task);
if (tsk == sig->curr_target)
sig->curr_target = next_thread(tsk);
@@ -697,7 +697,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
/* mt-exec, de_thread() is waiting for group leader */
if (unlikely(tsk->signal->notify_count < 0))
- wake_up_process(tsk->signal->group_exit_task);
+ wake_up_process(tsk->signal->group_exec_task);
write_unlock_irq(&tasklist_lock);
list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
@@ -735,37 +735,22 @@ void __noreturn do_exit(long code)
struct task_struct *tsk = current;
int group_dead;
- /*
- * We can get here from a kernel oops, sometimes with preemption off.
- * Start by checking for critical errors.
- * Then fix up important state like USER_DS and preemption.
- * Then do everything else.
- */
-
WARN_ON(blk_needs_flush_plug(tsk));
- if (unlikely(in_interrupt()))
- panic("Aiee, killing interrupt handler!");
- if (unlikely(!tsk->pid))
- panic("Attempted to kill the idle task!");
-
/*
- * If do_exit is called because this processes oopsed, it's possible
+ * If do_dead is called because this processes oopsed, it's possible
* that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
* continuing. Amongst other possible reasons, this is to prevent
* mm_release()->clear_child_tid() from writing to a user-controlled
* kernel address.
+ *
+ * On uptodate architectures force_uaccess_begin is a noop. On
+ * architectures that still have set_fs/get_fs in addition to handling
+ * oopses handles kernel threads that run as set_fs(KERNEL_DS) by
+ * default.
*/
force_uaccess_begin();
- if (unlikely(in_atomic())) {
- pr_info("note: %s[%d] exited with preempt_count %d\n",
- current->comm, task_pid_nr(current),
- preempt_count());
- preempt_count_set(PREEMPT_ENABLED);
- }
-
- profile_task_exit(tsk);
kcov_task_exit(tsk);
coredump_task_exit(tsk);
@@ -773,17 +758,6 @@ void __noreturn do_exit(long code)
validate_creds_for_do_exit(tsk);
- /*
- * We're taking recursive faults here in do_exit. Safest is to just
- * leave this task alone and wait for reboot.
- */
- if (unlikely(tsk->flags & PF_EXITING)) {
- pr_alert("Fixing recursive fault but reboot is needed!\n");
- futex_exit_recursive(tsk);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule();
- }
-
io_uring_files_cancel();
exit_signals(tsk); /* sets PF_EXITING */
@@ -882,16 +856,46 @@ void __noreturn do_exit(long code)
lockdep_free_task(tsk);
do_task_dead();
}
-EXPORT_SYMBOL_GPL(do_exit);
-void complete_and_exit(struct completion *comp, long code)
+void __noreturn make_task_dead(int signr)
{
- if (comp)
- complete(comp);
+ /*
+ * Take the task off the cpu after something catastrophic has
+ * happened.
+ *
+ * We can get here from a kernel oops, sometimes with preemption off.
+ * Start by checking for critical errors.
+ * Then fix up important state like USER_DS and preemption.
+ * Then do everything else.
+ */
+ struct task_struct *tsk = current;
- do_exit(code);
+ if (unlikely(in_interrupt()))
+ panic("Aiee, killing interrupt handler!");
+ if (unlikely(!tsk->pid))
+ panic("Attempted to kill the idle task!");
+
+ if (unlikely(in_atomic())) {
+ pr_info("note: %s[%d] exited with preempt_count %d\n",
+ current->comm, task_pid_nr(current),
+ preempt_count());
+ preempt_count_set(PREEMPT_ENABLED);
+ }
+
+ /*
+ * We're taking recursive faults here in make_task_dead. Safest is to just
+ * leave this task alone and wait for reboot.
+ */
+ if (unlikely(tsk->flags & PF_EXITING)) {
+ pr_alert("Fixing recursive fault but reboot is needed!\n");
+ futex_exit_recursive(tsk);
+ tsk->exit_state = EXIT_DEAD;
+ refcount_inc(&tsk->rcu_users);
+ do_task_dead();
+ }
+
+ do_exit(signr);
}
-EXPORT_SYMBOL(complete_and_exit);
SYSCALL_DEFINE1(exit, int, error_code)
{
@@ -907,17 +911,19 @@ do_group_exit(int exit_code)
{
struct signal_struct *sig = current->signal;
- BUG_ON(exit_code & 0x80); /* core dumps don't get here */
-
- if (signal_group_exit(sig))
+ if (sig->flags & SIGNAL_GROUP_EXIT)
exit_code = sig->group_exit_code;
+ else if (sig->group_exec_task)
+ exit_code = 0;
else if (!thread_group_empty(current)) {
struct sighand_struct *const sighand = current->sighand;
spin_lock_irq(&sighand->siglock);
- if (signal_group_exit(sig))
+ if (sig->flags & SIGNAL_GROUP_EXIT)
/* Another thread got here before we took the lock. */
exit_code = sig->group_exit_code;
+ else if (sig->group_exec_task)
+ exit_code = 0;
else {
sig->group_exit_code = exit_code;
sig->flags = SIGNAL_GROUP_EXIT;
@@ -1012,7 +1018,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
return 0;
if (unlikely(wo->wo_flags & WNOWAIT)) {
- status = p->exit_code;
+ status = (p->signal->flags & SIGNAL_GROUP_EXIT)
+ ? p->signal->group_exit_code : p->exit_code;
get_task_struct(p);
read_unlock(&tasklist_lock);
sched_annotate_sleep();
diff --git a/kernel/fork.c b/kernel/fork.c
index 3161d7980155..d75a528f7b21 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -42,6 +42,7 @@
#include <linux/mmu_notifier.h>
#include <linux/fs.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/vmacache.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
@@ -365,12 +366,14 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
*new = data_race(*orig);
INIT_LIST_HEAD(&new->anon_vma_chain);
new->vm_next = new->vm_prev = NULL;
+ dup_vma_anon_name(orig, new);
}
return new;
}
void vm_area_free(struct vm_area_struct *vma)
{
+ free_vma_anon_name(vma);
kmem_cache_free(vm_area_cachep, vma);
}
@@ -754,9 +757,7 @@ void __put_task_struct(struct task_struct *tsk)
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
sched_core_free(tsk);
-
- if (!profile_handoff_task(tsk))
- free_task(tsk);
+ free_task(tsk);
}
EXPORT_SYMBOL_GPL(__put_task_struct);
@@ -950,7 +951,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->splice_pipe = NULL;
tsk->task_frag.page = NULL;
tsk->wake_q.next = NULL;
- tsk->pf_io_worker = NULL;
+ tsk->worker_private = NULL;
account_kernel_stack(tsk, 1);
@@ -2006,12 +2007,6 @@ static __latent_entropy struct task_struct *copy_process(
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
}
- /*
- * This _must_ happen before we call free_task(), i.e. before we jump
- * to any of the bad_fork_* labels. This is to avoid freeing
- * p->set_child_tid which is (ab)used as a kthread's data pointer for
- * kernel threads (PF_KTHREAD).
- */
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
/*
* Clear TID on mm_release()?
@@ -2092,12 +2087,16 @@ static __latent_entropy struct task_struct *copy_process(
p->io_context = NULL;
audit_set_context(p, NULL);
cgroup_fork(p);
+ if (p->flags & PF_KTHREAD) {
+ if (!set_kthread_struct(p))
+ goto bad_fork_cleanup_delayacct;
+ }
#ifdef CONFIG_NUMA
p->mempolicy = mpol_dup(p->mempolicy);
if (IS_ERR(p->mempolicy)) {
retval = PTR_ERR(p->mempolicy);
p->mempolicy = NULL;
- goto bad_fork_cleanup_threadgroup_lock;
+ goto bad_fork_cleanup_delayacct;
}
#endif
#ifdef CONFIG_CPUSETS
@@ -2434,8 +2433,8 @@ bad_fork_cleanup_policy:
lockdep_free_task(p);
#ifdef CONFIG_NUMA
mpol_put(p->mempolicy);
-bad_fork_cleanup_threadgroup_lock:
#endif
+bad_fork_cleanup_delayacct:
delayacct_tsk_free(p);
bad_fork_cleanup_count:
dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 926c2bb752bc..51dd822a8060 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -1031,7 +1031,7 @@ static void futex_cleanup(struct task_struct *tsk)
* actually finished the futex cleanup. The worst case for this is that the
* waiter runs through the wait loop until the state becomes visible.
*
- * This is called from the recursive fault handling path in do_exit().
+ * This is called from the recursive fault handling path in make_task_dead().
*
* This is best effort. Either the futex exit code has run already or
* not. If the OWNER_DIED bit has been set on the futex then the waiter can
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 053447183ac5..04f4ebdc3cf5 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -4,7 +4,6 @@ menu "GCOV-based kernel profiling"
config GCOV_KERNEL
bool "Enable gcov-based kernel profiling"
depends on DEBUG_FS
- depends on !CC_IS_CLANG || CLANG_VERSION >= 110000
depends on !ARCH_WANTS_NO_INSTR || CC_HAS_NO_PROFILE_FN_ATTR
select CONSTRUCTORS
default n
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 9888e2bc8c76..52501e5f7655 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -63,7 +63,9 @@ static struct task_struct *watchdog_task;
* Should we dump all CPUs backtraces in a hung task event?
* Defaults to 0, can be changed via sysctl.
*/
-unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
+static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
+#else
+#define sysctl_hung_task_all_cpu_backtrace 0
#endif /* CONFIG_SMP */
/*
@@ -222,11 +224,13 @@ static long hung_timeout_jiffies(unsigned long last_checked,
MAX_SCHEDULE_TIMEOUT;
}
+#ifdef CONFIG_SYSCTL
/*
* Process updating of timeout sysctl
*/
-int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
{
int ret;
@@ -241,6 +245,76 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
return ret;
}
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
+static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
+static struct ctl_table hung_task_sysctls[] = {
+#ifdef CONFIG_SMP
+ {
+ .procname = "hung_task_all_cpu_backtrace",
+ .data = &sysctl_hung_task_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_SMP */
+ {
+ .procname = "hung_task_panic",
+ .data = &sysctl_hung_task_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "hung_task_check_count",
+ .data = &sysctl_hung_task_check_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "hung_task_timeout_secs",
+ .data = &sysctl_hung_task_timeout_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = (void *)&hung_task_timeout_max,
+ },
+ {
+ .procname = "hung_task_check_interval_secs",
+ .data = &sysctl_hung_task_check_interval_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = (void *)&hung_task_timeout_max,
+ },
+ {
+ .procname = "hung_task_warnings",
+ .data = &sysctl_hung_task_warnings,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_NEG_ONE,
+ },
+ {}
+};
+
+static void __init hung_task_sysctl_init(void)
+{
+ register_sysctl_init("kernel", hung_task_sysctls);
+}
+#else
+#define hung_task_sysctl_init() do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
+
static atomic_t reset_hung_task = ATOMIC_INIT(0);
void reset_hung_task_detector(void)
@@ -310,6 +384,7 @@ static int __init hung_task_init(void)
pm_notifier(hungtask_pm_notify, 0);
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+ hung_task_sysctl_init();
return 0;
}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index ee595ec09778..623b8136e9af 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -137,7 +137,7 @@ static inline int irq_select_affinity_usr(unsigned int irq)
static ssize_t write_irq_affinity(int type, struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
- unsigned int irq = (int)(long)PDE_DATA(file_inode(file));
+ unsigned int irq = (int)(long)pde_data(file_inode(file));
cpumask_var_t new_value;
int err;
@@ -190,12 +190,12 @@ static ssize_t irq_affinity_list_proc_write(struct file *file,
static int irq_affinity_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, irq_affinity_proc_show, PDE_DATA(inode));
+ return single_open(file, irq_affinity_proc_show, pde_data(inode));
}
static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode));
+ return single_open(file, irq_affinity_list_proc_show, pde_data(inode));
}
static const struct proc_ops irq_affinity_proc_ops = {
@@ -265,7 +265,7 @@ out:
static int default_affinity_open(struct inode *inode, struct file *file)
{
- return single_open(file, default_affinity_show, PDE_DATA(inode));
+ return single_open(file, default_affinity_show, pde_data(inode));
}
static const struct proc_ops default_affinity_proc_ops = {
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 3011bc33a5ba..951c93216fc4 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -243,6 +243,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
ret = fn(data, namebuf, NULL, kallsyms_sym_address(i));
if (ret != 0)
return ret;
+ cond_resched();
}
return 0;
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 5a5d192a89ac..68480f731192 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -81,7 +81,7 @@ int kexec_should_crash(struct task_struct *p)
if (crash_kexec_post_notifiers)
return 0;
/*
- * There are 4 panic() calls in do_exit() path, each of which
+ * There are 4 panic() calls in make_task_dead() path, each of which
* corresponds to each of these 4 conditions.
*/
if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 21eccc961bba..94cab8c9ce56 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -48,6 +48,9 @@
#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
+#if !defined(CONFIG_OPTPROBES) || !defined(CONFIG_SYSCTL)
+#define kprobe_sysctls_init() do { } while (0)
+#endif
static int kprobes_initialized;
/* kprobe_table can be accessed by
@@ -938,10 +941,10 @@ static void unoptimize_all_kprobes(void)
}
static DEFINE_MUTEX(kprobe_sysctl_mutex);
-int sysctl_kprobes_optimization;
-int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
- void *buffer, size_t *length,
- loff_t *ppos)
+static int sysctl_kprobes_optimization;
+static int proc_kprobes_optimization_handler(struct ctl_table *table,
+ int write, void *buffer,
+ size_t *length, loff_t *ppos)
{
int ret;
@@ -957,6 +960,24 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
return ret;
}
+
+static struct ctl_table kprobe_sysctls[] = {
+ {
+ .procname = "kprobes-optimization",
+ .data = &sysctl_kprobes_optimization,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_kprobes_optimization_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {}
+};
+
+static void __init kprobe_sysctls_init(void)
+{
+ register_sysctl_init("debug", kprobe_sysctls);
+}
#endif /* CONFIG_SYSCTL */
/* Put a breakpoint for a probe. */
@@ -2584,6 +2605,7 @@ static int __init init_kprobes(void)
err = register_module_notifier(&kprobe_module_nb);
kprobes_initialized = (err == 0);
+ kprobe_sysctls_init();
return err;
}
early_initcall(init_kprobes);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 7113003fab63..38c6dd822da8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -52,6 +52,7 @@ struct kthread_create_info
struct kthread {
unsigned long flags;
unsigned int cpu;
+ int result;
int (*threadfn)(void *);
void *data;
mm_segment_t oldfs;
@@ -60,6 +61,8 @@ struct kthread {
#ifdef CONFIG_BLK_CGROUP
struct cgroup_subsys_state *blkcg_css;
#endif
+ /* To store the full name if task comm is truncated. */
+ char *full_name;
};
enum KTHREAD_BITS {
@@ -71,7 +74,7 @@ enum KTHREAD_BITS {
static inline struct kthread *to_kthread(struct task_struct *k)
{
WARN_ON(!(k->flags & PF_KTHREAD));
- return (__force void *)k->set_child_tid;
+ return k->worker_private;
}
/*
@@ -79,7 +82,7 @@ static inline struct kthread *to_kthread(struct task_struct *k)
*
* Per construction; when:
*
- * (p->flags & PF_KTHREAD) && p->set_child_tid
+ * (p->flags & PF_KTHREAD) && p->worker_private
*
* the task is both a kthread and struct kthread is persistent. However
* PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and
@@ -87,26 +90,41 @@ static inline struct kthread *to_kthread(struct task_struct *k)
*/
static inline struct kthread *__to_kthread(struct task_struct *p)
{
- void *kthread = (__force void *)p->set_child_tid;
+ void *kthread = p->worker_private;
if (kthread && !(p->flags & PF_KTHREAD))
kthread = NULL;
return kthread;
}
-void set_kthread_struct(struct task_struct *p)
+void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
- struct kthread *kthread;
+ struct kthread *kthread = to_kthread(tsk);
- if (__to_kthread(p))
+ if (!kthread || !kthread->full_name) {
+ __get_task_comm(buf, buf_size, tsk);
return;
+ }
+
+ strscpy_pad(buf, kthread->full_name, buf_size);
+}
+
+bool set_kthread_struct(struct task_struct *p)
+{
+ struct kthread *kthread;
+
+ if (WARN_ON_ONCE(to_kthread(p)))
+ return false;
kthread = kzalloc(sizeof(*kthread), GFP_KERNEL);
- /*
- * We abuse ->set_child_tid to avoid the new member and because it
- * can't be wrongly copied by copy_process(). We also rely on fact
- * that the caller can't exec, so PF_KTHREAD can't be cleared.
- */
- p->set_child_tid = (__force void __user *)kthread;
+ if (!kthread)
+ return false;
+
+ init_completion(&kthread->exited);
+ init_completion(&kthread->parked);
+ p->vfork_done = &kthread->exited;
+
+ p->worker_private = kthread;
+ return true;
}
void free_kthread_struct(struct task_struct *k)
@@ -114,13 +132,17 @@ void free_kthread_struct(struct task_struct *k)
struct kthread *kthread;
/*
- * Can be NULL if this kthread was created by kernel_thread()
- * or if kmalloc() in kthread() failed.
+ * Can be NULL if kmalloc() in set_kthread_struct() failed.
*/
kthread = to_kthread(k);
+ if (!kthread)
+ return;
+
#ifdef CONFIG_BLK_CGROUP
- WARN_ON_ONCE(kthread && kthread->blkcg_css);
+ WARN_ON_ONCE(kthread->blkcg_css);
#endif
+ k->worker_private = NULL;
+ kfree(kthread->full_name);
kfree(kthread);
}
@@ -268,6 +290,44 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);
+/**
+ * kthread_exit - Cause the current kthread return @result to kthread_stop().
+ * @result: The integer value to return to kthread_stop().
+ *
+ * While kthread_exit can be called directly, it exists so that
+ * functions which do some additional work in non-modular code such as
+ * module_put_and_kthread_exit can be implemented.
+ *
+ * Does not return.
+ */
+void __noreturn kthread_exit(long result)
+{
+ struct kthread *kthread = to_kthread(current);
+ kthread->result = result;
+ do_exit(0);
+}
+
+/**
+ * kthread_complete_and_exit - Exit the current kthread.
+ * @comp: Completion to complete
+ * @code: The integer value to return to kthread_stop().
+ *
+ * If present complete @comp and the reuturn code to kthread_stop().
+ *
+ * A kernel thread whose module may be removed after the completion of
+ * @comp can use this function exit safely.
+ *
+ * Does not return.
+ */
+void __noreturn kthread_complete_and_exit(struct completion *comp, long code)
+{
+ if (comp)
+ complete(comp);
+
+ kthread_exit(code);
+}
+EXPORT_SYMBOL(kthread_complete_and_exit);
+
static int kthread(void *_create)
{
static const struct sched_param param = { .sched_priority = 0 };
@@ -279,27 +339,17 @@ static int kthread(void *_create)
struct kthread *self;
int ret;
- set_kthread_struct(current);
self = to_kthread(current);
/* If user was SIGKILLed, I release the structure. */
done = xchg(&create->done, NULL);
if (!done) {
kfree(create);
- do_exit(-EINTR);
- }
-
- if (!self) {
- create->result = ERR_PTR(-ENOMEM);
- complete(done);
- do_exit(-ENOMEM);
+ kthread_exit(-EINTR);
}
self->threadfn = threadfn;
self->data = data;
- init_completion(&self->exited);
- init_completion(&self->parked);
- current->vfork_done = &self->exited;
/*
* The new thread inherited kthreadd's priority and CPU mask. Reset
@@ -326,7 +376,7 @@ static int kthread(void *_create)
__kthread_parkme(self);
ret = threadfn(data);
}
- do_exit(ret);
+ kthread_exit(ret);
}
/* called from kernel_clone() to get node information for about to be created task */
@@ -406,12 +456,22 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
task = create->result;
if (!IS_ERR(task)) {
char name[TASK_COMM_LEN];
+ va_list aq;
+ int len;
/*
* task is already visible to other tasks, so updating
* COMM must be protected.
*/
- vsnprintf(name, sizeof(name), namefmt, args);
+ va_copy(aq, args);
+ len = vsnprintf(name, sizeof(name), namefmt, aq);
+ va_end(aq);
+ if (len >= TASK_COMM_LEN) {
+ struct kthread *kthread = to_kthread(task);
+
+ /* leave it truncated when out of memory. */
+ kthread->full_name = kvasprintf(GFP_KERNEL, namefmt, args);
+ }
set_task_comm(task, name);
}
kfree(create);
@@ -523,6 +583,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
to_kthread(p)->cpu = cpu;
return p;
}
+EXPORT_SYMBOL(kthread_create_on_cpu);
void kthread_set_per_cpu(struct task_struct *k, int cpu)
{
@@ -627,7 +688,7 @@ EXPORT_SYMBOL_GPL(kthread_park);
* instead of calling wake_up_process(): the thread will exit without
* calling threadfn().
*
- * If threadfn() may call do_exit() itself, the caller must ensure
+ * If threadfn() may call kthread_exit() itself, the caller must ensure
* task_struct can't go away.
*
* Returns the result of threadfn(), or %-EINTR if wake_up_process()
@@ -646,7 +707,7 @@ int kthread_stop(struct task_struct *k)
kthread_unpark(k);
wake_up_process(k);
wait_for_completion(&kthread->exited);
- ret = k->exit_code;
+ ret = kthread->result;
put_task_struct(k);
trace_sched_kthread_stop_ret(ret);
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 335d988bd811..585494ec464f 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -862,14 +862,11 @@ static void klp_init_object_early(struct klp_patch *patch,
list_add_tail(&obj->node, &patch->obj_list);
}
-static int klp_init_patch_early(struct klp_patch *patch)
+static void klp_init_patch_early(struct klp_patch *patch)
{
struct klp_object *obj;
struct klp_func *func;
- if (!patch->objs)
- return -EINVAL;
-
INIT_LIST_HEAD(&patch->list);
INIT_LIST_HEAD(&patch->obj_list);
kobject_init(&patch->kobj, &klp_ktype_patch);
@@ -879,20 +876,12 @@ static int klp_init_patch_early(struct klp_patch *patch)
init_completion(&patch->finish);
klp_for_each_object_static(patch, obj) {
- if (!obj->funcs)
- return -EINVAL;
-
klp_init_object_early(patch, obj);
klp_for_each_func_static(obj, func) {
klp_init_func_early(obj, func);
}
}
-
- if (!try_module_get(patch->mod))
- return -ENODEV;
-
- return 0;
}
static int klp_init_patch(struct klp_patch *patch)
@@ -1024,10 +1013,17 @@ err:
int klp_enable_patch(struct klp_patch *patch)
{
int ret;
+ struct klp_object *obj;
- if (!patch || !patch->mod)
+ if (!patch || !patch->mod || !patch->objs)
return -EINVAL;
+ klp_for_each_object_static(patch, obj) {
+ if (!obj->funcs)
+ return -EINVAL;
+ }
+
+
if (!is_livepatch_module(patch->mod)) {
pr_err("module %s is not marked as a livepatch module\n",
patch->mod->name);
@@ -1051,12 +1047,13 @@ int klp_enable_patch(struct klp_patch *patch)
return -EINVAL;
}
- ret = klp_init_patch_early(patch);
- if (ret) {
+ if (!try_module_get(patch->mod)) {
mutex_unlock(&klp_mutex);
- return ret;
+ return -ENODEV;
}
+ klp_init_patch_early(patch);
+
ret = klp_init_patch(patch);
if (ret)
goto err;
diff --git a/kernel/livepatch/shadow.c b/kernel/livepatch/shadow.c
index e5c9fb295ba9..c2e724d97ddf 100644
--- a/kernel/livepatch/shadow.c
+++ b/kernel/livepatch/shadow.c
@@ -272,12 +272,12 @@ void klp_shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor)
EXPORT_SYMBOL_GPL(klp_shadow_free);
/**
- * klp_shadow_free_all() - detach and free all <*, id> shadow variables
+ * klp_shadow_free_all() - detach and free all <_, id> shadow variables
* @id: data identifier
* @dtor: custom callback that can be used to unregister the variable
* and/or free data that the shadow variable points to (optional)
*
- * This function releases the memory for all <*, id> shadow variable
+ * This function releases the memory for all <_, id> shadow variable
* instances, callers should stop referencing them accordingly.
*/
void klp_shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor)
@@ -288,7 +288,7 @@ void klp_shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor)
spin_lock_irqsave(&klp_shadow_lock, flags);
- /* Delete all <*, id> from hash */
+ /* Delete all <_, id> from hash */
hash_for_each(klp_shadow_hash, i, shadow, node) {
if (klp_shadow_match(shadow, shadow->obj, id))
klp_shadow_free_struct(shadow, dtor);
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index b562f9289372..7f49baaa4979 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -300,6 +300,16 @@ void __lockfunc _raw_write_lock(rwlock_t *lock)
__raw_write_lock(lock);
}
EXPORT_SYMBOL(_raw_write_lock);
+
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
+#define __raw_write_lock_nested(lock, subclass) __raw_write_lock(((void)(subclass), (lock)))
+#endif
+
+void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass)
+{
+ __raw_write_lock_nested(lock, subclass);
+}
+EXPORT_SYMBOL(_raw_write_lock_nested);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
index 9e396a09fe0f..48a19ed8486d 100644
--- a/kernel/locking/spinlock_rt.c
+++ b/kernel/locking/spinlock_rt.c
@@ -239,6 +239,18 @@ void __sched rt_write_lock(rwlock_t *rwlock)
}
EXPORT_SYMBOL(rt_write_lock);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass)
+{
+ rtlock_might_resched();
+ rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_);
+ rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
+ rcu_read_lock();
+ migrate_disable();
+}
+EXPORT_SYMBOL(rt_write_lock_nested);
+#endif
+
void __sched rt_read_unlock(rwlock_t *rwlock)
{
rwlock_release(&rwlock->dep_map, _RET_IP_);
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
index 33783abc377b..8c381c99062f 100644
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -23,9 +23,28 @@ struct load_info {
#ifdef CONFIG_KALLSYMS
unsigned long mod_kallsyms_init_off;
#endif
+#ifdef CONFIG_MODULE_DECOMPRESS
+ struct page **pages;
+ unsigned int max_pages;
+ unsigned int used_pages;
+#endif
struct {
unsigned int sym, str, mod, vers, info, pcpu;
} index;
};
extern int mod_verify_sig(const void *mod, struct load_info *info);
+
+#ifdef CONFIG_MODULE_DECOMPRESS
+int module_decompress(struct load_info *info, const void *buf, size_t size);
+void module_decompress_cleanup(struct load_info *info);
+#else
+static inline int module_decompress(struct load_info *info,
+ const void *buf, size_t size)
+{
+ return -EOPNOTSUPP;
+}
+static inline void module_decompress_cleanup(struct load_info *info)
+{
+}
+#endif
diff --git a/kernel/module.c b/kernel/module.c
index 84a9141a5e15..24dab046e16c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -337,12 +337,12 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
* A thread that wants to hold a reference to a module only while it
* is running can call this to safely exit. nfsd and lockd use this.
*/
-void __noreturn __module_put_and_exit(struct module *mod, long code)
+void __noreturn __module_put_and_kthread_exit(struct module *mod, long code)
{
module_put(mod);
- do_exit(code);
+ kthread_exit(code);
}
-EXPORT_SYMBOL(__module_put_and_exit);
+EXPORT_SYMBOL(__module_put_and_kthread_exit);
/* Find a module section: 0 means not found. */
static unsigned int find_sec(const struct load_info *info, const char *name)
@@ -958,7 +958,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
}
}
- /* Stop the machine so refcounts can't move and disable module. */
ret = try_stop_module(mod, flags, &forced);
if (ret != 0)
goto out;
@@ -2884,12 +2883,13 @@ static int module_sig_check(struct load_info *info, int flags)
const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
const char *reason;
const void *mod = info->hdr;
-
+ bool mangled_module = flags & (MODULE_INIT_IGNORE_MODVERSIONS |
+ MODULE_INIT_IGNORE_VERMAGIC);
/*
- * Require flags == 0, as a module with version information
- * removed is no longer the module that was signed
+ * Do not allow mangled modules as a module with version information
+ * removed is no longer the module that was signed.
*/
- if (flags == 0 &&
+ if (!mangled_module &&
info->len > markerlen &&
memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
/* We truncate the module to discard the signature */
@@ -3174,9 +3174,12 @@ out:
return err;
}
-static void free_copy(struct load_info *info)
+static void free_copy(struct load_info *info, int flags)
{
- vfree(info->hdr);
+ if (flags & MODULE_INIT_COMPRESSED_FILE)
+ module_decompress_cleanup(info);
+ else
+ vfree(info->hdr);
}
static int rewrite_section_headers(struct load_info *info, int flags)
@@ -4125,7 +4128,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
}
/* Get rid of temporary copy. */
- free_copy(info);
+ free_copy(info, flags);
/* Done! */
trace_module_load(mod);
@@ -4174,7 +4177,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
module_deallocate(mod, info);
free_copy:
- free_copy(info);
+ free_copy(info, flags);
return err;
}
@@ -4201,7 +4204,8 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
{
struct load_info info = { };
- void *hdr = NULL;
+ void *buf = NULL;
+ int len;
int err;
err = may_init_module();
@@ -4211,15 +4215,24 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);
if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
- |MODULE_INIT_IGNORE_VERMAGIC))
+ |MODULE_INIT_IGNORE_VERMAGIC
+ |MODULE_INIT_COMPRESSED_FILE))
return -EINVAL;
- err = kernel_read_file_from_fd(fd, 0, &hdr, INT_MAX, NULL,
+ len = kernel_read_file_from_fd(fd, 0, &buf, INT_MAX, NULL,
READING_MODULE);
- if (err < 0)
- return err;
- info.hdr = hdr;
- info.len = err;
+ if (len < 0)
+ return len;
+
+ if (flags & MODULE_INIT_COMPRESSED_FILE) {
+ err = module_decompress(&info, buf, len);
+ vfree(buf); /* compressed data is no longer needed */
+ if (err)
+ return err;
+ } else {
+ info.hdr = buf;
+ info.len = len;
+ }
return load_module(&info, uargs, flags);
}
@@ -4499,6 +4512,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
mod, kallsyms_symbol_value(sym));
if (ret != 0)
goto out;
+
+ cond_resched();
}
}
out:
diff --git a/kernel/module_decompress.c b/kernel/module_decompress.c
new file mode 100644
index 000000000000..b01c69c2ff99
--- /dev/null
+++ b/kernel/module_decompress.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/kobject.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
+
+#include "module-internal.h"
+
+static int module_extend_max_pages(struct load_info *info, unsigned int extent)
+{
+ struct page **new_pages;
+
+ new_pages = kvmalloc_array(info->max_pages + extent,
+ sizeof(info->pages), GFP_KERNEL);
+ if (!new_pages)
+ return -ENOMEM;
+
+ memcpy(new_pages, info->pages, info->max_pages * sizeof(info->pages));
+ kvfree(info->pages);
+ info->pages = new_pages;
+ info->max_pages += extent;
+
+ return 0;
+}
+
+static struct page *module_get_next_page(struct load_info *info)
+{
+ struct page *page;
+ int error;
+
+ if (info->max_pages == info->used_pages) {
+ error = module_extend_max_pages(info, info->used_pages);
+ if (error)
+ return ERR_PTR(error);
+ }
+
+ page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ info->pages[info->used_pages++] = page;
+ return page;
+}
+
+#ifdef CONFIG_MODULE_COMPRESS_GZIP
+#include <linux/zlib.h>
+#define MODULE_COMPRESSION gzip
+#define MODULE_DECOMPRESS_FN module_gzip_decompress
+
+/*
+ * Calculate length of the header which consists of signature, header
+ * flags, time stamp and operating system ID (10 bytes total), plus
+ * an optional filename.
+ */
+static size_t module_gzip_header_len(const u8 *buf, size_t size)
+{
+ const u8 signature[] = { 0x1f, 0x8b, 0x08 };
+ size_t len = 10;
+
+ if (size < len || memcmp(buf, signature, sizeof(signature)))
+ return 0;
+
+ if (buf[3] & 0x08) {
+ do {
+ /*
+ * If we can't find the end of the file name we must
+ * be dealing with a corrupted file.
+ */
+ if (len == size)
+ return 0;
+ } while (buf[len++] != '\0');
+ }
+
+ return len;
+}
+
+static ssize_t module_gzip_decompress(struct load_info *info,
+ const void *buf, size_t size)
+{
+ struct z_stream_s s = { 0 };
+ size_t new_size = 0;
+ size_t gzip_hdr_len;
+ ssize_t retval;
+ int rc;
+
+ gzip_hdr_len = module_gzip_header_len(buf, size);
+ if (!gzip_hdr_len) {
+ pr_err("not a gzip compressed module\n");
+ return -EINVAL;
+ }
+
+ s.next_in = buf + gzip_hdr_len;
+ s.avail_in = size - gzip_hdr_len;
+
+ s.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
+ if (!s.workspace)
+ return -ENOMEM;
+
+ rc = zlib_inflateInit2(&s, -MAX_WBITS);
+ if (rc != Z_OK) {
+ pr_err("failed to initialize decompressor: %d\n", rc);
+ retval = -EINVAL;
+ goto out;
+ }
+
+ do {
+ struct page *page = module_get_next_page(info);
+ if (!page) {
+ retval = -ENOMEM;
+ goto out_inflate_end;
+ }
+
+ s.next_out = kmap(page);
+ s.avail_out = PAGE_SIZE;
+ rc = zlib_inflate(&s, 0);
+ kunmap(page);
+
+ new_size += PAGE_SIZE - s.avail_out;
+ } while (rc == Z_OK);
+
+ if (rc != Z_STREAM_END) {
+ pr_err("decompression failed with status %d\n", rc);
+ retval = -EINVAL;
+ goto out_inflate_end;
+ }
+
+ retval = new_size;
+
+out_inflate_end:
+ zlib_inflateEnd(&s);
+out:
+ kfree(s.workspace);
+ return retval;
+}
+#elif CONFIG_MODULE_COMPRESS_XZ
+#include <linux/xz.h>
+#define MODULE_COMPRESSION xz
+#define MODULE_DECOMPRESS_FN module_xz_decompress
+
+static ssize_t module_xz_decompress(struct load_info *info,
+ const void *buf, size_t size)
+{
+ static const u8 signature[] = { 0xfd, '7', 'z', 'X', 'Z', 0 };
+ struct xz_dec *xz_dec;
+ struct xz_buf xz_buf;
+ enum xz_ret xz_ret;
+ size_t new_size = 0;
+ ssize_t retval;
+
+ if (size < sizeof(signature) ||
+ memcmp(buf, signature, sizeof(signature))) {
+ pr_err("not an xz compressed module\n");
+ return -EINVAL;
+ }
+
+ xz_dec = xz_dec_init(XZ_DYNALLOC, (u32)-1);
+ if (!xz_dec)
+ return -ENOMEM;
+
+ xz_buf.in_size = size;
+ xz_buf.in = buf;
+ xz_buf.in_pos = 0;
+
+ do {
+ struct page *page = module_get_next_page(info);
+ if (!page) {
+ retval = -ENOMEM;
+ goto out;
+ }
+
+ xz_buf.out = kmap(page);
+ xz_buf.out_pos = 0;
+ xz_buf.out_size = PAGE_SIZE;
+ xz_ret = xz_dec_run(xz_dec, &xz_buf);
+ kunmap(page);
+
+ new_size += xz_buf.out_pos;
+ } while (xz_buf.out_pos == PAGE_SIZE && xz_ret == XZ_OK);
+
+ if (xz_ret != XZ_STREAM_END) {
+ pr_err("decompression failed with status %d\n", xz_ret);
+ retval = -EINVAL;
+ goto out;
+ }
+
+ retval = new_size;
+
+ out:
+ xz_dec_end(xz_dec);
+ return retval;
+}
+#else
+#error "Unexpected configuration for CONFIG_MODULE_DECOMPRESS"
+#endif
+
+int module_decompress(struct load_info *info, const void *buf, size_t size)
+{
+ unsigned int n_pages;
+ ssize_t data_size;
+ int error;
+
+ /*
+ * Start with number of pages twice as big as needed for
+ * compressed data.
+ */
+ n_pages = DIV_ROUND_UP(size, PAGE_SIZE) * 2;
+ error = module_extend_max_pages(info, n_pages);
+
+ data_size = MODULE_DECOMPRESS_FN(info, buf, size);
+ if (data_size < 0) {
+ error = data_size;
+ goto err;
+ }
+
+ info->hdr = vmap(info->pages, info->used_pages, VM_MAP, PAGE_KERNEL);
+ if (!info->hdr) {
+ error = -ENOMEM;
+ goto err;
+ }
+
+ info->len = data_size;
+ return 0;
+
+err:
+ module_decompress_cleanup(info);
+ return error;
+}
+
+void module_decompress_cleanup(struct load_info *info)
+{
+ int i;
+
+ if (info->hdr)
+ vunmap(info->hdr);
+
+ for (i = 0; i < info->used_pages; i++)
+ __free_page(info->pages[i]);
+
+ kvfree(info->pages);
+
+ info->pages = NULL;
+ info->max_pages = info->used_pages = 0;
+}
+
+static ssize_t compression_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n", __stringify(MODULE_COMPRESSION));
+}
+static struct kobj_attribute module_compression_attr = __ATTR_RO(compression);
+
+static int __init module_decompress_sysfs_init(void)
+{
+ int error;
+
+ error = sysfs_create_file(&module_kset->kobj,
+ &module_compression_attr.attr);
+ if (error)
+ pr_warn("Failed to create 'compression' attribute");
+
+ return 0;
+}
+late_initcall(module_decompress_sysfs_init);
diff --git a/kernel/panic.c b/kernel/panic.c
index cefd7d82366f..55b50e052ec3 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -32,6 +32,7 @@
#include <linux/bug.h>
#include <linux/ratelimit.h>
#include <linux/debugfs.h>
+#include <trace/events/error_report.h>
#include <asm/sections.h>
#define PANIC_TIMER_STEP 100
@@ -533,26 +534,9 @@ void oops_enter(void)
trigger_all_cpu_backtrace();
}
-/*
- * 64-bit random ID for oopses:
- */
-static u64 oops_id;
-
-static int init_oops_id(void)
-{
- if (!oops_id)
- get_random_bytes(&oops_id, sizeof(oops_id));
- else
- oops_id++;
-
- return 0;
-}
-late_initcall(init_oops_id);
-
static void print_oops_end_marker(void)
{
- init_oops_id();
- pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
+ pr_warn("---[ end trace %016llx ]---\n", 0ULL);
}
/*
@@ -609,6 +593,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
print_irqtrace_events(current);
print_oops_end_marker();
+ trace_error_report_end(ERROR_DETECTOR_WARN, (unsigned long)caller);
/* Just a warning, don't kill lockdep. */
add_taint(taint, LOCKDEP_STILL_OK);
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index d118739874c0..f5b388e810b9 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -2,5 +2,8 @@
obj-y = printk.o
obj-$(CONFIG_PRINTK) += printk_safe.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
-obj-$(CONFIG_PRINTK) += printk_ringbuffer.o
obj-$(CONFIG_PRINTK_INDEX) += index.o
+
+obj-$(CONFIG_PRINTK) += printk_support.o
+printk_support-y := printk_ringbuffer.o
+printk_support-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 9f3ed2fdb721..d947ca6c84f9 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -4,6 +4,14 @@
*/
#include <linux/percpu.h>
+#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
+void __init printk_sysctl_init(void);
+int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
+#else
+#define printk_sysctl_init() do { } while (0)
+#endif
+
#ifdef CONFIG_PRINTK
/* Flags for a single printk record. */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 155229f0cf0f..82abfaf3c2aa 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -171,7 +171,7 @@ static int __init control_devkmsg(char *str)
__setup("printk.devkmsg=", control_devkmsg);
char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
-
+#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -210,6 +210,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
return 0;
}
+#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */
/* Number of registered extended console drivers. */
static int nr_ext_console_drivers;
@@ -3211,6 +3212,7 @@ static int __init printk_late_init(void)
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
console_cpu_notify, NULL);
WARN_ON(ret < 0);
+ printk_sysctl_init();
return 0;
}
late_initcall(printk_late_init);
diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c
new file mode 100644
index 000000000000..653ae04aab7f
--- /dev/null
+++ b/kernel/printk/sysctl.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sysctl.c: General linux system control interface
+ */
+
+#include <linux/sysctl.h>
+#include <linux/printk.h>
+#include <linux/capability.h>
+#include <linux/ratelimit.h>
+#include "internal.h"
+
+static const int ten_thousand = 10000;
+
+static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+
+static struct ctl_table printk_sysctls[] = {
+ {
+ .procname = "printk",
+ .data = &console_loglevel,
+ .maxlen = 4*sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "printk_ratelimit",
+ .data = &printk_ratelimit_state.interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "printk_ratelimit_burst",
+ .data = &printk_ratelimit_state.burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "printk_delay",
+ .data = &printk_delay_msec,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = (void *)&ten_thousand,
+ },
+ {
+ .procname = "printk_devkmsg",
+ .data = devkmsg_log_str,
+ .maxlen = DEVKMSG_STR_MAX_SIZE,
+ .mode = 0644,
+ .proc_handler = devkmsg_sysctl_set_loglvl,
+ },
+ {
+ .procname = "dmesg_restrict",
+ .data = &dmesg_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_sysadmin,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "kptr_restrict",
+ .data = &kptr_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_sysadmin,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {}
+};
+
+void __init printk_sysctl_init(void)
+{
+ register_sysctl_init("kernel", printk_sysctls);
+}
diff --git a/kernel/profile.c b/kernel/profile.c
index eb9c7f0f5ac5..37640a0bd8a3 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -133,79 +133,6 @@ int __ref profile_init(void)
return -ENOMEM;
}
-/* Profile event notifications */
-
-static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
-static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
-static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
-
-void profile_task_exit(struct task_struct *task)
-{
- blocking_notifier_call_chain(&task_exit_notifier, 0, task);
-}
-
-int profile_handoff_task(struct task_struct *task)
-{
- int ret;
- ret = atomic_notifier_call_chain(&task_free_notifier, 0, task);
- return (ret == NOTIFY_OK) ? 1 : 0;
-}
-
-void profile_munmap(unsigned long addr)
-{
- blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
-}
-
-int task_handoff_register(struct notifier_block *n)
-{
- return atomic_notifier_chain_register(&task_free_notifier, n);
-}
-EXPORT_SYMBOL_GPL(task_handoff_register);
-
-int task_handoff_unregister(struct notifier_block *n)
-{
- return atomic_notifier_chain_unregister(&task_free_notifier, n);
-}
-EXPORT_SYMBOL_GPL(task_handoff_unregister);
-
-int profile_event_register(enum profile_type type, struct notifier_block *n)
-{
- int err = -EINVAL;
-
- switch (type) {
- case PROFILE_TASK_EXIT:
- err = blocking_notifier_chain_register(
- &task_exit_notifier, n);
- break;
- case PROFILE_MUNMAP:
- err = blocking_notifier_chain_register(
- &munmap_notifier, n);
- break;
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(profile_event_register);
-
-int profile_event_unregister(enum profile_type type, struct notifier_block *n)
-{
- int err = -EINVAL;
-
- switch (type) {
- case PROFILE_TASK_EXIT:
- err = blocking_notifier_chain_unregister(
- &task_exit_notifier, n);
- break;
- case PROFILE_MUNMAP:
- err = blocking_notifier_chain_unregister(
- &munmap_notifier, n);
- break;
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(profile_event_unregister);
-
#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
/*
* Each cpu has a pair of open-addressed hashtables for pending
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index f8589bf8d7dc..eea265082e97 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -419,8 +419,6 @@ static int ptrace_attach(struct task_struct *task, long request,
if (task->ptrace)
goto unlock_tasklist;
- if (seize)
- flags |= PT_SEIZED;
task->ptrace = flags;
ptrace_link(task, current);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 33ea446101b3..422f7e4cc08d 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2031,9 +2031,8 @@ static int rcutorture_booster_init(unsigned int cpu)
mutex_lock(&boost_mutex);
rcu_torture_disable_rt_throttle();
VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task");
- boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
- cpu_to_node(cpu),
- "rcu_torture_boost");
+ boost_tasks[cpu] = kthread_run_on_cpu(rcu_torture_boost, NULL,
+ cpu, "rcu_torture_boost_%u");
if (IS_ERR(boost_tasks[cpu])) {
retval = PTR_ERR(boost_tasks[cpu]);
VERBOSE_TOROUT_STRING("rcu_torture_boost task create failed");
@@ -2042,8 +2041,6 @@ static int rcutorture_booster_init(unsigned int cpu)
mutex_unlock(&boost_mutex);
return retval;
}
- kthread_bind(boost_tasks[cpu], cpu);
- wake_up_process(boost_tasks[cpu]);
mutex_unlock(&boost_mutex);
return 0;
}
diff --git a/kernel/resource.c b/kernel/resource.c
index 5ad3eba619ba..9c08d6e9eef2 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -99,7 +99,7 @@ enum { MAX_IORES_LEVEL = 5 };
static void *r_start(struct seq_file *m, loff_t *pos)
__acquires(resource_lock)
{
- struct resource *p = PDE_DATA(file_inode(m->file));
+ struct resource *p = pde_data(file_inode(m->file));
loff_t l = 0;
read_lock(&resource_lock);
for (p = p->child; p && l < *pos; p = r_next(m, p, &l))
@@ -115,7 +115,7 @@ static void r_stop(struct seq_file *m, void *v)
static int r_show(struct seq_file *m, void *v)
{
- struct resource *root = PDE_DATA(file_inode(m->file));
+ struct resource *root = pde_data(file_inode(m->file));
struct resource *r = v, *p;
unsigned long long start, end;
int width = root->end < 0x10000 ? 4 : 8;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 83872f95a1ea..848eaa0efe0e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5822,8 +5822,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
if (schedstat_enabled() && rq->core->core_forceidle_count) {
- if (cookie)
- rq->core->core_forceidle_start = rq_clock(rq->core);
+ rq->core->core_forceidle_start = rq_clock(rq->core);
rq->core->core_forceidle_occupation = occ;
}
@@ -8219,9 +8218,7 @@ int __cond_resched_lock(spinlock_t *lock)
if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
- if (resched)
- preempt_schedule_common();
- else
+ if (!_cond_resched())
cpu_relax();
ret = 1;
spin_lock(lock);
@@ -8239,9 +8236,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock)
if (rwlock_needbreak(lock) || resched) {
read_unlock(lock);
- if (resched)
- preempt_schedule_common();
- else
+ if (!_cond_resched())
cpu_relax();
ret = 1;
read_lock(lock);
@@ -8259,9 +8254,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock)
if (rwlock_needbreak(lock) || resched) {
write_unlock(lock);
- if (resched)
- preempt_schedule_common();
- else
+ if (!_cond_resched())
cpu_relax();
ret = 1;
write_lock(lock);
@@ -8642,14 +8635,6 @@ void __init init_idle(struct task_struct *idle, int cpu)
__sched_fork(0, idle);
- /*
- * The idle task doesn't need the kthread struct to function, but it
- * is dressed up as a per-CPU kthread and thus needs to play the part
- * if we want to avoid special-casing it in code that deals with per-CPU
- * kthreads.
- */
- set_kthread_struct(idle);
-
raw_spin_lock_irqsave(&idle->pi_lock, flags);
raw_spin_rq_lock(rq);
@@ -9469,6 +9454,14 @@ void __init sched_init(void)
enter_lazy_tlb(&init_mm, current);
/*
+ * The idle task doesn't need the kthread struct to function, but it
+ * is dressed up as a per-CPU kthread and thus needs to play the part
+ * if we want to avoid special-casing it in code that deals with per-CPU
+ * kthreads.
+ */
+ WARN_ON(!set_kthread_struct(current));
+
+ /*
* Make us the idle thread. Technically, schedule() should not be
* called from this thread, however somewhere below it might be,
* but because we are the idle thread, we just pick up running again
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index 1fb45672ec85..c8746a9a7ada 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -277,7 +277,7 @@ void __sched_core_account_forceidle(struct rq *rq)
rq_i = cpu_rq(i);
p = rq_i->core_pick ?: rq_i->curr;
- if (!p->core_cookie)
+ if (p == rq_i->idle)
continue;
__schedstat_add(p->stats.core_forceidle_sum, delta);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 095b0aa378df..5146163bfabb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3028,9 +3028,11 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
static inline void
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- u32 divider = get_pelt_divider(&se->avg);
sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
- cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+ sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+ cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
}
#else
static inline void
@@ -3381,7 +3383,6 @@ void set_task_rq_fair(struct sched_entity *se,
se->avg.last_update_time = n_last_update_time;
}
-
/*
* When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
* propagate its contribution. The key to this propagation is the invariant
@@ -3449,15 +3450,14 @@ void set_task_rq_fair(struct sched_entity *se,
* XXX: only do this for the part of runnable > running ?
*
*/
-
static inline void
update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
- u32 divider;
+ long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg;
+ u32 new_sum, divider;
/* Nothing to update */
- if (!delta)
+ if (!delta_avg)
return;
/*
@@ -3466,23 +3466,30 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
*/
divider = get_pelt_divider(&cfs_rq->avg);
+
/* Set new sched_entity's utilization */
se->avg.util_avg = gcfs_rq->avg.util_avg;
- se->avg.util_sum = se->avg.util_avg * divider;
+ new_sum = se->avg.util_avg * divider;
+ delta_sum = (long)new_sum - (long)se->avg.util_sum;
+ se->avg.util_sum = new_sum;
/* Update parent cfs_rq utilization */
- add_positive(&cfs_rq->avg.util_avg, delta);
- cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+ add_positive(&cfs_rq->avg.util_avg, delta_avg);
+ add_positive(&cfs_rq->avg.util_sum, delta_sum);
+
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+ cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
}
static inline void
update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
- u32 divider;
+ long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+ u32 new_sum, divider;
/* Nothing to update */
- if (!delta)
+ if (!delta_avg)
return;
/*
@@ -3493,19 +3500,25 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
/* Set new sched_entity's runnable */
se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
- se->avg.runnable_sum = se->avg.runnable_avg * divider;
+ new_sum = se->avg.runnable_avg * divider;
+ delta_sum = (long)new_sum - (long)se->avg.runnable_sum;
+ se->avg.runnable_sum = new_sum;
/* Update parent cfs_rq runnable */
- add_positive(&cfs_rq->avg.runnable_avg, delta);
- cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+ add_positive(&cfs_rq->avg.runnable_avg, delta_avg);
+ add_positive(&cfs_rq->avg.runnable_sum, delta_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+ cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
}
static inline void
update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+ long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
unsigned long load_avg;
u64 load_sum = 0;
+ s64 delta_sum;
u32 divider;
if (!runnable_sum)
@@ -3532,7 +3545,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
* assuming all tasks are equally runnable.
*/
if (scale_load_down(gcfs_rq->load.weight)) {
- load_sum = div_s64(gcfs_rq->avg.load_sum,
+ load_sum = div_u64(gcfs_rq->avg.load_sum,
scale_load_down(gcfs_rq->load.weight));
}
@@ -3549,19 +3562,22 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
runnable_sum = max(runnable_sum, running_sum);
- load_sum = (s64)se_weight(se) * runnable_sum;
- load_avg = div_s64(load_sum, divider);
-
- se->avg.load_sum = runnable_sum;
+ load_sum = se_weight(se) * runnable_sum;
+ load_avg = div_u64(load_sum, divider);
- delta = load_avg - se->avg.load_avg;
- if (!delta)
+ delta_avg = load_avg - se->avg.load_avg;
+ if (!delta_avg)
return;
- se->avg.load_avg = load_avg;
+ delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
- add_positive(&cfs_rq->avg.load_avg, delta);
- cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+ se->avg.load_sum = runnable_sum;
+ se->avg.load_avg = load_avg;
+ add_positive(&cfs_rq->avg.load_avg, delta_avg);
+ add_positive(&cfs_rq->avg.load_sum, delta_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+ cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
}
static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
@@ -3652,7 +3668,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
*
* cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
*
- * Returns true if the load decayed or we removed load.
+ * Return: true if the load decayed or we removed load.
*
* Since both these conditions indicate a changed cfs_rq->avg.load we should
* call update_tg_load_avg() when this function returns true.
@@ -3677,15 +3693,32 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
r = removed_load;
sub_positive(&sa->load_avg, r);
- sa->load_sum = sa->load_avg * divider;
+ sub_positive(&sa->load_sum, r * divider);
+ /* See sa->util_sum below */
+ sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER);
r = removed_util;
sub_positive(&sa->util_avg, r);
- sa->util_sum = sa->util_avg * divider;
+ sub_positive(&sa->util_sum, r * divider);
+ /*
+ * Because of rounding, se->util_sum might ends up being +1 more than
+ * cfs->util_sum. Although this is not a problem by itself, detaching
+ * a lot of tasks with the rounding problem between 2 updates of
+ * util_avg (~1ms) can make cfs->util_sum becoming null whereas
+ * cfs_util_avg is not.
+ * Check that util_sum is still above its lower bound for the new
+ * util_avg. Given that period_contrib might have moved since the last
+ * sync, we are only sure that util_sum must be above or equal to
+ * util_avg * minimum possible divider
+ */
+ sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
r = removed_runnable;
sub_positive(&sa->runnable_avg, r);
- sa->runnable_sum = sa->runnable_avg * divider;
+ sub_positive(&sa->runnable_sum, r * divider);
+ /* See sa->util_sum above */
+ sa->runnable_sum = max_t(u32, sa->runnable_sum,
+ sa->runnable_avg * PELT_MIN_DIVIDER);
/*
* removed_runnable is the unweighted version of removed_load so we
@@ -3772,17 +3805,18 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
*/
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- /*
- * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
- * See ___update_load_avg() for details.
- */
- u32 divider = get_pelt_divider(&cfs_rq->avg);
-
dequeue_load_avg(cfs_rq, se);
sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
- cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+ sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+ cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
+
sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
- cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+ sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+ cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
@@ -8539,6 +8573,8 @@ group_type group_classify(unsigned int imbalance_pct,
*
* If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
* of @dst_cpu are idle and @sg has lower priority.
+ *
+ * Return: true if @dst_cpu can pull tasks, false otherwise.
*/
static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
struct sg_lb_stats *sgs,
@@ -8614,6 +8650,7 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
+ * @sds: Load-balancing data with statistics of the local group.
* @group: sched_group whose statistics are to be updated.
* @sgs: variable to hold the statistics for this group.
* @sg_status: Holds flag indicating the status of the sched_group
@@ -9421,12 +9458,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
/**
* find_busiest_group - Returns the busiest group within the sched_domain
* if there is an imbalance.
+ * @env: The load balancing environment.
*
* Also calculates the amount of runnable load which should be moved
* to restore balance.
*
- * @env: The load balancing environment.
- *
* Return: - The busiest group if imbalance exists.
*/
static struct sched_group *find_busiest_group(struct lb_env *env)
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index e06071bf3472..c336f5f481bc 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running)
}
#endif
+#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)
+
static inline u32 get_pelt_divider(struct sched_avg *avg)
{
- return LOAD_AVG_MAX - 1024 + avg->period_contrib;
+ return PELT_MIN_DIVIDER + avg->period_contrib;
}
static inline void cfs_se_util_change(struct sched_avg *avg)
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index a679613a7cb7..c137c4d6983e 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -1162,7 +1162,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->event = 0;
t->last_event_time = 0;
init_waitqueue_head(&t->event_wait);
- kref_init(&t->refcount);
mutex_lock(&group->trigger_lock);
@@ -1191,15 +1190,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
return t;
}
-static void psi_trigger_destroy(struct kref *ref)
+void psi_trigger_destroy(struct psi_trigger *t)
{
- struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
- struct psi_group *group = t->group;
+ struct psi_group *group;
struct task_struct *task_to_destroy = NULL;
- if (static_branch_likely(&psi_disabled))
+ /*
+ * We do not check psi_disabled since it might have been disabled after
+ * the trigger got created.
+ */
+ if (!t)
return;
+ group = t->group;
/*
* Wakeup waiters to stop polling. Can happen if cgroup is deleted
* from under a polling process.
@@ -1235,9 +1238,9 @@ static void psi_trigger_destroy(struct kref *ref)
mutex_unlock(&group->trigger_lock);
/*
- * Wait for both *trigger_ptr from psi_trigger_replace and
- * poll_task RCUs to complete their read-side critical sections
- * before destroying the trigger and optionally the poll_task
+ * Wait for psi_schedule_poll_work RCU to complete its read-side
+ * critical section before destroying the trigger and optionally the
+ * poll_task.
*/
synchronize_rcu();
/*
@@ -1254,18 +1257,6 @@ static void psi_trigger_destroy(struct kref *ref)
kfree(t);
}
-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
-{
- struct psi_trigger *old = *trigger_ptr;
-
- if (static_branch_likely(&psi_disabled))
- return;
-
- rcu_assign_pointer(*trigger_ptr, new);
- if (old)
- kref_put(&old->refcount, psi_trigger_destroy);
-}
-
__poll_t psi_trigger_poll(void **trigger_ptr,
struct file *file, poll_table *wait)
{
@@ -1275,24 +1266,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
if (static_branch_likely(&psi_disabled))
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
- rcu_read_lock();
-
- t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
- if (!t) {
- rcu_read_unlock();
+ t = smp_load_acquire(trigger_ptr);
+ if (!t)
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
- }
- kref_get(&t->refcount);
-
- rcu_read_unlock();
poll_wait(file, &t->event_wait, wait);
if (cmpxchg(&t->event, 1, 0) == 1)
ret |= EPOLLPRI;
- kref_put(&t->refcount, psi_trigger_destroy);
-
return ret;
}
@@ -1316,14 +1298,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
buf[buf_size - 1] = '\0';
- new = psi_trigger_create(&psi_system, buf, nbytes, res);
- if (IS_ERR(new))
- return PTR_ERR(new);
-
seq = file->private_data;
+
/* Take seq->lock to protect seq->private from concurrent writes */
mutex_lock(&seq->lock);
- psi_trigger_replace(&seq->private, new);
+
+ /* Allow only one trigger per file descriptor */
+ if (seq->private) {
+ mutex_unlock(&seq->lock);
+ return -EBUSY;
+ }
+
+ new = psi_trigger_create(&psi_system, buf, nbytes, res);
+ if (IS_ERR(new)) {
+ mutex_unlock(&seq->lock);
+ return PTR_ERR(new);
+ }
+
+ smp_store_release(&seq->private, new);
mutex_unlock(&seq->lock);
return nbytes;
@@ -1358,7 +1350,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
- psi_trigger_replace(&seq->private, NULL);
+ psi_trigger_destroy(seq->private);
return single_release(inode, file);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index dfcee3888b00..38602738866e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -626,7 +626,8 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
*
* All callers have to hold the siglock.
*/
-int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *info)
+int dequeue_signal(struct task_struct *tsk, sigset_t *mask,
+ kernel_siginfo_t *info, enum pid_type *type)
{
bool resched_timer = false;
int signr;
@@ -634,8 +635,10 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *in
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
+ *type = PIDTYPE_PID;
signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
if (!signr) {
+ *type = PIDTYPE_TGID;
signr = __dequeue_signal(&tsk->signal->shared_pending,
mask, info, &resched_timer);
#ifdef CONFIG_POSIX_TIMERS
@@ -903,8 +906,8 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
struct task_struct *t;
sigset_t flush;
- if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
- if (!(signal->flags & SIGNAL_GROUP_EXIT))
+ if (signal->flags & SIGNAL_GROUP_EXIT) {
+ if (signal->core_state)
return sig == SIGKILL;
/*
* The process is in the middle of dying, nothing to do.
@@ -1029,7 +1032,7 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
* then start taking the whole group down immediately.
*/
if (sig_fatal(p, sig) &&
- !(signal->flags & SIGNAL_GROUP_EXIT) &&
+ (signal->core_state || !(signal->flags & SIGNAL_GROUP_EXIT)) &&
!sigismember(&t->real_blocked, sig) &&
(sig == SIGKILL || !p->ptrace)) {
/*
@@ -1820,6 +1823,7 @@ int force_sig_perf(void __user *addr, u32 type, u64 sig_data)
* force_sig_seccomp - signals the task to allow in-process syscall emulation
* @syscall: syscall number to send to userland
* @reason: filter-supplied reason code to send to userland (via si_errno)
+ * @force_coredump: true to trigger a coredump
*
* Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
*/
@@ -2383,7 +2387,8 @@ static bool do_signal_stop(int signr)
WARN_ON_ONCE(signr & ~JOBCTL_STOP_SIGMASK);
if (!likely(current->jobctl & JOBCTL_STOP_DEQUEUED) ||
- unlikely(signal_group_exit(sig)))
+ unlikely(sig->flags & SIGNAL_GROUP_EXIT) ||
+ unlikely(sig->group_exec_task))
return false;
/*
* There is no group stop already in progress. We must
@@ -2544,7 +2549,7 @@ static void do_freezer_trap(void)
freezable_schedule();
}
-static int ptrace_signal(int signr, kernel_siginfo_t *info)
+static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
{
/*
* We do not check sig_kernel_stop(signr) but set this marker
@@ -2584,8 +2589,9 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info)
}
/* If the (new) signal is now blocked, requeue it. */
- if (sigismember(&current->blocked, signr)) {
- send_signal(signr, info, current, PIDTYPE_PID);
+ if (sigismember(&current->blocked, signr) ||
+ fatal_signal_pending(current)) {
+ send_signal(signr, info, current, type);
signr = 0;
}
@@ -2684,18 +2690,20 @@ relock:
goto relock;
}
- /* Has this task already been marked for death? */
- if (signal_group_exit(signal)) {
- ksig->info.si_signo = signr = SIGKILL;
- sigdelset(&current->pending.signal, SIGKILL);
- trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
- &sighand->action[SIGKILL - 1]);
- recalc_sigpending();
- goto fatal;
- }
-
for (;;) {
struct k_sigaction *ka;
+ enum pid_type type;
+
+ /* Has this task already been marked for death? */
+ if ((signal->flags & SIGNAL_GROUP_EXIT) ||
+ signal->group_exec_task) {
+ ksig->info.si_signo = signr = SIGKILL;
+ sigdelset(&current->pending.signal, SIGKILL);
+ trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
+ &sighand->action[SIGKILL - 1]);
+ recalc_sigpending();
+ goto fatal;
+ }
if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) &&
do_signal_stop(0))
@@ -2728,16 +2736,18 @@ relock:
* so that the instruction pointer in the signal stack
* frame points to the faulting instruction.
*/
+ type = PIDTYPE_PID;
signr = dequeue_synchronous_signal(&ksig->info);
if (!signr)
- signr = dequeue_signal(current, &current->blocked, &ksig->info);
+ signr = dequeue_signal(current, &current->blocked,
+ &ksig->info, &type);
if (!signr)
break; /* will return 0 */
if (unlikely(current->ptrace) && (signr != SIGKILL) &&
!(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) {
- signr = ptrace_signal(signr, &ksig->info);
+ signr = ptrace_signal(signr, &ksig->info, type);
if (!signr)
continue;
}
@@ -2863,13 +2873,13 @@ out:
}
/**
- * signal_delivered -
+ * signal_delivered - called after signal delivery to update blocked signals
* @ksig: kernel signal struct
* @stepping: nonzero if debugger single-step or block-step in use
*
* This function should be called when a signal has successfully been
* delivered. It updates the blocked signals accordingly (@ksig->ka.sa.sa_mask
- * is always blocked, and the signal itself is blocked unless %SA_NODEFER
+ * is always blocked), and the signal itself is blocked unless %SA_NODEFER
* is set in @ksig->ka.sa.sa_flags. Tracing is notified.
*/
static void signal_delivered(struct ksignal *ksig, int stepping)
@@ -2942,7 +2952,7 @@ void exit_signals(struct task_struct *tsk)
*/
cgroup_threadgroup_change_begin(tsk);
- if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
+ if (thread_group_empty(tsk) || (tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
tsk->flags |= PF_EXITING;
cgroup_threadgroup_change_end(tsk);
return;
@@ -3562,6 +3572,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
ktime_t *to = NULL, timeout = KTIME_MAX;
struct task_struct *tsk = current;
sigset_t mask = *which;
+ enum pid_type type;
int sig, ret = 0;
if (ts) {
@@ -3578,7 +3589,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
signotset(&mask);
spin_lock_irq(&tsk->sighand->siglock);
- sig = dequeue_signal(tsk, &mask, info);
+ sig = dequeue_signal(tsk, &mask, info, &type);
if (!sig && timeout) {
/*
* None ready, temporarily unblock those we're interested
@@ -3597,7 +3608,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
spin_lock_irq(&tsk->sighand->siglock);
__set_task_blocked(tsk, &tsk->real_blocked);
sigemptyset(&tsk->real_blocked);
- sig = dequeue_signal(tsk, &mask, info);
+ sig = dequeue_signal(tsk, &mask, info, &type);
}
spin_unlock_irq(&tsk->sighand->siglock);
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index ce161a8e8d97..66b8af394e58 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -16,11 +16,13 @@
#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
#include <linux/jump_label.h>
#include <linux/sysctl.h>
+#include <linux/init.h>
static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
-int stack_erasing_sysctl(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+#ifdef CONFIG_SYSCTL
+static int stack_erasing_sysctl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret = 0;
int state = !static_branch_unlikely(&stack_erasing_bypass);
@@ -42,6 +44,26 @@ int stack_erasing_sysctl(struct ctl_table *table, int write,
state ? "enabled" : "disabled");
return ret;
}
+static struct ctl_table stackleak_sysctls[] = {
+ {
+ .procname = "stack_erasing",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = stack_erasing_sysctl,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {}
+};
+
+static int __init stackleak_sysctls_init(void)
+{
+ register_sysctl_init("kernel", stackleak_sysctls);
+ return 0;
+}
+late_initcall(stackleak_sysctls_init);
+#endif /* CONFIG_SYSCTL */
#define skip_erasing() static_branch_unlikely(&stack_erasing_bypass)
#else
diff --git a/kernel/sys.c b/kernel/sys.c
index 8fdac0d90504..ecc4cf019242 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -220,7 +220,6 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
niceval = MAX_NICE;
rcu_read_lock();
- read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
if (who)
@@ -235,9 +234,11 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
+ read_lock(&tasklist_lock);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
error = set_one_prio(p, niceval, error);
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+ read_unlock(&tasklist_lock);
break;
case PRIO_USER:
uid = make_kuid(cred->user_ns, who);
@@ -249,16 +250,15 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
if (!user)
goto out_unlock; /* No processes for this user */
}
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
error = set_one_prio(p, niceval, error);
- } while_each_thread(g, p);
+ }
if (!uid_eq(uid, cred->uid))
free_uid(user); /* For find_user() */
break;
}
out_unlock:
- read_unlock(&tasklist_lock);
rcu_read_unlock();
out:
return error;
@@ -283,7 +283,6 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
return -EINVAL;
rcu_read_lock();
- read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
if (who)
@@ -301,11 +300,13 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
+ read_lock(&tasklist_lock);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
niceval = nice_to_rlimit(task_nice(p));
if (niceval > retval)
retval = niceval;
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+ read_unlock(&tasklist_lock);
break;
case PRIO_USER:
uid = make_kuid(cred->user_ns, who);
@@ -317,19 +318,18 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
if (!user)
goto out_unlock; /* No processes for this user */
}
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
niceval = nice_to_rlimit(task_nice(p));
if (niceval > retval)
retval = niceval;
}
- } while_each_thread(g, p);
+ }
if (!uid_eq(uid, cred->uid))
free_uid(user); /* for find_user() */
break;
}
out_unlock:
- read_unlock(&tasklist_lock);
rcu_read_unlock();
return retval;
@@ -2261,6 +2261,66 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
+#ifdef CONFIG_ANON_VMA_NAME
+
+#define ANON_VMA_NAME_MAX_LEN 80
+#define ANON_VMA_NAME_INVALID_CHARS "\\`$[]"
+
+static inline bool is_valid_name_char(char ch)
+{
+ /* printable ascii characters, excluding ANON_VMA_NAME_INVALID_CHARS */
+ return ch > 0x1f && ch < 0x7f &&
+ !strchr(ANON_VMA_NAME_INVALID_CHARS, ch);
+}
+
+static int prctl_set_vma(unsigned long opt, unsigned long addr,
+ unsigned long size, unsigned long arg)
+{
+ struct mm_struct *mm = current->mm;
+ const char __user *uname;
+ char *name, *pch;
+ int error;
+
+ switch (opt) {
+ case PR_SET_VMA_ANON_NAME:
+ uname = (const char __user *)arg;
+ if (uname) {
+ name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
+
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+
+ for (pch = name; *pch != '\0'; pch++) {
+ if (!is_valid_name_char(*pch)) {
+ kfree(name);
+ return -EINVAL;
+ }
+ }
+ } else {
+ /* Reset the name */
+ name = NULL;
+ }
+
+ mmap_write_lock(mm);
+ error = madvise_set_anon_name(mm, addr, size, name);
+ mmap_write_unlock(mm);
+ kfree(name);
+ break;
+ default:
+ error = -EINVAL;
+ }
+
+ return error;
+}
+
+#else /* CONFIG_ANON_VMA_NAME */
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+ unsigned long size, unsigned long arg)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_ANON_VMA_NAME */
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -2530,6 +2590,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
error = sched_core_share_pid(arg2, arg3, arg4, arg5);
break;
#endif
+ case PR_SET_VMA:
+ error = prctl_set_vma(arg2, arg3, arg4, arg5);
+ break;
default:
error = -EINVAL;
break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index d1944258cfc0..a492f159624f 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -297,6 +297,7 @@ COND_SYSCALL(get_mempolicy);
COND_SYSCALL(set_mempolicy);
COND_SYSCALL(migrate_pages);
COND_SYSCALL(move_pages);
+COND_SYSCALL(set_mempolicy_home_node);
COND_SYSCALL(perf_event_open);
COND_SYSCALL(accept4);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d7ed1dffa426..5ae443b2882e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -20,7 +20,6 @@
*/
#include <linux/module.h>
-#include <linux/aio.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
@@ -50,7 +49,6 @@
#include <linux/times.h>
#include <linux/limits.h>
#include <linux/dcache.h>
-#include <linux/dnotify.h>
#include <linux/syscalls.h>
#include <linux/vmstat.h>
#include <linux/nfs_fs.h>
@@ -58,19 +56,15 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
-#include <linux/kprobes.h>
-#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/kmod.h>
#include <linux/capability.h>
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
-#include <linux/sched/coredump.h>
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
#include <linux/userfaultfd_k.h>
-#include <linux/coredump.h>
#include <linux/latencytop.h>
#include <linux/pid.h>
#include <linux/delayacct.h>
@@ -97,64 +91,21 @@
#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
#include <linux/lockdep.h>
#endif
-#ifdef CONFIG_CHR_DEV_SG
-#include <scsi/sg.h>
-#endif
-#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
-#include <linux/stackleak.h>
-#endif
-#ifdef CONFIG_LOCKUP_DETECTOR
-#include <linux/nmi.h>
-#endif
#if defined(CONFIG_SYSCTL)
/* Constants used for minimum and maximum */
-#ifdef CONFIG_LOCKUP_DETECTOR
-static int sixty = 60;
-#endif
-
-static int __maybe_unused neg_one = -1;
-static int __maybe_unused two = 2;
-static int __maybe_unused four = 4;
-static unsigned long zero_ul;
-static unsigned long one_ul = 1;
-static unsigned long long_max = LONG_MAX;
-static int one_hundred = 100;
-static int two_hundred = 200;
-static int one_thousand = 1000;
-#ifdef CONFIG_PRINTK
-static int ten_thousand = 10000;
-#endif
+
#ifdef CONFIG_PERF_EVENTS
-static int six_hundred_forty_kb = 640 * 1024;
+static const int six_hundred_forty_kb = 640 * 1024;
#endif
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
-static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
-
-/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
-static int maxolduid = 65535;
-static int minolduid;
+static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
-static int ngroups_max = NGROUPS_MAX;
+static const int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
-/*
- * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
- * and hung_task_check_interval_secs
- */
-#ifdef CONFIG_DETECT_HUNG_TASK
-static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
-#endif
-
-#ifdef CONFIG_INOTIFY_USER
-#include <linux/inotify.h>
-#endif
-#ifdef CONFIG_FANOTIFY
-#include <linux/fanotify.h>
-#endif
-
#ifdef CONFIG_PROC_SYSCTL
/**
@@ -191,8 +142,8 @@ int sysctl_legacy_va_layout;
#endif
#ifdef CONFIG_COMPACTION
-static int min_extfrag_threshold;
-static int max_extfrag_threshold = 1000;
+/* min_extfrag_threshold is SYSCTL_ZERO */;
+static const int max_extfrag_threshold = 1000;
#endif
#endif /* CONFIG_SYSCTL */
@@ -803,12 +754,12 @@ static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
}
-static int do_proc_douintvec(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos,
- int (*conv)(unsigned long *lvalp,
- unsigned int *valp,
- int write, void *data),
- void *data)
+int do_proc_douintvec(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
{
return __do_proc_douintvec(table->data, table, write,
buffer, lenp, ppos, conv, data);
@@ -937,17 +888,6 @@ static int proc_taint(struct ctl_table *table, int write,
return err;
}
-#ifdef CONFIG_PRINTK
-static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- if (write && !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
-#endif
-
/**
* struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
* @min: pointer to minimum allowable value
@@ -1143,67 +1083,6 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write,
}
EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
-static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
- unsigned int *valp,
- int write, void *data)
-{
- if (write) {
- unsigned int val;
-
- val = round_pipe_size(*lvalp);
- if (val == 0)
- return -EINVAL;
-
- *valp = val;
- } else {
- unsigned int val = *valp;
- *lvalp = (unsigned long) val;
- }
-
- return 0;
-}
-
-static int proc_dopipe_max_size(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- return do_proc_douintvec(table, write, buffer, lenp, ppos,
- do_proc_dopipe_max_size_conv, NULL);
-}
-
-static void validate_coredump_safety(void)
-{
-#ifdef CONFIG_COREDUMP
- if (suid_dumpable == SUID_DUMP_ROOT &&
- core_pattern[0] != '/' && core_pattern[0] != '|') {
- printk(KERN_WARNING
-"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
-"Pipe handler or fully qualified core dump path required.\n"
-"Set kernel.core_pattern before fs.suid_dumpable.\n"
- );
- }
-#endif
-}
-
-static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (!error)
- validate_coredump_safety();
- return error;
-}
-
-#ifdef CONFIG_COREDUMP
-static int proc_dostring_coredump(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int error = proc_dostring(table, write, buffer, lenp, ppos);
- if (!error)
- validate_coredump_safety();
- return error;
-}
-#endif
-
#ifdef CONFIG_MAGIC_SYSRQ
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -1266,10 +1145,11 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
err = proc_get_long(&p, &left, &val, &neg,
proc_wspace_sep,
sizeof(proc_wspace_sep), NULL);
- if (err)
+ if (err || neg) {
+ err = -EINVAL;
break;
- if (neg)
- continue;
+ }
+
val = convmul * val / convdiv;
if ((min && val < *min) || (max && val > *max)) {
err = -EINVAL;
@@ -1927,29 +1807,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#ifdef CONFIG_COREDUMP
- {
- .procname = "core_uses_pid",
- .data = &core_uses_pid,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "core_pattern",
- .data = core_pattern,
- .maxlen = CORENAME_MAX_SIZE,
- .mode = 0644,
- .proc_handler = proc_dostring_coredump,
- },
- {
- .procname = "core_pipe_limit",
- .data = &core_pipe_limit,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
@@ -1963,7 +1820,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &neg_one,
+ .extra1 = SYSCTL_NEG_ONE,
.extra2 = SYSCTL_ONE,
},
#endif
@@ -2130,15 +1987,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dostring,
},
#endif
-#ifdef CONFIG_CHR_DEV_SG
- {
- .procname = "sg-big-buff",
- .data = &sg_big_buff,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
-#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
{
.procname = "acct",
@@ -2174,30 +2022,18 @@ static struct ctl_table kern_table[] = {
.proc_handler = sysctl_max_threads,
},
{
- .procname = "random",
- .mode = 0555,
- .child = random_table,
- },
- {
.procname = "usermodehelper",
.mode = 0555,
.child = usermodehelper_table,
},
-#ifdef CONFIG_FW_LOADER_USER_HELPER
- {
- .procname = "firmware_config",
- .mode = 0555,
- .child = firmware_config_table,
- },
-#endif
{
.procname = "overflowuid",
.data = &overflowuid,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &minolduid,
- .extra2 = &maxolduid,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
},
{
.procname = "overflowgid",
@@ -2205,8 +2041,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &minolduid,
- .extra2 = &maxolduid,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
},
#ifdef CONFIG_S390
{
@@ -2251,66 +2087,9 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
-#if defined CONFIG_PRINTK
- {
- .procname = "printk",
- .data = &console_loglevel,
- .maxlen = 4*sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "printk_ratelimit",
- .data = &printk_ratelimit_state.interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "printk_ratelimit_burst",
- .data = &printk_ratelimit_state.burst,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "printk_delay",
- .data = &printk_delay_msec,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &ten_thousand,
- },
- {
- .procname = "printk_devkmsg",
- .data = devkmsg_log_str,
- .maxlen = DEVKMSG_STR_MAX_SIZE,
- .mode = 0644,
- .proc_handler = devkmsg_sysctl_set_loglvl,
- },
- {
- .procname = "dmesg_restrict",
- .data = &dmesg_restrict,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax_sysadmin,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "kptr_restrict",
- .data = &kptr_restrict,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax_sysadmin,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &two,
- },
-#endif
{
.procname = "ngroups_max",
- .data = &ngroups_max,
+ .data = (void *)&ngroups_max,
.maxlen = sizeof (int),
.mode = 0444,
.proc_handler = proc_dointvec,
@@ -2322,96 +2101,6 @@ static struct ctl_table kern_table[] = {
.mode = 0444,
.proc_handler = proc_dointvec,
},
-#if defined(CONFIG_LOCKUP_DETECTOR)
- {
- .procname = "watchdog",
- .data = &watchdog_user_enabled,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_watchdog,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "watchdog_thresh",
- .data = &watchdog_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_watchdog_thresh,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &sixty,
- },
- {
- .procname = "nmi_watchdog",
- .data = &nmi_watchdog_user_enabled,
- .maxlen = sizeof(int),
- .mode = NMI_WATCHDOG_SYSCTL_PERM,
- .proc_handler = proc_nmi_watchdog,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "watchdog_cpumask",
- .data = &watchdog_cpumask_bits,
- .maxlen = NR_CPUS,
- .mode = 0644,
- .proc_handler = proc_watchdog_cpumask,
- },
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
- {
- .procname = "soft_watchdog",
- .data = &soft_watchdog_user_enabled,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_soft_watchdog,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "softlockup_panic",
- .data = &softlockup_panic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#ifdef CONFIG_SMP
- {
- .procname = "softlockup_all_cpu_backtrace",
- .data = &sysctl_softlockup_all_cpu_backtrace,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif /* CONFIG_SMP */
-#endif
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
- {
- .procname = "hardlockup_panic",
- .data = &hardlockup_panic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#ifdef CONFIG_SMP
- {
- .procname = "hardlockup_all_cpu_backtrace",
- .data = &sysctl_hardlockup_all_cpu_backtrace,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif /* CONFIG_SMP */
-#endif
-#endif
-
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
.procname = "unknown_nmi_panic",
@@ -2514,60 +2203,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#ifdef CONFIG_DETECT_HUNG_TASK
-#ifdef CONFIG_SMP
- {
- .procname = "hung_task_all_cpu_backtrace",
- .data = &sysctl_hung_task_all_cpu_backtrace,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif /* CONFIG_SMP */
- {
- .procname = "hung_task_panic",
- .data = &sysctl_hung_task_panic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "hung_task_check_count",
- .data = &sysctl_hung_task_check_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "hung_task_timeout_secs",
- .data = &sysctl_hung_task_timeout_secs,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_dohung_task_timeout_secs,
- .extra2 = &hung_task_timeout_max,
- },
- {
- .procname = "hung_task_check_interval_secs",
- .data = &sysctl_hung_task_check_interval_secs,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_dohung_task_timeout_secs,
- .extra2 = &hung_task_timeout_max,
- },
- {
- .procname = "hung_task_warnings",
- .data = &sysctl_hung_task_warnings,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &neg_one,
- },
-#endif
#ifdef CONFIG_RT_MUTEXES
{
.procname = "max_lock_depth",
@@ -2627,7 +2262,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = perf_cpu_time_max_percent_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "perf_event_max_stack",
@@ -2636,7 +2271,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = perf_event_max_stack_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &six_hundred_forty_kb,
+ .extra2 = (void *)&six_hundred_forty_kb,
},
{
.procname = "perf_event_max_contexts_per_stack",
@@ -2645,7 +2280,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = perf_event_max_stack_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
#endif
{
@@ -2676,7 +2311,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = bpf_unpriv_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "bpf_stats_enabled",
@@ -2708,17 +2343,6 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_INT_MAX,
},
#endif
-#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
- {
- .procname = "stack_erasing",
- .data = NULL,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = stack_erasing_sysctl,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif
{ }
};
@@ -2730,7 +2354,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = overcommit_policy_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "panic_on_oom",
@@ -2739,7 +2363,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "oom_kill_allocating_task",
@@ -2784,7 +2408,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = dirty_background_ratio_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "dirty_background_bytes",
@@ -2792,7 +2416,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(dirty_background_bytes),
.mode = 0644,
.proc_handler = dirty_background_bytes_handler,
- .extra1 = &one_ul,
+ .extra1 = SYSCTL_LONG_ONE,
},
{
.procname = "dirty_ratio",
@@ -2801,7 +2425,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = dirty_ratio_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "dirty_bytes",
@@ -2809,7 +2433,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(vm_dirty_bytes),
.mode = 0644,
.proc_handler = dirty_bytes_handler,
- .extra1 = &dirty_bytes_min,
+ .extra1 = (void *)&dirty_bytes_min,
},
{
.procname = "dirty_writeback_centisecs",
@@ -2841,7 +2465,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two_hundred,
+ .extra2 = SYSCTL_TWO_HUNDRED,
},
#ifdef CONFIG_HUGETLB_PAGE
{
@@ -2898,7 +2522,7 @@ static struct ctl_table vm_table[] = {
.mode = 0200,
.proc_handler = drop_caches_sysctl_handler,
.extra1 = SYSCTL_ONE,
- .extra2 = &four,
+ .extra2 = SYSCTL_FOUR,
},
#ifdef CONFIG_COMPACTION
{
@@ -2915,7 +2539,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = compaction_proactiveness_sysctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "extfrag_threshold",
@@ -2923,8 +2547,8 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &min_extfrag_threshold,
- .extra2 = &max_extfrag_threshold,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = (void *)&max_extfrag_threshold,
},
{
.procname = "compact_unevictable_allowed",
@@ -2960,7 +2584,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = watermark_scale_factor_sysctl_handler,
.extra1 = SYSCTL_ONE,
- .extra2 = &one_thousand,
+ .extra2 = SYSCTL_THREE_THOUSAND,
},
{
.procname = "percpu_pagelist_high_fraction",
@@ -3039,7 +2663,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "min_slab_ratio",
@@ -3048,7 +2672,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = sysctl_min_slab_ratio_sysctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
#endif
#ifdef CONFIG_SMP
@@ -3182,221 +2806,6 @@ static struct ctl_table vm_table[] = {
{ }
};
-static struct ctl_table fs_table[] = {
- {
- .procname = "inode-nr",
- .data = &inodes_stat,
- .maxlen = 2*sizeof(long),
- .mode = 0444,
- .proc_handler = proc_nr_inodes,
- },
- {
- .procname = "inode-state",
- .data = &inodes_stat,
- .maxlen = 7*sizeof(long),
- .mode = 0444,
- .proc_handler = proc_nr_inodes,
- },
- {
- .procname = "file-nr",
- .data = &files_stat,
- .maxlen = sizeof(files_stat),
- .mode = 0444,
- .proc_handler = proc_nr_files,
- },
- {
- .procname = "file-max",
- .data = &files_stat.max_files,
- .maxlen = sizeof(files_stat.max_files),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero_ul,
- .extra2 = &long_max,
- },
- {
- .procname = "nr_open",
- .data = &sysctl_nr_open,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &sysctl_nr_open_min,
- .extra2 = &sysctl_nr_open_max,
- },
- {
- .procname = "dentry-state",
- .data = &dentry_stat,
- .maxlen = 6*sizeof(long),
- .mode = 0444,
- .proc_handler = proc_nr_dentry,
- },
- {
- .procname = "overflowuid",
- .data = &fs_overflowuid,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &minolduid,
- .extra2 = &maxolduid,
- },
- {
- .procname = "overflowgid",
- .data = &fs_overflowgid,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &minolduid,
- .extra2 = &maxolduid,
- },
-#ifdef CONFIG_FILE_LOCKING
- {
- .procname = "leases-enable",
- .data = &leases_enable,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
-#ifdef CONFIG_DNOTIFY
- {
- .procname = "dir-notify-enable",
- .data = &dir_notify_enable,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
-#ifdef CONFIG_MMU
-#ifdef CONFIG_FILE_LOCKING
- {
- .procname = "lease-break-time",
- .data = &lease_break_time,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
-#ifdef CONFIG_AIO
- {
- .procname = "aio-nr",
- .data = &aio_nr,
- .maxlen = sizeof(aio_nr),
- .mode = 0444,
- .proc_handler = proc_doulongvec_minmax,
- },
- {
- .procname = "aio-max-nr",
- .data = &aio_max_nr,
- .maxlen = sizeof(aio_max_nr),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
-#endif /* CONFIG_AIO */
-#ifdef CONFIG_INOTIFY_USER
- {
- .procname = "inotify",
- .mode = 0555,
- .child = inotify_table,
- },
-#endif
-#ifdef CONFIG_FANOTIFY
- {
- .procname = "fanotify",
- .mode = 0555,
- .child = fanotify_table,
- },
-#endif
-#ifdef CONFIG_EPOLL
- {
- .procname = "epoll",
- .mode = 0555,
- .child = epoll_table,
- },
-#endif
-#endif
- {
- .procname = "protected_symlinks",
- .data = &sysctl_protected_symlinks,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "protected_hardlinks",
- .data = &sysctl_protected_hardlinks,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- {
- .procname = "protected_fifos",
- .data = &sysctl_protected_fifos,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &two,
- },
- {
- .procname = "protected_regular",
- .data = &sysctl_protected_regular,
- .maxlen = sizeof(int),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &two,
- },
- {
- .procname = "suid_dumpable",
- .data = &suid_dumpable,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax_coredump,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &two,
- },
-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
- {
- .procname = "binfmt_misc",
- .mode = 0555,
- .child = sysctl_mount_point,
- },
-#endif
- {
- .procname = "pipe-max-size",
- .data = &pipe_max_size,
- .maxlen = sizeof(pipe_max_size),
- .mode = 0644,
- .proc_handler = proc_dopipe_max_size,
- },
- {
- .procname = "pipe-user-pages-hard",
- .data = &pipe_user_pages_hard,
- .maxlen = sizeof(pipe_user_pages_hard),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
- {
- .procname = "pipe-user-pages-soft",
- .data = &pipe_user_pages_soft,
- .maxlen = sizeof(pipe_user_pages_soft),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
- {
- .procname = "mount-max",
- .data = &sysctl_mount_max,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ONE,
- },
- { }
-};
-
static struct ctl_table debug_table[] = {
#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
{
@@ -3407,17 +2816,6 @@ static struct ctl_table debug_table[] = {
.proc_handler = proc_dointvec
},
#endif
-#if defined(CONFIG_OPTPROBES)
- {
- .procname = "kprobes-optimization",
- .data = &sysctl_kprobes_optimization,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_kprobes_optimization_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif
{ }
};
@@ -3425,41 +2823,18 @@ static struct ctl_table dev_table[] = {
{ }
};
-static struct ctl_table sysctl_base_table[] = {
- {
- .procname = "kernel",
- .mode = 0555,
- .child = kern_table,
- },
- {
- .procname = "vm",
- .mode = 0555,
- .child = vm_table,
- },
- {
- .procname = "fs",
- .mode = 0555,
- .child = fs_table,
- },
- {
- .procname = "debug",
- .mode = 0555,
- .child = debug_table,
- },
- {
- .procname = "dev",
- .mode = 0555,
- .child = dev_table,
- },
- { }
-};
+DECLARE_SYSCTL_BASE(kernel, kern_table);
+DECLARE_SYSCTL_BASE(vm, vm_table);
+DECLARE_SYSCTL_BASE(debug, debug_table);
+DECLARE_SYSCTL_BASE(dev, dev_table);
-int __init sysctl_init(void)
+int __init sysctl_init_bases(void)
{
- struct ctl_table_header *hdr;
+ register_sysctl_base(kernel);
+ register_sysctl_base(vm);
+ register_sysctl_base(debug);
+ register_sysctl_base(dev);
- hdr = register_sysctl_table(sysctl_base_table);
- kmemleak_not_leak(hdr);
return 0;
}
#endif /* CONFIG_SYSCTL */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b7e52a642948..1cf73807b450 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -285,7 +285,7 @@ static void clocksource_verify_choose_cpus(void)
return;
/* Make sure to select at least one CPU other than the current CPU. */
- cpu = cpumask_next(-1, cpu_online_mask);
+ cpu = cpumask_first(cpu_online_mask);
if (cpu == smp_processor_id())
cpu = cpumask_next(cpu, cpu_online_mask);
if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
@@ -307,7 +307,7 @@ static void clocksource_verify_choose_cpus(void)
cpu = prandom_u32() % nr_cpu_ids;
cpu = cpumask_next(cpu - 1, cpu_online_mask);
if (cpu >= nr_cpu_ids)
- cpu = cpumask_next(-1, cpu_online_mask);
+ cpu = cpumask_first(cpu_online_mask);
if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
cpumask_set_cpu(cpu, &cpus_chosen);
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 420ff4bc67fd..752ed89a293b 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -70,6 +70,13 @@ config HAVE_C_RECORDMCOUNT
help
C version of recordmcount available?
+config BUILDTIME_MCOUNT_SORT
+ bool
+ default y
+ depends on BUILDTIME_TABLE_SORT && !S390
+ help
+ Sort the mcount_loc section at build time.
+
config TRACER_MAX_TRACE
bool
@@ -915,6 +922,20 @@ config EVENT_TRACE_TEST_SYSCALLS
TBD - enable a way to actually call the syscalls as we test their
events
+config FTRACE_SORT_STARTUP_TEST
+ bool "Verify compile time sorting of ftrace functions"
+ depends on DYNAMIC_FTRACE
+ depends on BUILDTIME_MCOUNT_SORT
+ help
+ Sorting of the mcount_loc sections that is used to find the
+ where the ftrace knows where to patch functions for tracing
+ and other callbacks is done at compile time. But if the sort
+ is not done correctly, it will cause non-deterministic failures.
+ When this is set, the sorted sections will be verified that they
+ are in deed sorted and will warn if they are not.
+
+ If unsure, say N
+
config RING_BUFFER_STARTUP_TEST
bool "Ring buffer startup self test"
depends on RING_BUFFER
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index be5f6b32a012..f9feb197b2da 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6394,6 +6394,27 @@ static int ftrace_cmp_ips(const void *a, const void *b)
return 0;
}
+#ifdef CONFIG_FTRACE_SORT_STARTUP_TEST
+static void test_is_sorted(unsigned long *start, unsigned long count)
+{
+ int i;
+
+ for (i = 1; i < count; i++) {
+ if (WARN(start[i - 1] > start[i],
+ "[%d] %pS at %lx is not sorted with %pS at %lx\n", i,
+ (void *)start[i - 1], start[i - 1],
+ (void *)start[i], start[i]))
+ break;
+ }
+ if (i == count)
+ pr_info("ftrace section at %px sorted properly\n", start);
+}
+#else
+static void test_is_sorted(unsigned long *start, unsigned long count)
+{
+}
+#endif
+
static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
@@ -6412,8 +6433,17 @@ static int ftrace_process_locs(struct module *mod,
if (!count)
return 0;
- sort(start, count, sizeof(*start),
- ftrace_cmp_ips, NULL);
+ /*
+ * Sorting mcount in vmlinux at build time depend on
+ * CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in
+ * modules can not be sorted at build time.
+ */
+ if (!IS_ENABLED(CONFIG_BUILDTIME_MCOUNT_SORT) || mod) {
+ sort(start, count, sizeof(*start),
+ ftrace_cmp_ips, NULL);
+ } else {
+ test_is_sorted(start, count);
+ }
start_pg = ftrace_allocate_pages(count);
if (!start_pg)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2699e9e562b1..05dfc7a12d3d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5898,16 +5898,13 @@ static __init int test_ringbuffer(void)
rb_data[cpu].buffer = buffer;
rb_data[cpu].cpu = cpu;
rb_data[cpu].cnt = cpu;
- rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
- "rbtester/%d", cpu);
+ rb_threads[cpu] = kthread_run_on_cpu(rb_test, &rb_data[cpu],
+ cpu, "rbtester/%u");
if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
pr_cont("FAILED\n");
ret = PTR_ERR(rb_threads[cpu]);
goto out_free;
}
-
- kthread_bind(rb_threads[cpu], cpu);
- wake_up_process(rb_threads[cpu]);
}
/* Now create the rb hammer! */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 78ea542ce3bc..a569a0cb81ee 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -980,6 +980,8 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev
ring_buffer_write(buffer, event->array[0], &event->array[1]);
/* Release the temp buffer */
this_cpu_dec(trace_buffered_event_cnt);
+ /* ring_buffer_unlock_commit() enables preemption */
+ preempt_enable_notrace();
} else
ring_buffer_unlock_commit(buffer, event);
}
@@ -2601,6 +2603,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
trace_flags |= TRACE_FLAG_HARDIRQ;
if (in_serving_softirq())
trace_flags |= TRACE_FLAG_SOFTIRQ;
+ if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
+ trace_flags |= TRACE_FLAG_BH_OFF;
if (tif_need_resched())
trace_flags |= TRACE_FLAG_NEED_RESCHED;
@@ -2745,8 +2749,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
*current_rb = tr->array_buffer.buffer;
if (!tr->no_filter_buffering_ref &&
- (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
- (entry = this_cpu_read(trace_buffered_event))) {
+ (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
+ preempt_disable_notrace();
/*
* Filtering is on, so try to use the per cpu buffer first.
* This buffer will simulate a ring_buffer_event,
@@ -2764,33 +2768,38 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
* is still quicker than no copy on match, but having
* to discard out of the ring buffer on a failed match.
*/
- int max_len = PAGE_SIZE - struct_size(entry, array, 1);
+ if ((entry = __this_cpu_read(trace_buffered_event))) {
+ int max_len = PAGE_SIZE - struct_size(entry, array, 1);
- val = this_cpu_inc_return(trace_buffered_event_cnt);
+ val = this_cpu_inc_return(trace_buffered_event_cnt);
- /*
- * Preemption is disabled, but interrupts and NMIs
- * can still come in now. If that happens after
- * the above increment, then it will have to go
- * back to the old method of allocating the event
- * on the ring buffer, and if the filter fails, it
- * will have to call ring_buffer_discard_commit()
- * to remove it.
- *
- * Need to also check the unlikely case that the
- * length is bigger than the temp buffer size.
- * If that happens, then the reserve is pretty much
- * guaranteed to fail, as the ring buffer currently
- * only allows events less than a page. But that may
- * change in the future, so let the ring buffer reserve
- * handle the failure in that case.
- */
- if (val == 1 && likely(len <= max_len)) {
- trace_event_setup(entry, type, trace_ctx);
- entry->array[0] = len;
- return entry;
+ /*
+ * Preemption is disabled, but interrupts and NMIs
+ * can still come in now. If that happens after
+ * the above increment, then it will have to go
+ * back to the old method of allocating the event
+ * on the ring buffer, and if the filter fails, it
+ * will have to call ring_buffer_discard_commit()
+ * to remove it.
+ *
+ * Need to also check the unlikely case that the
+ * length is bigger than the temp buffer size.
+ * If that happens, then the reserve is pretty much
+ * guaranteed to fail, as the ring buffer currently
+ * only allows events less than a page. But that may
+ * change in the future, so let the ring buffer reserve
+ * handle the failure in that case.
+ */
+ if (val == 1 && likely(len <= max_len)) {
+ trace_event_setup(entry, type, trace_ctx);
+ entry->array[0] = len;
+ /* Return with preemption disabled */
+ return entry;
+ }
+ this_cpu_dec(trace_buffered_event_cnt);
}
- this_cpu_dec(trace_buffered_event_cnt);
+ /* __trace_buffer_lock_reserve() disables preemption */
+ preempt_enable_notrace();
}
entry = __trace_buffer_lock_reserve(*current_rb, type, len,
@@ -4183,7 +4192,7 @@ unsigned long trace_total_entries(struct trace_array *tr)
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n"
- "# / _-----=> irqs-off \n"
+ "# / _-----=> irqs-off/BH-disabled\n"
"# | / _----=> need-resched \n"
"# || / _---=> hardirq/softirq \n"
"# ||| / _--=> preempt-depth \n"
@@ -4224,7 +4233,7 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
print_event_info(buf, m);
- seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
+ seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
@@ -4834,6 +4843,12 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp)
return 0;
}
+static int tracing_mark_open(struct inode *inode, struct file *filp)
+{
+ stream_open(inode, filp);
+ return tracing_open_generic_tr(inode, filp);
+}
+
static int tracing_release(struct inode *inode, struct file *file)
{
struct trace_array *tr = inode->i_private;
@@ -5635,7 +5650,7 @@ static const char readme_msg[] =
"\t - a numeric literal: e.g. ms_per_sec=1000,\n"
"\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
"\n"
- "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
+ "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
"\t multiplication(*) and division(/) operators. An operand can be either a\n"
"\t variable reference, field or numeric literal.\n"
"\n"
@@ -6718,10 +6733,9 @@ waitagain:
cnt = PAGE_SIZE - 1;
/* reset all but tr, trace, and overruns */
- memset_startat(iter, 0, seq);
+ trace_iterator_reset(iter);
cpumask_clear(iter->started);
trace_seq_init(&iter->seq);
- iter->pos = -1;
trace_event_read_lock();
trace_access_lock(iter->cpu_file);
@@ -7110,9 +7124,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (tt)
event_triggers_post_call(tr->trace_marker_file, tt);
- if (written > 0)
- *fpos += written;
-
return written;
}
@@ -7171,9 +7182,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
__buffer_unlock_commit(buffer, event);
- if (written > 0)
- *fpos += written;
-
return written;
}
@@ -7573,16 +7581,14 @@ static const struct file_operations tracing_free_buffer_fops = {
};
static const struct file_operations tracing_mark_fops = {
- .open = tracing_open_generic_tr,
+ .open = tracing_mark_open,
.write = tracing_mark_write,
- .llseek = generic_file_llseek,
.release = tracing_release_generic_tr,
};
static const struct file_operations tracing_mark_raw_fops = {
- .open = tracing_open_generic_tr,
+ .open = tracing_mark_open,
.write = tracing_mark_raw_write,
- .llseek = generic_file_llseek,
.release = tracing_release_generic_tr,
};
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 38715aa6cfdf..d038ddbf1bea 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -83,6 +83,9 @@ enum trace_type {
#undef __dynamic_array
#define __dynamic_array(type, item) type item[];
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item) type item[];
+
#undef F_STRUCT
#define F_STRUCT(args...) args
@@ -1334,10 +1337,12 @@ __trace_event_discard_commit(struct trace_buffer *buffer,
struct ring_buffer_event *event)
{
if (this_cpu_read(trace_buffered_event) == event) {
- /* Simply release the temp buffer */
+ /* Simply release the temp buffer and enable preemption */
this_cpu_dec(trace_buffered_event_cnt);
+ preempt_enable_notrace();
return;
}
+ /* ring_buffer_discard_commit() enables preemption */
ring_buffer_discard_commit(buffer, event);
}
@@ -1465,6 +1470,7 @@ struct filter_pred {
static inline bool is_string_field(struct ftrace_event_field *field)
{
return field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING ||
field->filter_type == FILTER_STATIC_STRING ||
field->filter_type == FILTER_PTR_STRING ||
field->filter_type == FILTER_COMM;
@@ -1572,15 +1578,13 @@ extern int event_enable_trigger_print(struct seq_file *m,
struct event_trigger_data *data);
extern void event_enable_trigger_free(struct event_trigger_ops *ops,
struct event_trigger_data *data);
-extern int event_enable_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param);
+extern int event_enable_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param);
extern int event_enable_register_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
extern void event_enable_unregister_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *test,
struct trace_event_file *file);
extern void trigger_data_free(struct event_trigger_data *data);
@@ -1606,6 +1610,30 @@ get_named_trigger_data(struct event_trigger_data *data);
extern int register_event_command(struct event_command *cmd);
extern int unregister_event_command(struct event_command *cmd);
extern int register_trigger_hist_enable_disable_cmds(void);
+extern bool event_trigger_check_remove(const char *glob);
+extern bool event_trigger_empty_param(const char *param);
+extern int event_trigger_separate_filter(char *param_and_filter, char **param,
+ char **filter, bool param_required);
+extern struct event_trigger_data *
+event_trigger_alloc(struct event_command *cmd_ops,
+ char *cmd,
+ char *param,
+ void *private_data);
+extern int event_trigger_parse_num(char *trigger,
+ struct event_trigger_data *trigger_data);
+extern int event_trigger_set_filter(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *param,
+ struct event_trigger_data *trigger_data);
+extern void event_trigger_reset_filter(struct event_command *cmd_ops,
+ struct event_trigger_data *trigger_data);
+extern int event_trigger_register(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob,
+ char *cmd,
+ char *trigger,
+ struct event_trigger_data *trigger_data,
+ int *n_registered);
/**
* struct event_trigger_ops - callbacks for trace event triggers
@@ -1613,10 +1641,20 @@ extern int register_trigger_hist_enable_disable_cmds(void);
* The methods in this structure provide per-event trigger hooks for
* various trigger operations.
*
+ * The @init and @free methods are used during trigger setup and
+ * teardown, typically called from an event_command's @parse()
+ * function implementation.
+ *
+ * The @print method is used to print the trigger spec.
+ *
+ * The @trigger method is the function that actually implements the
+ * trigger and is called in the context of the triggering event
+ * whenever that event occurs.
+ *
* All the methods below, except for @init() and @free(), must be
* implemented.
*
- * @func: The trigger 'probe' function called when the triggering
+ * @trigger: The trigger 'probe' function called when the triggering
* event occurs. The data passed into this callback is the data
* that was supplied to the event_command @reg() function that
* registered the trigger (see struct event_command) along with
@@ -1645,9 +1683,10 @@ extern int register_trigger_hist_enable_disable_cmds(void);
* (see trace_event_triggers.c).
*/
struct event_trigger_ops {
- void (*func)(struct event_trigger_data *data,
- struct trace_buffer *buffer, void *rec,
- struct ring_buffer_event *rbe);
+ void (*trigger)(struct event_trigger_data *data,
+ struct trace_buffer *buffer,
+ void *rec,
+ struct ring_buffer_event *rbe);
int (*init)(struct event_trigger_ops *ops,
struct event_trigger_data *data);
void (*free)(struct event_trigger_ops *ops,
@@ -1696,7 +1735,7 @@ struct event_trigger_ops {
* All the methods below, except for @set_filter() and @unreg_all(),
* must be implemented.
*
- * @func: The callback function responsible for parsing and
+ * @parse: The callback function responsible for parsing and
* registering the trigger written to the 'trigger' file by the
* user. It allocates the trigger instance and registers it with
* the appropriate trace event. It makes use of the other
@@ -1731,21 +1770,24 @@ struct event_trigger_ops {
*
* @get_trigger_ops: The callback function invoked to retrieve the
* event_trigger_ops implementation associated with the command.
+ * This callback function allows a single event_command to
+ * support multiple trigger implementations via different sets of
+ * event_trigger_ops, depending on the value of the @param
+ * string.
*/
struct event_command {
struct list_head list;
char *name;
enum event_trigger_type trigger_type;
int flags;
- int (*func)(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *params);
+ int (*parse)(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd,
+ char *param_and_filter);
int (*reg)(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
void (*unreg)(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
void (*unreg_all)(struct trace_event_file *file);
@@ -1926,14 +1968,7 @@ extern struct trace_iterator *tracepoint_print_iter;
*/
static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
{
- const size_t offset = offsetof(struct trace_iterator, seq);
-
- /*
- * Keep gcc from complaining about overwriting more than just one
- * member in the structure.
- */
- memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset);
-
+ memset_startat(iter, 0, seq);
iter->pos = -1;
}
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 928867f527e7..191db32dec46 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -489,18 +489,12 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
if (trace_trigger_soft_disabled(edata->file))
return;
- fbuffer.trace_ctx = tracing_gen_ctx();
- fbuffer.trace_file = edata->file;
-
dsize = get_eprobe_size(&edata->ep->tp, rec);
- fbuffer.regs = NULL;
-
- fbuffer.event =
- trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file,
- call->event.type,
- sizeof(*entry) + edata->ep->tp.size + dsize,
- fbuffer.trace_ctx);
- if (!fbuffer.event)
+
+ entry = trace_event_buffer_reserve(&fbuffer, edata->file,
+ sizeof(*entry) + edata->ep->tp.size + dsize);
+
+ if (!entry)
return;
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
@@ -549,29 +543,29 @@ static void eprobe_trigger_func(struct event_trigger_data *data,
}
static struct event_trigger_ops eprobe_trigger_ops = {
- .func = eprobe_trigger_func,
+ .trigger = eprobe_trigger_func,
.print = eprobe_trigger_print,
.init = eprobe_trigger_init,
.free = eprobe_trigger_free,
};
-static int eprobe_trigger_cmd_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
return -1;
}
-static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops,
- struct event_trigger_data *data,
- struct trace_event_file *file)
+static int eprobe_trigger_reg_func(char *glob,
+ struct event_trigger_data *data,
+ struct trace_event_file *file)
{
return -1;
}
-static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops,
- struct event_trigger_data *data,
- struct trace_event_file *file)
+static void eprobe_trigger_unreg_func(char *glob,
+ struct event_trigger_data *data,
+ struct trace_event_file *file)
{
}
@@ -586,7 +580,7 @@ static struct event_command event_trigger_cmd = {
.name = "eprobe",
.trigger_type = ETT_EVENT_EPROBE,
.flags = EVENT_CMD_FL_NEEDS_REC,
- .func = eprobe_trigger_cmd_func,
+ .parse = eprobe_trigger_cmd_parse,
.reg = eprobe_trigger_reg_func,
.unreg = eprobe_trigger_unreg_func,
.unreg_all = NULL,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 92be9cb1d7d4..3147614c1812 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3461,10 +3461,8 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events,
tr, &ftrace_tr_enable_fops);
- if (!entry) {
- pr_warn("Could not create tracefs 'enable' entry\n");
+ if (!entry)
return -ENOMEM;
- }
/* There are not as crucial, just warn if they are not created */
@@ -3480,17 +3478,13 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
/* ring buffer internal formats */
- entry = trace_create_file("header_page", TRACE_MODE_READ, d_events,
+ trace_create_file("header_page", TRACE_MODE_READ, d_events,
ring_buffer_print_page_header,
&ftrace_show_header_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'header_page' entry\n");
- entry = trace_create_file("header_event", TRACE_MODE_READ, d_events,
+ trace_create_file("header_event", TRACE_MODE_READ, d_events,
ring_buffer_print_entry_header,
&ftrace_show_header_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'header_event' entry\n");
tr->event_dir = d_events;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index c9124038b140..b458a9afa2c0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -5,6 +5,7 @@
* Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
*/
+#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
@@ -654,6 +655,52 @@ DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16);
DEFINE_EQUALITY_PRED(8);
+/* user space strings temp buffer */
+#define USTRING_BUF_SIZE 1024
+
+struct ustring_buffer {
+ char buffer[USTRING_BUF_SIZE];
+};
+
+static __percpu struct ustring_buffer *ustring_per_cpu;
+
+static __always_inline char *test_string(char *str)
+{
+ struct ustring_buffer *ubuf;
+ char *kstr;
+
+ if (!ustring_per_cpu)
+ return NULL;
+
+ ubuf = this_cpu_ptr(ustring_per_cpu);
+ kstr = ubuf->buffer;
+
+ /* For safety, do not trust the string pointer */
+ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
+ return NULL;
+ return kstr;
+}
+
+static __always_inline char *test_ustring(char *str)
+{
+ struct ustring_buffer *ubuf;
+ char __user *ustr;
+ char *kstr;
+
+ if (!ustring_per_cpu)
+ return NULL;
+
+ ubuf = this_cpu_ptr(ustring_per_cpu);
+ kstr = ubuf->buffer;
+
+ /* user space address? */
+ ustr = (char __user *)str;
+ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
+ return NULL;
+
+ return kstr;
+}
+
/* Filter predicate for fixed sized arrays of characters */
static int filter_pred_string(struct filter_pred *pred, void *event)
{
@@ -667,19 +714,43 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
return match;
}
-/* Filter predicate for char * pointers */
-static int filter_pred_pchar(struct filter_pred *pred, void *event)
+static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
{
- char **addr = (char **)(event + pred->offset);
int cmp, match;
- int len = strlen(*addr) + 1; /* including tailing '\0' */
+ int len;
- cmp = pred->regex.match(*addr, &pred->regex, len);
+ len = strlen(str) + 1; /* including tailing '\0' */
+ cmp = pred->regex.match(str, &pred->regex, len);
match = cmp ^ pred->not;
return match;
}
+/* Filter predicate for char * pointers */
+static int filter_pred_pchar(struct filter_pred *pred, void *event)
+{
+ char **addr = (char **)(event + pred->offset);
+ char *str;
+
+ str = test_string(*addr);
+ if (!str)
+ return 0;
+
+ return filter_pchar(pred, str);
+}
+
+/* Filter predicate for char * pointers in user space*/
+static int filter_pred_pchar_user(struct filter_pred *pred, void *event)
+{
+ char **addr = (char **)(event + pred->offset);
+ char *str;
+
+ str = test_ustring(*addr);
+ if (!str)
+ return 0;
+
+ return filter_pchar(pred, str);
+}
/*
* Filter predicate for dynamic sized arrays of characters.
@@ -706,6 +777,29 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
return match;
}
+/*
+ * Filter predicate for relative dynamic sized arrays of characters.
+ * These are implemented through a list of strings at the end
+ * of the entry as same as dynamic string.
+ * The difference is that the relative one records the location offset
+ * from the field itself, not the event entry.
+ */
+static int filter_pred_strrelloc(struct filter_pred *pred, void *event)
+{
+ u32 *item = (u32 *)(event + pred->offset);
+ u32 str_item = *item;
+ int str_loc = str_item & 0xffff;
+ int str_len = str_item >> 16;
+ char *addr = (char *)(&item[1]) + str_loc;
+ int cmp, match;
+
+ cmp = pred->regex.match(addr, &pred->regex, str_len);
+
+ match = cmp ^ pred->not;
+
+ return match;
+}
+
/* Filter predicate for CPUs. */
static int filter_pred_cpu(struct filter_pred *pred, void *event)
{
@@ -756,7 +850,7 @@ static int filter_pred_none(struct filter_pred *pred, void *event)
*
* Note:
* - @str might not be NULL-terminated if it's of type DYN_STRING
- * or STATIC_STRING, unless @len is zero.
+ * RDYN_STRING, or STATIC_STRING, unless @len is zero.
*/
static int regex_match_full(char *str, struct regex *r, int len)
@@ -1083,6 +1177,9 @@ int filter_assign_type(const char *type)
if (strstr(type, "__data_loc") && strstr(type, "char"))
return FILTER_DYN_STRING;
+ if (strstr(type, "__rel_loc") && strstr(type, "char"))
+ return FILTER_RDYN_STRING;
+
if (strchr(type, '[') && strstr(type, "char"))
return FILTER_STATIC_STRING;
@@ -1158,6 +1255,7 @@ static int parse_pred(const char *str, void *data,
struct filter_pred *pred = NULL;
char num_buf[24]; /* Big enough to hold an address */
char *field_name;
+ bool ustring = false;
char q;
u64 val;
int len;
@@ -1192,6 +1290,12 @@ static int parse_pred(const char *str, void *data,
return -EINVAL;
}
+ /* See if the field is a user space string */
+ if ((len = str_has_prefix(str + i, ".ustring"))) {
+ ustring = true;
+ i += len;
+ }
+
while (isspace(str[i]))
i++;
@@ -1318,10 +1422,24 @@ static int parse_pred(const char *str, void *data,
pred->fn = filter_pred_string;
pred->regex.field_len = field->size;
- } else if (field->filter_type == FILTER_DYN_STRING)
+ } else if (field->filter_type == FILTER_DYN_STRING) {
pred->fn = filter_pred_strloc;
- else
- pred->fn = filter_pred_pchar;
+ } else if (field->filter_type == FILTER_RDYN_STRING)
+ pred->fn = filter_pred_strrelloc;
+ else {
+
+ if (!ustring_per_cpu) {
+ /* Once allocated, keep it around for good */
+ ustring_per_cpu = alloc_percpu(struct ustring_buffer);
+ if (!ustring_per_cpu)
+ goto err_mem;
+ }
+
+ if (ustring)
+ pred->fn = filter_pred_pchar_user;
+ else
+ pred->fn = filter_pred_pchar;
+ }
/* go past the last quote */
i++;
@@ -1387,6 +1505,9 @@ static int parse_pred(const char *str, void *data,
err_free:
kfree(pred);
return -EINVAL;
+err_mem:
+ kfree(pred);
+ return -ENOMEM;
}
enum {
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 319f9c8ca7e7..5e6a988a8a51 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -217,6 +217,20 @@ static u64 hist_field_dynstring(struct hist_field *hist_field,
return (u64)(unsigned long)addr;
}
+static u64 hist_field_reldynstring(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ u32 *item = event + hist_field->field->offset;
+ u32 str_item = *item;
+ int str_loc = str_item & 0xffff;
+ char *addr = (char *)&item[1] + str_loc;
+
+ return (u64)(unsigned long)addr;
+}
+
static u64 hist_field_pstring(struct hist_field *hist_field,
struct tracing_map_elt *elt,
struct trace_buffer *buffer,
@@ -1956,8 +1970,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
if (field->filter_type == FILTER_STATIC_STRING) {
hist_field->fn = hist_field_string;
hist_field->size = field->size;
- } else if (field->filter_type == FILTER_DYN_STRING)
+ } else if (field->filter_type == FILTER_DYN_STRING) {
hist_field->fn = hist_field_dynstring;
+ } else if (field->filter_type == FILTER_RDYN_STRING)
+ hist_field->fn = hist_field_reldynstring;
else
hist_field->fn = hist_field_pstring;
} else {
@@ -2745,9 +2761,9 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data,
}
static struct event_command trigger_hist_cmd;
-static int event_hist_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param);
+static int event_hist_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param);
static bool compatible_keys(struct hist_trigger_data *target_hist_data,
struct hist_trigger_data *hist_data,
@@ -2950,8 +2966,8 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data,
var_hist->hist_data = hist_data;
/* Create the new histogram with our variable */
- ret = event_hist_trigger_func(&trigger_hist_cmd, file,
- "", "hist", cmd);
+ ret = event_hist_trigger_parse(&trigger_hist_cmd, file,
+ "", "hist", cmd);
if (ret) {
kfree(cmd);
kfree(var_hist->cmd);
@@ -4961,7 +4977,8 @@ static inline void add_to_key(char *compound_key, void *key,
struct ftrace_event_field *field;
field = key_field->field;
- if (field->filter_type == FILTER_DYN_STRING)
+ if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING)
size = *(u32 *)(rec + field->offset) >> 16;
else if (field->filter_type == FILTER_STATIC_STRING)
size = field->size;
@@ -5712,8 +5729,8 @@ static void unregister_field_var_hists(struct hist_trigger_data *hist_data)
for (i = 0; i < hist_data->n_field_var_hists; i++) {
file = hist_data->field_var_hists[i]->hist_data->event_file;
cmd = hist_data->field_var_hists[i]->cmd;
- ret = event_hist_trigger_func(&trigger_hist_cmd, file,
- "!hist", "hist", cmd);
+ ret = event_hist_trigger_parse(&trigger_hist_cmd, file,
+ "!hist", "hist", cmd);
WARN_ON_ONCE(ret < 0);
}
}
@@ -5742,7 +5759,7 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops,
}
static struct event_trigger_ops event_hist_trigger_ops = {
- .func = event_hist_trigger,
+ .trigger = event_hist_trigger,
.print = event_hist_trigger_print,
.init = event_hist_trigger_init,
.free = event_hist_trigger_free,
@@ -5776,7 +5793,7 @@ static void event_hist_trigger_named_free(struct event_trigger_ops *ops,
}
static struct event_trigger_ops event_hist_trigger_named_ops = {
- .func = event_hist_trigger,
+ .trigger = event_hist_trigger,
.print = event_hist_trigger_print,
.init = event_hist_trigger_named_init,
.free = event_hist_trigger_named_free,
@@ -5893,7 +5910,7 @@ static bool hist_trigger_match(struct event_trigger_data *data,
return true;
}
-static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
+static int hist_register_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -6045,7 +6062,7 @@ static bool hist_trigger_check_refs(struct event_trigger_data *data,
return false;
}
-static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
+static void hist_unregister_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -6129,9 +6146,9 @@ static void hist_unreg_all(struct trace_event_file *file)
}
}
-static int event_hist_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+static int event_hist_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT;
struct event_trigger_data *trigger_data;
@@ -6245,7 +6262,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
goto out_free;
}
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
@@ -6254,7 +6271,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
goto out_free;
}
- ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ ret = cmd_ops->reg(glob, trigger_data, file);
/*
* The above returns on success the # of triggers registered,
* but if it didn't register any it returns zero. Consider no
@@ -6297,7 +6314,7 @@ enable:
return ret;
out_unreg:
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
out_free:
if (cmd_ops->set_filter)
cmd_ops->set_filter(NULL, trigger_data, NULL);
@@ -6314,7 +6331,7 @@ static struct event_command trigger_hist_cmd = {
.name = "hist",
.trigger_type = ETT_EVENT_HIST,
.flags = EVENT_CMD_FL_NEEDS_REC,
- .func = event_hist_trigger_func,
+ .parse = event_hist_trigger_parse,
.reg = hist_register_trigger,
.unreg = hist_unregister_trigger,
.unreg_all = hist_unreg_all,
@@ -6366,28 +6383,28 @@ hist_enable_count_trigger(struct event_trigger_data *data,
}
static struct event_trigger_ops hist_enable_trigger_ops = {
- .func = hist_enable_trigger,
+ .trigger = hist_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops hist_enable_count_trigger_ops = {
- .func = hist_enable_count_trigger,
+ .trigger = hist_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops hist_disable_trigger_ops = {
- .func = hist_enable_trigger,
+ .trigger = hist_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops hist_disable_count_trigger_ops = {
- .func = hist_enable_count_trigger,
+ .trigger = hist_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
@@ -6429,7 +6446,7 @@ static void hist_enable_unreg_all(struct trace_event_file *file)
static struct event_command trigger_hist_enable_cmd = {
.name = ENABLE_HIST_STR,
.trigger_type = ETT_HIST_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.unreg_all = hist_enable_unreg_all,
@@ -6440,7 +6457,7 @@ static struct event_command trigger_hist_enable_cmd = {
static struct event_command trigger_hist_disable_cmd = {
.name = DISABLE_HIST_STR,
.trigger_type = ETT_HIST_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.unreg_all = hist_enable_unreg_all,
diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
index c188045c5f97..d6b4935a78c0 100644
--- a/kernel/trace/trace_events_inject.c
+++ b/kernel/trace/trace_events_inject.c
@@ -168,10 +168,14 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size)
continue;
if (field->filter_type == FILTER_STATIC_STRING)
continue;
- if (field->filter_type == FILTER_DYN_STRING) {
+ if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING) {
u32 *str_item;
int str_loc = entry_size & 0xffff;
+ if (field->filter_type == FILTER_RDYN_STRING)
+ str_loc -= field->offset + field->size;
+
str_item = (u32 *)(entry + field->offset);
*str_item = str_loc; /* string length is 0. */
} else {
@@ -214,7 +218,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
if (field->filter_type == FILTER_STATIC_STRING) {
strlcpy(entry + field->offset, addr, field->size);
- } else if (field->filter_type == FILTER_DYN_STRING) {
+ } else if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING) {
int str_len = strlen(addr) + 1;
int str_loc = entry_size & 0xffff;
u32 *str_item;
@@ -229,6 +234,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
strlcpy(entry + (entry_size - str_len), addr, str_len);
str_item = (u32 *)(entry + field->offset);
+ if (field->filter_type == FILTER_RDYN_STRING)
+ str_loc -= field->offset + field->size;
*str_item = (str_len << 16) | str_loc;
} else {
char **paddr;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index ca9c13b2ecf4..154db74dadbc 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1979,7 +1979,7 @@ EXPORT_SYMBOL_GPL(synth_event_add_next_val);
/**
* synth_event_add_val - Add a named field's value to an open synth trace
* @field_name: The name of the synthetic event field value to set
- * @val: The value to set the next field to
+ * @val: The value to set the named field to
* @trace_state: A pointer to object tracking the piecewise trace state
*
* Set the value of the named field in an event that's been opened by
@@ -2054,6 +2054,13 @@ static int create_synth_event(const char *raw_command)
last_cmd_set(raw_command);
+ name = raw_command;
+
+ /* Don't try to process if not our system */
+ if (name[0] != 's' || name[1] != ':')
+ return -ECANCELED;
+ name += 2;
+
p = strpbrk(raw_command, " \t");
if (!p) {
synth_err(SYNTH_ERR_INVALID_CMD, 0);
@@ -2062,12 +2069,6 @@ static int create_synth_event(const char *raw_command)
fields = skip_spaces(p);
- name = raw_command;
-
- if (name[0] != 's' || name[1] != ':')
- return -ECANCELED;
- name += 2;
-
/* This interface accepts group name prefix */
if (strchr(name, '/')) {
len = str_has_prefix(name, SYNTH_SYSTEM "/");
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 3d5c07239a2a..d00fee705f9c 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -68,7 +68,7 @@ event_triggers_call(struct trace_event_file *file,
if (data->paused)
continue;
if (!rec) {
- data->ops->func(data, buffer, rec, event);
+ data->ops->trigger(data, buffer, rec, event);
continue;
}
filter = rcu_dereference_sched(data->filter);
@@ -78,7 +78,7 @@ event_triggers_call(struct trace_event_file *file,
tt |= data->cmd_ops->trigger_type;
continue;
}
- data->ops->func(data, buffer, rec, event);
+ data->ops->trigger(data, buffer, rec, event);
}
return tt;
}
@@ -106,7 +106,7 @@ event_triggers_post_call(struct trace_event_file *file,
if (data->paused)
continue;
if (data->cmd_ops->trigger_type & tt)
- data->ops->func(data, NULL, NULL, NULL);
+ data->ops->trigger(data, NULL, NULL, NULL);
}
}
EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -245,7 +245,7 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
mutex_lock(&trigger_cmd_mutex);
list_for_each_entry(p, &trigger_commands, list) {
if (strcmp(p->name, command) == 0) {
- ret = p->func(p, file, buff, command, next);
+ ret = p->parse(p, file, buff, command, next);
goto out_unlock;
}
}
@@ -540,7 +540,6 @@ void update_cond_flag(struct trace_event_file *file)
/**
* register_trigger - Generic event_command @reg implementation
* @glob: The raw string used to register the trigger
- * @ops: The trigger ops associated with the trigger
* @data: Trigger-specific data to associate with the trigger
* @file: The trace_event_file associated with the event
*
@@ -551,7 +550,7 @@ void update_cond_flag(struct trace_event_file *file)
*
* Return: 0 on success, errno otherwise
*/
-static int register_trigger(char *glob, struct event_trigger_ops *ops,
+static int register_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -589,7 +588,6 @@ out:
/**
* unregister_trigger - Generic event_command @unreg implementation
* @glob: The raw string used to register the trigger
- * @ops: The trigger ops associated with the trigger
* @test: Trigger-specific data used to find the trigger to remove
* @file: The trace_event_file associated with the event
*
@@ -598,7 +596,7 @@ out:
* Usually used directly as the @unreg method in event command
* implementations.
*/
-static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
+static void unregister_trigger(char *glob,
struct event_trigger_data *test,
struct trace_event_file *file)
{
@@ -621,8 +619,350 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
data->ops->free(data->ops, data);
}
+/*
+ * Event trigger parsing helper functions.
+ *
+ * These functions help make it easier to write an event trigger
+ * parsing function i.e. the struct event_command.parse() callback
+ * function responsible for parsing and registering a trigger command
+ * written to the 'trigger' file.
+ *
+ * A trigger command (or just 'trigger' for short) takes the form:
+ * [trigger] [if filter]
+ *
+ * The struct event_command.parse() callback (and other struct
+ * event_command functions) refer to several components of a trigger
+ * command. Those same components are referenced by the event trigger
+ * parsing helper functions defined below. These components are:
+ *
+ * cmd - the trigger command name
+ * glob - the trigger command name optionally prefaced with '!'
+ * param_and_filter - text following cmd and ':'
+ * param - text following cmd and ':' and stripped of filter
+ * filter - the optional filter text following (and including) 'if'
+ *
+ * To illustrate the use of these componenents, here are some concrete
+ * examples. For the following triggers:
+ *
+ * echo 'traceon:5 if pid == 0' > trigger
+ * - 'traceon' is both cmd and glob
+ * - '5 if pid == 0' is the param_and_filter
+ * - '5' is the param
+ * - 'if pid == 0' is the filter
+ *
+ * echo 'enable_event:sys:event:n' > trigger
+ * - 'enable_event' is both cmd and glob
+ * - 'sys:event:n' is the param_and_filter
+ * - 'sys:event:n' is the param
+ * - there is no filter
+ *
+ * echo 'hist:keys=pid if prio > 50' > trigger
+ * - 'hist' is both cmd and glob
+ * - 'keys=pid if prio > 50' is the param_and_filter
+ * - 'keys=pid' is the param
+ * - 'if prio > 50' is the filter
+ *
+ * echo '!enable_event:sys:event:n' > trigger
+ * - 'enable_event' the cmd
+ * - '!enable_event' is the glob
+ * - 'sys:event:n' is the param_and_filter
+ * - 'sys:event:n' is the param
+ * - there is no filter
+ *
+ * echo 'traceoff' > trigger
+ * - 'traceoff' is both cmd and glob
+ * - there is no param_and_filter
+ * - there is no param
+ * - there is no filter
+ *
+ * There are a few different categories of event trigger covered by
+ * these helpers:
+ *
+ * - triggers that don't require a parameter e.g. traceon
+ * - triggers that do require a parameter e.g. enable_event and hist
+ * - triggers that though they may not require a param may support an
+ * optional 'n' param (n = number of times the trigger should fire)
+ * e.g.: traceon:5 or enable_event:sys:event:n
+ * - triggers that do not support an 'n' param e.g. hist
+ *
+ * These functions can be used or ignored as necessary - it all
+ * depends on the complexity of the trigger, and the granularity of
+ * the functions supported reflects the fact that some implementations
+ * may need to customize certain aspects of their implementations and
+ * won't need certain functions. For instance, the hist trigger
+ * implementation doesn't use event_trigger_separate_filter() because
+ * it has special requirements for handling the filter.
+ */
+
+/**
+ * event_trigger_check_remove - check whether an event trigger specifies remove
+ * @glob: The trigger command string, with optional remove(!) operator
+ *
+ * The event trigger callback implementations pass in 'glob' as a
+ * parameter. This is the command name either with or without a
+ * remove(!) operator. This function simply parses the glob and
+ * determines whether the command corresponds to a trigger removal or
+ * a trigger addition.
+ *
+ * Return: true if this is a remove command, false otherwise
+ */
+bool event_trigger_check_remove(const char *glob)
+{
+ return (glob && glob[0] == '!') ? true : false;
+}
+
+/**
+ * event_trigger_empty_param - check whether the param is empty
+ * @param: The trigger param string
+ *
+ * The event trigger callback implementations pass in 'param' as a
+ * parameter. This corresponds to the string following the command
+ * name minus the command name. This function can be called by a
+ * callback implementation for any command that requires a param; a
+ * callback that doesn't require a param can ignore it.
+ *
+ * Return: true if this is an empty param, false otherwise
+ */
+bool event_trigger_empty_param(const char *param)
+{
+ return !param;
+}
+
+/**
+ * event_trigger_separate_filter - separate an event trigger from a filter
+ * @param: The param string containing trigger and possibly filter
+ * @trigger: outparam, will be filled with a pointer to the trigger
+ * @filter: outparam, will be filled with a pointer to the filter
+ * @param_required: Specifies whether or not the param string is required
+ *
+ * Given a param string of the form '[trigger] [if filter]', this
+ * function separates the filter from the trigger and returns the
+ * trigger in *trigger and the filter in *filter. Either the *trigger
+ * or the *filter may be set to NULL by this function - if not set to
+ * NULL, they will contain strings corresponding to the trigger and
+ * filter.
+ *
+ * There are two cases that need to be handled with respect to the
+ * passed-in param: either the param is required, or it is not
+ * required. If @param_required is set, and there's no param, it will
+ * return -EINVAL. If @param_required is not set and there's a param
+ * that starts with a number, that corresponds to the case of a
+ * trigger with :n (n = number of times the trigger should fire) and
+ * the parsing continues normally; otherwise the function just returns
+ * and assumes param just contains a filter and there's nothing else
+ * to do.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_separate_filter(char *param_and_filter, char **param,
+ char **filter, bool param_required)
+{
+ int ret = 0;
+
+ *param = *filter = NULL;
+
+ if (!param_and_filter) {
+ if (param_required)
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Here we check for an optional param. The only legal
+ * optional param is :n, and if that's the case, continue
+ * below. Otherwise we assume what's left is a filter and
+ * return it as the filter string for the caller to deal with.
+ */
+ if (!param_required && param_and_filter && !isdigit(param_and_filter[0])) {
+ *filter = param_and_filter;
+ goto out;
+ }
+
+ /*
+ * Separate the param from the filter (param [if filter]).
+ * Here we have either an optional :n param or a required
+ * param and an optional filter.
+ */
+ *param = strsep(&param_and_filter, " \t");
+
+ /*
+ * Here we have a filter, though it may be empty.
+ */
+ if (param_and_filter) {
+ *filter = skip_spaces(param_and_filter);
+ if (!**filter)
+ *filter = NULL;
+ }
+out:
+ return ret;
+}
+
+/**
+ * event_trigger_alloc - allocate and init event_trigger_data for a trigger
+ * @cmd_ops: The event_command operations for the trigger
+ * @cmd: The cmd string
+ * @param: The param string
+ * @private_data: User data to associate with the event trigger
+ *
+ * Allocate an event_trigger_data instance and initialize it. The
+ * @cmd_ops are used along with the @cmd and @param to get the
+ * trigger_ops to assign to the event_trigger_data. @private_data can
+ * also be passed in and associated with the event_trigger_data.
+ *
+ * Use event_trigger_free() to free an event_trigger_data object.
+ *
+ * Return: The trigger_data object success, NULL otherwise
+ */
+struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops,
+ char *cmd,
+ char *param,
+ void *private_data)
+{
+ struct event_trigger_data *trigger_data;
+ struct event_trigger_ops *trigger_ops;
+
+ trigger_ops = cmd_ops->get_trigger_ops(cmd, param);
+
+ trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
+ if (!trigger_data)
+ return NULL;
+
+ trigger_data->count = -1;
+ trigger_data->ops = trigger_ops;
+ trigger_data->cmd_ops = cmd_ops;
+ trigger_data->private_data = private_data;
+
+ INIT_LIST_HEAD(&trigger_data->list);
+ INIT_LIST_HEAD(&trigger_data->named_list);
+ RCU_INIT_POINTER(trigger_data->filter, NULL);
+
+ return trigger_data;
+}
+
+/**
+ * event_trigger_parse_num - parse and return the number param for a trigger
+ * @param: The param string
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Parse the :n (n = number of times the trigger should fire) param
+ * and set the count variable in the trigger_data to the parsed count.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_parse_num(char *param,
+ struct event_trigger_data *trigger_data)
+{
+ char *number;
+ int ret = 0;
+
+ if (param) {
+ number = strsep(&param, ":");
+
+ if (!strlen(number))
+ return -EINVAL;
+
+ /*
+ * We use the callback data field (which is a pointer)
+ * as our counter.
+ */
+ ret = kstrtoul(number, 0, &trigger_data->count);
+ }
+
+ return ret;
+}
+
+/**
+ * event_trigger_set_filter - set an event trigger's filter
+ * @cmd_ops: The event_command operations for the trigger
+ * @file: The event file for the trigger's event
+ * @param: The string containing the filter
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Set the filter for the trigger. If the filter is NULL, just return
+ * without error.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_set_filter(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *param,
+ struct event_trigger_data *trigger_data)
+{
+ if (param && cmd_ops->set_filter)
+ return cmd_ops->set_filter(param, trigger_data, file);
+
+ return 0;
+}
+
+/**
+ * event_trigger_reset_filter - reset an event trigger's filter
+ * @cmd_ops: The event_command operations for the trigger
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Reset the filter for the trigger to no filter.
+ */
+void event_trigger_reset_filter(struct event_command *cmd_ops,
+ struct event_trigger_data *trigger_data)
+{
+ if (cmd_ops->set_filter)
+ cmd_ops->set_filter(NULL, trigger_data, NULL);
+}
+
+/**
+ * event_trigger_register - register an event trigger
+ * @cmd_ops: The event_command operations for the trigger
+ * @file: The event file for the trigger's event
+ * @glob: The trigger command string, with optional remove(!) operator
+ * @cmd: The cmd string
+ * @param: The param string
+ * @trigger_data: The trigger_data for the trigger
+ * @n_registered: optional outparam, the number of triggers registered
+ *
+ * Register an event trigger. The @cmd_ops are used to call the
+ * cmd_ops->reg() function which actually does the registration. The
+ * cmd_ops->reg() function returns the number of triggers registered,
+ * which is assigned to n_registered, if n_registered is non-NULL.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_register(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob,
+ char *cmd,
+ char *param,
+ struct event_trigger_data *trigger_data,
+ int *n_registered)
+{
+ int ret;
+
+ if (n_registered)
+ *n_registered = 0;
+
+ ret = cmd_ops->reg(glob, trigger_data, file);
+ /*
+ * The above returns on success the # of functions enabled,
+ * but if it didn't find any functions it returns zero.
+ * Consider no functions a failure too.
+ */
+ if (!ret) {
+ cmd_ops->unreg(glob, trigger_data, file);
+ ret = -ENOENT;
+ } else if (ret > 0) {
+ if (n_registered)
+ *n_registered = ret;
+ /* Just return zero, not the number of enabled functions */
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * End event trigger parsing helper functions.
+ */
+
/**
- * event_trigger_callback - Generic event_command @func implementation
+ * event_trigger_parse - Generic event_command @parse implementation
* @cmd_ops: The command ops, used for trigger registration
* @file: The trace_event_file associated with the event
* @glob: The raw string used to register the trigger
@@ -632,15 +972,15 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
* Common implementation for event command parsing and trigger
* instantiation.
*
- * Usually used directly as the @func method in event command
+ * Usually used directly as the @parse method in event command
* implementations.
*
* Return: 0 on success, errno otherwise
*/
static int
-event_trigger_callback(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+event_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
struct event_trigger_data *trigger_data;
struct event_trigger_ops *trigger_ops;
@@ -673,7 +1013,7 @@ event_trigger_callback(struct event_command *cmd_ops,
INIT_LIST_HEAD(&trigger_data->named_list);
if (glob[0] == '!') {
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
kfree(trigger_data);
ret = 0;
goto out;
@@ -708,14 +1048,14 @@ event_trigger_callback(struct event_command *cmd_ops,
out_reg:
/* Up the trigger_data count to make sure reg doesn't free it on failure */
event_trigger_init(trigger_ops, trigger_data);
- ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ ret = cmd_ops->reg(glob, trigger_data, file);
/*
* The above returns on success the # of functions enabled,
* but if it didn't find any functions it returns zero.
* Consider no functions a failure too.
*/
if (!ret) {
- cmd_ops->unreg(glob, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob, trigger_data, file);
ret = -ENOENT;
} else if (ret > 0)
ret = 0;
@@ -1023,28 +1363,28 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
}
static struct event_trigger_ops traceon_trigger_ops = {
- .func = traceon_trigger,
+ .trigger = traceon_trigger,
.print = traceon_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops traceon_count_trigger_ops = {
- .func = traceon_count_trigger,
+ .trigger = traceon_count_trigger,
.print = traceon_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops traceoff_trigger_ops = {
- .func = traceoff_trigger,
+ .trigger = traceoff_trigger,
.print = traceoff_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops traceoff_count_trigger_ops = {
- .func = traceoff_count_trigger,
+ .trigger = traceoff_count_trigger,
.print = traceoff_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
@@ -1069,7 +1409,7 @@ onoff_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_traceon_cmd = {
.name = "traceon",
.trigger_type = ETT_TRACE_ONOFF,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = onoff_get_trigger_ops,
@@ -1080,7 +1420,7 @@ static struct event_command trigger_traceoff_cmd = {
.name = "traceoff",
.trigger_type = ETT_TRACE_ONOFF,
.flags = EVENT_CMD_FL_POST_TRIGGER,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = onoff_get_trigger_ops,
@@ -1116,14 +1456,14 @@ snapshot_count_trigger(struct event_trigger_data *data,
}
static int
-register_snapshot_trigger(char *glob, struct event_trigger_ops *ops,
+register_snapshot_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
if (tracing_alloc_snapshot_instance(file->tr) != 0)
return 0;
- return register_trigger(glob, ops, data, file);
+ return register_trigger(glob, data, file);
}
static int
@@ -1135,14 +1475,14 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
}
static struct event_trigger_ops snapshot_trigger_ops = {
- .func = snapshot_trigger,
+ .trigger = snapshot_trigger,
.print = snapshot_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops snapshot_count_trigger_ops = {
- .func = snapshot_count_trigger,
+ .trigger = snapshot_count_trigger,
.print = snapshot_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
@@ -1157,7 +1497,7 @@ snapshot_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_snapshot_cmd = {
.name = "snapshot",
.trigger_type = ETT_SNAPSHOT,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_snapshot_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = snapshot_get_trigger_ops,
@@ -1226,14 +1566,14 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
}
static struct event_trigger_ops stacktrace_trigger_ops = {
- .func = stacktrace_trigger,
+ .trigger = stacktrace_trigger,
.print = stacktrace_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops stacktrace_count_trigger_ops = {
- .func = stacktrace_count_trigger,
+ .trigger = stacktrace_count_trigger,
.print = stacktrace_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
@@ -1249,7 +1589,7 @@ static struct event_command trigger_stacktrace_cmd = {
.name = "stacktrace",
.trigger_type = ETT_STACKTRACE,
.flags = EVENT_CMD_FL_POST_TRIGGER,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = stacktrace_get_trigger_ops,
@@ -1353,36 +1693,36 @@ void event_enable_trigger_free(struct event_trigger_ops *ops,
}
static struct event_trigger_ops event_enable_trigger_ops = {
- .func = event_enable_trigger,
+ .trigger = event_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops event_enable_count_trigger_ops = {
- .func = event_enable_count_trigger,
+ .trigger = event_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops event_disable_trigger_ops = {
- .func = event_enable_trigger,
+ .trigger = event_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops event_disable_count_trigger_ops = {
- .func = event_enable_count_trigger,
+ .trigger = event_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-int event_enable_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+int event_enable_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
struct trace_event_file *event_enable_file;
struct enable_trigger_data *enable_data;
@@ -1455,7 +1795,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
trigger_data->private_data = enable_data;
if (glob[0] == '!') {
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
kfree(trigger_data);
kfree(enable_data);
ret = 0;
@@ -1502,7 +1842,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
ret = trace_event_enable_disable(event_enable_file, 1, 1);
if (ret < 0)
goto out_put;
- ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ ret = cmd_ops->reg(glob, trigger_data, file);
/*
* The above returns on success the # of functions enabled,
* but if it didn't find any functions it returns zero.
@@ -1532,7 +1872,6 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
}
int event_enable_register_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -1574,7 +1913,6 @@ out:
}
void event_enable_unregister_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *test,
struct trace_event_file *file)
{
@@ -1628,7 +1966,7 @@ event_enable_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_enable_cmd = {
.name = ENABLE_EVENT_STR,
.trigger_type = ETT_EVENT_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.get_trigger_ops = event_enable_get_trigger_ops,
@@ -1638,7 +1976,7 @@ static struct event_command trigger_enable_cmd = {
static struct event_command trigger_disable_cmd = {
.name = DISABLE_EVENT_STR,
.trigger_type = ETT_EVENT_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.get_trigger_ops = event_enable_get_trigger_ops,
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 56bb7b890578..d440ddd5fd8b 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -491,18 +491,14 @@ static void stop_per_cpu_kthreads(void)
static int start_cpu_kthread(unsigned int cpu)
{
struct task_struct *kthread;
- char comm[24];
- snprintf(comm, 24, "hwlatd/%d", cpu);
-
- kthread = kthread_create_on_cpu(kthread_fn, NULL, cpu, comm);
+ kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, "hwlatd/%u");
if (IS_ERR(kthread)) {
pr_err(BANNER "could not start sampling thread\n");
return -ENOMEM;
}
per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;
- wake_up_process(kthread);
return 0;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 4e1257f50aa3..508f14af4f2c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -328,11 +328,9 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
static void __disable_trace_kprobe(struct trace_probe *tp)
{
- struct trace_probe *pos;
struct trace_kprobe *tk;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tk = container_of(pos, struct trace_kprobe, tp);
+ list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) {
if (!trace_kprobe_is_registered(tk))
continue;
if (trace_kprobe_is_return(tk))
@@ -349,7 +347,7 @@ static void __disable_trace_kprobe(struct trace_probe *tp)
static int enable_trace_kprobe(struct trace_event_call *call,
struct trace_event_file *file)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_kprobe *tk;
bool enabled;
int ret = 0;
@@ -370,8 +368,7 @@ static int enable_trace_kprobe(struct trace_event_call *call,
if (enabled)
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tk = container_of(pos, struct trace_kprobe, tp);
+ list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) {
if (trace_kprobe_has_gone(tk))
continue;
ret = __enable_trace_kprobe(tk);
@@ -560,11 +557,9 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig,
struct trace_kprobe *comp)
{
struct trace_probe_event *tpe = orig->tp.event;
- struct trace_probe *pos;
int i;
- list_for_each_entry(pos, &tpe->probes, list) {
- orig = container_of(pos, struct trace_kprobe, tp);
+ list_for_each_entry(orig, &tpe->probes, tp.list) {
if (strcmp(trace_kprobe_symbol(orig),
trace_kprobe_symbol(comp)) ||
trace_kprobe_offset(orig) != trace_kprobe_offset(comp))
@@ -1176,15 +1171,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
{
struct dyn_event *ev = v;
struct trace_kprobe *tk;
+ unsigned long nmissed;
if (!is_trace_kprobe(ev))
return 0;
tk = to_trace_kprobe(ev);
+ nmissed = trace_kprobe_is_return(tk) ?
+ tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
seq_printf(m, " %-44s %15lu %15lu\n",
trace_probe_name(&tk->tp),
trace_kprobe_nhit(tk),
- tk->rp.kp.nmissed);
+ nmissed);
return 0;
}
@@ -1384,17 +1382,11 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
if (trace_trigger_soft_disabled(trace_file))
return;
- fbuffer.trace_ctx = tracing_gen_ctx();
- fbuffer.trace_file = trace_file;
-
dsize = __get_data_size(&tk->tp, regs);
- fbuffer.event =
- trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file,
- call->event.type,
- sizeof(*entry) + tk->tp.size + dsize,
- fbuffer.trace_ctx);
- if (!fbuffer.event)
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + tk->tp.size + dsize);
+ if (!entry)
return;
fbuffer.regs = regs;
@@ -1431,16 +1423,11 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
if (trace_trigger_soft_disabled(trace_file))
return;
- fbuffer.trace_ctx = tracing_gen_ctx();
- fbuffer.trace_file = trace_file;
-
dsize = __get_data_size(&tk->tp, regs);
- fbuffer.event =
- trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file,
- call->event.type,
- sizeof(*entry) + tk->tp.size + dsize,
- fbuffer.trace_ctx);
- if (!fbuffer.event)
+
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + tk->tp.size + dsize);
+ if (!entry)
return;
fbuffer.regs = regs;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 7520d43aed55..870a08da5b48 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -138,8 +138,7 @@ static void osnoise_unregister_instance(struct trace_array *tr)
if (!found)
return;
- synchronize_rcu();
- kfree(inst);
+ kvfree_rcu(inst);
}
/*
@@ -1701,7 +1700,7 @@ static int start_kthread(unsigned int cpu)
snprintf(comm, 24, "osnoise/%d", cpu);
}
- kthread = kthread_create_on_cpu(main, NULL, cpu, comm);
+ kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
if (IS_ERR(kthread)) {
pr_err(BANNER "could not start sampling thread\n");
@@ -1710,7 +1709,6 @@ static int start_kthread(unsigned int cpu)
}
per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
- wake_up_process(kthread);
return 0;
}
@@ -2123,6 +2121,13 @@ out_unhook_irq:
return -EINVAL;
}
+static void osnoise_unhook_events(void)
+{
+ unhook_thread_events();
+ unhook_softirq_events();
+ unhook_irq_events();
+}
+
/*
* osnoise_workload_start - start the workload and hook to events
*/
@@ -2155,7 +2160,14 @@ static int osnoise_workload_start(void)
retval = start_per_cpu_kthreads();
if (retval) {
- unhook_irq_events();
+ trace_osnoise_callback_enabled = false;
+ /*
+ * Make sure that ftrace_nmi_enter/exit() see
+ * trace_osnoise_callback_enabled as false before continuing.
+ */
+ barrier();
+
+ osnoise_unhook_events();
return retval;
}
@@ -2186,9 +2198,7 @@ static void osnoise_workload_stop(void)
stop_per_cpu_kthreads();
- unhook_irq_events();
- unhook_softirq_events();
- unhook_thread_events();
+ osnoise_unhook_events();
}
static void osnoise_tracer_start(struct trace_array *tr)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3547e7176ff7..8aa493d25c73 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -445,14 +445,18 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
char irqs_off;
int hardirq;
int softirq;
+ int bh_off;
int nmi;
nmi = entry->flags & TRACE_FLAG_NMI;
hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+ bh_off = entry->flags & TRACE_FLAG_BH_OFF;
irqs_off =
+ (entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ bh_off ? 'b' :
(entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
'.';
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 3ed2a3f37297..73d90179b51b 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -356,6 +356,8 @@ static int __parse_imm_string(char *str, char **pbuf, int offs)
return -EINVAL;
}
*pbuf = kstrndup(str, len - 1, GFP_KERNEL);
+ if (!*pbuf)
+ return -ENOMEM;
return 0;
}
@@ -1138,8 +1140,7 @@ int trace_probe_remove_file(struct trace_probe *tp,
return -ENOENT;
list_del_rcu(&link->list);
- synchronize_rcu();
- kfree(link);
+ kvfree_rcu(link);
if (list_empty(&tp->event->files))
trace_probe_clear_flag(tp, TP_FLAG_TRACE);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8bfcd3b09422..f755bde42fd0 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -323,8 +323,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
trace_ctx = tracing_gen_ctx();
- buffer = tr->array_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer,
+ event = trace_event_buffer_lock_reserve(&buffer, trace_file,
sys_data->enter_event->event.type, size, trace_ctx);
if (!event)
return;
@@ -367,8 +366,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
trace_ctx = tracing_gen_ctx();
- buffer = tr->array_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer,
+ event = trace_event_buffer_lock_reserve(&buffer, trace_file,
sys_data->exit_event->event.type, sizeof(*entry),
trace_ctx);
if (!event)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 4f35514a48f3..9711589273cd 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -410,12 +410,10 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig,
struct trace_uprobe *comp)
{
struct trace_probe_event *tpe = orig->tp.event;
- struct trace_probe *pos;
struct inode *comp_inode = d_real_inode(comp->path.dentry);
int i;
- list_for_each_entry(pos, &tpe->probes, list) {
- orig = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(orig, &tpe->probes, tp.list) {
if (comp_inode != d_real_inode(orig->path.dentry) ||
comp->offset != orig->offset)
continue;
@@ -950,8 +948,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
struct trace_event_file *trace_file)
{
struct uprobe_trace_entry_head *entry;
- struct trace_buffer *buffer;
- struct ring_buffer_event *event;
+ struct trace_event_buffer fbuffer;
void *data;
int size, esize;
struct trace_event_call *call = trace_probe_event_call(&tu->tp);
@@ -966,12 +963,10 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
size = esize + tu->tp.size + dsize;
- event = trace_event_buffer_lock_reserve(&buffer, trace_file,
- call->event.type, size, 0);
- if (!event)
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
+ if (!entry)
return;
- entry = ring_buffer_event_data(event);
if (is_ret_probe(tu)) {
entry->vaddr[0] = func;
entry->vaddr[1] = instruction_pointer(regs);
@@ -983,7 +978,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
memcpy(data, ucb->buf, tu->tp.size + dsize);
- event_trigger_unlock_commit(trace_file, buffer, event, entry, 0);
+ trace_event_buffer_commit(&fbuffer);
}
/* uprobe handler */
@@ -1076,14 +1071,12 @@ static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter)
static void __probe_event_disable(struct trace_probe *tp)
{
- struct trace_probe *pos;
struct trace_uprobe *tu;
tu = container_of(tp, struct trace_uprobe, tp);
WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter));
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
if (!tu->inode)
continue;
@@ -1095,7 +1088,7 @@ static void __probe_event_disable(struct trace_probe *tp)
static int probe_event_enable(struct trace_event_call *call,
struct trace_event_file *file, filter_func_t filter)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_uprobe *tu;
bool enabled;
int ret;
@@ -1130,8 +1123,7 @@ static int probe_event_enable(struct trace_event_call *call,
if (ret)
goto err_flags;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
ret = trace_uprobe_enable(tu, filter);
if (ret) {
__probe_event_disable(tp);
@@ -1276,7 +1268,7 @@ static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter,
static int uprobe_perf_close(struct trace_event_call *call,
struct perf_event *event)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_uprobe *tu;
int ret = 0;
@@ -1288,8 +1280,7 @@ static int uprobe_perf_close(struct trace_event_call *call,
if (trace_uprobe_filter_remove(tu->tp.event->filter, event))
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
ret = uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
if (ret)
break;
@@ -1301,7 +1292,7 @@ static int uprobe_perf_close(struct trace_event_call *call,
static int uprobe_perf_open(struct trace_event_call *call,
struct perf_event *event)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_uprobe *tu;
int err = 0;
@@ -1313,8 +1304,7 @@ static int uprobe_perf_open(struct trace_event_call *call,
if (trace_uprobe_filter_add(tu->tp.event->filter, event))
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
if (err) {
uprobe_perf_close(call, event);
@@ -1620,6 +1610,11 @@ create_local_trace_uprobe(char *name, unsigned long offs,
tu->path = path;
tu->ref_ctr_offset = ref_ctr_offset;
tu->filename = kstrdup(name, GFP_KERNEL);
+ if (!tu->filename) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
init_trace_event_call(tu);
ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index f00de83d0246..1d261fbe367b 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -38,11 +38,10 @@ void bacct_add_tsk(struct user_namespace *user_ns,
stats->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX);
stats->ac_btime64 = btime;
- if (thread_group_leader(tsk)) {
+ if (tsk->flags & PF_EXITING)
stats->ac_exitcode = tsk->exit_code;
- if (tsk->flags & PF_FORKNOEXEC)
- stats->ac_flag |= AFORK;
- }
+ if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC))
+ stats->ac_flag |= AFORK;
if (tsk->flags & PF_SUPERPRIV)
stats->ac_flag |= ASU;
if (tsk->flags & PF_DUMPCORE)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index ad912511a0c0..99afb88d2e85 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -740,6 +740,106 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
mutex_unlock(&watchdog_mutex);
return err;
}
+
+static const int sixty = 60;
+
+static struct ctl_table watchdog_sysctls[] = {
+ {
+ .procname = "watchdog",
+ .data = &watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_watchdog,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "watchdog_thresh",
+ .data = &watchdog_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_watchdog_thresh,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = (void *)&sixty,
+ },
+ {
+ .procname = "nmi_watchdog",
+ .data = &nmi_watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = NMI_WATCHDOG_SYSCTL_PERM,
+ .proc_handler = proc_nmi_watchdog,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "watchdog_cpumask",
+ .data = &watchdog_cpumask_bits,
+ .maxlen = NR_CPUS,
+ .mode = 0644,
+ .proc_handler = proc_watchdog_cpumask,
+ },
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+ {
+ .procname = "soft_watchdog",
+ .data = &soft_watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_soft_watchdog,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "softlockup_panic",
+ .data = &softlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#ifdef CONFIG_SMP
+ {
+ .procname = "softlockup_all_cpu_backtrace",
+ .data = &sysctl_softlockup_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_SMP */
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+ {
+ .procname = "hardlockup_panic",
+ .data = &hardlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#ifdef CONFIG_SMP
+ {
+ .procname = "hardlockup_all_cpu_backtrace",
+ .data = &sysctl_hardlockup_all_cpu_backtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_SMP */
+#endif
+ {}
+};
+
+static void __init watchdog_sysctl_init(void)
+{
+ register_sysctl_init("kernel", watchdog_sysctls);
+}
+#else
+#define watchdog_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
void __init lockup_detector_init(void)
@@ -753,4 +853,5 @@ void __init lockup_detector_init(void)
if (!watchdog_nmi_probe())
nmi_watchdog_available = true;
lockup_detector_setup();
+ watchdog_sysctl_init();
}
diff --git a/lib/Kconfig b/lib/Kconfig
index 655b0e43f260..c80fde816a7e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -65,9 +65,6 @@ config GENERIC_STRNLEN_USER
config GENERIC_NET_UTILS
bool
-config GENERIC_FIND_FIRST_BIT
- bool
-
source "lib/math/Kconfig"
config NO_GENERIC_PCI_IOPORT_MAP
@@ -122,6 +119,8 @@ config INDIRECT_IOMEM_FALLBACK
mmio accesses when the IO memory address is not a registered
emulated region.
+source "lib/crypto/Kconfig"
+
config CRC_CCITT
tristate "CRC-CCITT functions"
help
@@ -671,6 +670,10 @@ config STACKDEPOT
bool
select STACKTRACE
+config STACKDEPOT_ALWAYS_INIT
+ bool
+ select STACKDEPOT
+
config STACK_HASH_ORDER
int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
range 12 20
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c77fe36bb3d8..14b89aa37c5c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1984,6 +1984,8 @@ config KCOV
bool "Code coverage for fuzzing"
depends on ARCH_HAS_KCOV
depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
+ depends on !ARCH_WANTS_NO_INSTR || STACK_VALIDATION || \
+ GCC_VERSION >= 120000 || CLANG_VERSION >= 130000
select DEBUG_FS
select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
help
@@ -2222,12 +2224,11 @@ config TEST_RHASHTABLE
If unsure, say N.
-config TEST_HASH
- tristate "Perform selftest on hash functions"
+config TEST_SIPHASH
+ tristate "Perform selftest on siphash functions"
help
- Enable this option to test the kernel's integer (<linux/hash.h>),
- string (<linux/stringhash.h>), and siphash (<linux/siphash.h>)
- hash functions on boot (or module load).
+ Enable this option to test the kernel's siphash (<linux/siphash.h>) hash
+ functions on boot (or module load).
This is intended to help people writing architecture-specific
optimized versions. If unsure, say N.
@@ -2371,6 +2372,25 @@ config BITFIELD_KUNIT
If unsure, say N.
+config HASH_KUNIT_TEST
+ tristate "KUnit Test for integer hash functions" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Enable this option to test the kernel's string (<linux/stringhash.h>), and
+ integer (<linux/hash.h>) hash functions on boot.
+
+ KUnit tests run during boot and output the results to the debug log
+ in TAP format (https://testanything.org/). Only useful for kernel devs
+ running the KUnit test harness, and not intended for inclusion into a
+ production build.
+
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ This is intended to help people writing architecture-specific
+ optimized versions. If unsure, say N.
+
config RESOURCE_KUNIT_TEST
tristate "KUnit test for resource API"
depends on KUNIT
@@ -2502,6 +2522,7 @@ config TEST_KMOD
depends on m
depends on NETDEVICES && NET_CORE && INET # for TUN
depends on BLOCK
+ depends on PAGE_SIZE_LESS_THAN_256KB # for BTRFS
select TEST_LKM
select XFS_FS
select TUN
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index cdc842d090db..879757b6dd14 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -38,7 +38,7 @@ menuconfig KASAN
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS
depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
- select STACKDEPOT
+ select STACKDEPOT_ALWAYS_INIT
help
Enables KASAN (KernelAddressSANitizer) - runtime memory debugger,
designed to find out-of-bounds accesses and use-after-free bugs.
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index e5372a13511d..236c5cefc4cc 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -112,19 +112,6 @@ config UBSAN_UNREACHABLE
This option enables -fsanitize=unreachable which checks for control
flow reaching an expected-to-be-unreachable position.
-config UBSAN_OBJECT_SIZE
- bool "Perform checking for accesses beyond the end of objects"
- default UBSAN
- # gcc hugely expands stack usage with -fsanitize=object-size
- # https://lore.kernel.org/lkml/CAHk-=wjPasyJrDuwDnpHJS2TuQfExwe=px-SzLeN8GFMAQJPmQ@mail.gmail.com/
- depends on !CC_IS_GCC
- depends on $(cc-option,-fsanitize=object-size)
- help
- This option enables -fsanitize=object-size which checks for accesses
- beyond the end of objects where the optimizer can determine both the
- object being operated on and its size, usually seen with bad downcasts,
- or access to struct members from NULL pointers.
-
config UBSAN_BOOL
bool "Perform checking for non-boolean values used as boolean"
default UBSAN
diff --git a/lib/Makefile b/lib/Makefile
index b213a7bbf3fd..300f569c626b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -61,7 +61,8 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
obj-$(CONFIG_TEST_BITOPS) += test_bitops.o
CFLAGS_test_bitops.o += -Werror
obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
-obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
+obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o
+obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o
obj-$(CONFIG_TEST_IDA) += test_ida.o
obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o
CFLAGS_test_kasan.o += -fno-builtin
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 8620f38e117c..e8e525650cf2 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+menu "Crypto library routines"
+
config CRYPTO_LIB_AES
tristate
@@ -31,7 +33,7 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA
config CRYPTO_LIB_CHACHA_GENERIC
tristate
- select CRYPTO_ALGAPI
+ select XOR_BLOCKS
help
This symbol can be depended upon by arch implementations of the
ChaCha library interface that require the generic code as a
@@ -40,7 +42,8 @@ config CRYPTO_LIB_CHACHA_GENERIC
of CRYPTO_LIB_CHACHA.
config CRYPTO_LIB_CHACHA
- tristate
+ tristate "ChaCha library interface"
+ depends on CRYPTO
depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
help
@@ -65,7 +68,7 @@ config CRYPTO_LIB_CURVE25519_GENERIC
of CRYPTO_LIB_CURVE25519.
config CRYPTO_LIB_CURVE25519
- tristate
+ tristate "Curve25519 scalar multiplication library"
depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
help
@@ -100,7 +103,7 @@ config CRYPTO_LIB_POLY1305_GENERIC
of CRYPTO_LIB_POLY1305.
config CRYPTO_LIB_POLY1305
- tristate
+ tristate "Poly1305 library interface"
depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n
help
@@ -109,14 +112,18 @@ config CRYPTO_LIB_POLY1305
is available and enabled.
config CRYPTO_LIB_CHACHA20POLY1305
- tristate
+ tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)"
depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
+ depends on CRYPTO
select CRYPTO_LIB_CHACHA
select CRYPTO_LIB_POLY1305
+ select CRYPTO_ALGAPI
config CRYPTO_LIB_SHA256
tristate
config CRYPTO_LIB_SM4
tristate
+
+endmenu
diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c
index 5d9ea53be973..409e4b728770 100644
--- a/lib/crypto/blake2s-selftest.c
+++ b/lib/crypto/blake2s-selftest.c
@@ -15,7 +15,6 @@
* #include <stdio.h>
*
* #include <openssl/evp.h>
- * #include <openssl/hmac.h>
*
* #define BLAKE2S_TESTVEC_COUNT 256
*
@@ -58,16 +57,6 @@
* }
* printf("};\n\n");
*
- * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n");
- *
- * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL);
- * print_vec(hash, BLAKE2S_OUTBYTES);
- *
- * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL);
- * print_vec(hash, BLAKE2S_OUTBYTES);
- *
- * printf("};\n");
- *
* return 0;
*}
*/
@@ -554,15 +543,6 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, },
};
-static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
- { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70,
- 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79,
- 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, },
- { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9,
- 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f,
- 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, },
-};
-
bool __init blake2s_selftest(void)
{
u8 key[BLAKE2S_KEY_SIZE];
@@ -607,16 +587,5 @@ bool __init blake2s_selftest(void)
}
}
- if (success) {
- blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key));
- success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE);
-
- blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf));
- success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE);
-
- if (!success)
- pr_err("blake2s256_hmac self-test: FAIL\n");
- }
-
return success;
}
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index 93f2ae051370..9364f79937b8 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -30,43 +30,6 @@ void blake2s_final(struct blake2s_state *state, u8 *out)
}
EXPORT_SYMBOL(blake2s_final);
-void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
- const size_t keylen)
-{
- struct blake2s_state state;
- u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
- u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
- int i;
-
- if (keylen > BLAKE2S_BLOCK_SIZE) {
- blake2s_init(&state, BLAKE2S_HASH_SIZE);
- blake2s_update(&state, key, keylen);
- blake2s_final(&state, x_key);
- } else
- memcpy(x_key, key, keylen);
-
- for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
- x_key[i] ^= 0x36;
-
- blake2s_init(&state, BLAKE2S_HASH_SIZE);
- blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
- blake2s_update(&state, in, inlen);
- blake2s_final(&state, i_hash);
-
- for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
- x_key[i] ^= 0x5c ^ 0x36;
-
- blake2s_init(&state, BLAKE2S_HASH_SIZE);
- blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
- blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
- blake2s_final(&state, i_hash);
-
- memcpy(out, i_hash, BLAKE2S_HASH_SIZE);
- memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
- memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
-}
-EXPORT_SYMBOL(blake2s256_hmac);
-
static int __init blake2s_mod_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 0f8e2e369b1d..1b8e4b2a9cba 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -89,6 +89,27 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
EXPORT_SYMBOL(_find_first_bit);
#endif
+#ifndef find_first_and_bit
+/*
+ * Find the first set bit in two memory regions.
+ */
+unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ unsigned long idx, val;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+ val = addr1[idx] & addr2[idx];
+ if (val)
+ return min(idx * BITS_PER_LONG + __ffs(val), size);
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(_find_first_and_bit);
+#endif
+
#ifndef find_first_zero_bit
/*
* Find the first cleared bit in a memory region.
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
index 5637c5711db9..db904b57d4b8 100644
--- a/lib/find_bit_benchmark.c
+++ b/lib/find_bit_benchmark.c
@@ -49,6 +49,25 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len)
return 0;
}
+static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, unsigned long len)
+{
+ static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
+ unsigned long i, cnt;
+ ktime_t time;
+
+ bitmap_copy(cp, bitmap, BITMAP_LEN);
+
+ time = ktime_get();
+ for (cnt = i = 0; i < len; cnt++) {
+ i = find_first_and_bit(cp, bitmap2, len);
+ __clear_bit(i, cp);
+ }
+ time = ktime_get() - time;
+ pr_err("find_first_and_bit: %18llu ns, %6ld iterations\n", time, cnt);
+
+ return 0;
+}
+
static int __init test_find_next_bit(const void *bitmap, unsigned long len)
{
unsigned long i, cnt;
@@ -129,6 +148,7 @@ static int __init find_bit_test(void)
* traverse only part of bitmap to avoid soft lockup.
*/
test_find_first_bit(bitmap, BITMAP_LEN / 10);
+ test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
pr_err("\nStart testing find_bit() with sparse bitmap\n");
@@ -145,6 +165,7 @@ static int __init find_bit_test(void)
test_find_next_zero_bit(bitmap, BITMAP_LEN);
test_find_last_bit(bitmap, BITMAP_LEN);
test_find_first_bit(bitmap, BITMAP_LEN);
+ test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
/*
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 9a57257988c7..00fc50d0a640 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -251,7 +251,7 @@ void gen_pool_destroy(struct gen_pool *pool)
list_del(&chunk->next_chunk);
end_bit = chunk_size(chunk) >> order;
- bit = find_next_bit(chunk->bits, end_bit, 0);
+ bit = find_first_bit(chunk->bits, end_bit);
BUG_ON(bit < end_bit);
vfree(chunk);
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index 059b8b00dc53..886510d248e5 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -22,6 +22,7 @@
#include "kstrtox.h"
+noinline
const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
{
if (*base == 0) {
@@ -47,6 +48,7 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
*
* Don't you dare use this function.
*/
+noinline
unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
size_t max_chars)
{
@@ -85,6 +87,7 @@ unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned lon
return rv;
}
+noinline
unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
{
return _parse_integer_limit(s, base, p, INT_MAX);
@@ -125,6 +128,7 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
* Preferred over simple_strtoull(). Return code must be checked.
*/
+noinline
int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
{
if (s[0] == '+')
@@ -148,6 +152,7 @@ EXPORT_SYMBOL(kstrtoull);
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
* Preferred over simple_strtoll(). Return code must be checked.
*/
+noinline
int kstrtoll(const char *s, unsigned int base, long long *res)
{
unsigned long long tmp;
@@ -219,6 +224,7 @@ EXPORT_SYMBOL(_kstrtol);
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
* Preferred over simple_strtoul(). Return code must be checked.
*/
+noinline
int kstrtouint(const char *s, unsigned int base, unsigned int *res)
{
unsigned long long tmp;
@@ -249,6 +255,7 @@ EXPORT_SYMBOL(kstrtouint);
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
* Preferred over simple_strtol(). Return code must be checked.
*/
+noinline
int kstrtoint(const char *s, unsigned int base, int *res)
{
long long tmp;
@@ -264,6 +271,7 @@ int kstrtoint(const char *s, unsigned int base, int *res)
}
EXPORT_SYMBOL(kstrtoint);
+noinline
int kstrtou16(const char *s, unsigned int base, u16 *res)
{
unsigned long long tmp;
@@ -279,6 +287,7 @@ int kstrtou16(const char *s, unsigned int base, u16 *res)
}
EXPORT_SYMBOL(kstrtou16);
+noinline
int kstrtos16(const char *s, unsigned int base, s16 *res)
{
long long tmp;
@@ -294,6 +303,7 @@ int kstrtos16(const char *s, unsigned int base, s16 *res)
}
EXPORT_SYMBOL(kstrtos16);
+noinline
int kstrtou8(const char *s, unsigned int base, u8 *res)
{
unsigned long long tmp;
@@ -309,6 +319,7 @@ int kstrtou8(const char *s, unsigned int base, u8 *res)
}
EXPORT_SYMBOL(kstrtou8);
+noinline
int kstrtos8(const char *s, unsigned int base, s8 *res)
{
long long tmp;
@@ -333,6 +344,7 @@ EXPORT_SYMBOL(kstrtos8);
* [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value
* pointed to by res is updated upon finding a match.
*/
+noinline
int kstrtobool(const char *s, bool *res)
{
if (!s)
diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c
index 0dd434e40487..be38a2c5ecc2 100644
--- a/lib/kunit/try-catch.c
+++ b/lib/kunit/try-catch.c
@@ -17,7 +17,7 @@
void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch)
{
try_catch->try_result = -EFAULT;
- complete_and_exit(try_catch->try_completion, -EFAULT);
+ kthread_complete_and_exit(try_catch->try_completion, -EFAULT);
}
EXPORT_SYMBOL_GPL(kunit_try_catch_throw);
@@ -27,7 +27,7 @@ static int kunit_generic_run_threadfn_adapter(void *data)
try_catch->try(try_catch->context);
- complete_and_exit(try_catch->try_completion, 0);
+ kthread_complete_and_exit(try_catch->try_completion, 0);
}
static unsigned long kunit_test_timeout(void)
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 5d5424b51b74..9daa3fb9d1cd 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -49,11 +49,11 @@ bool __list_del_entry_valid(struct list_head *entry)
"list_del corruption, %px->prev is LIST_POISON2 (%px)\n",
entry, LIST_POISON2) ||
CHECK_DATA_CORRUPTION(prev->next != entry,
- "list_del corruption. prev->next should be %px, but was %px\n",
- entry, prev->next) ||
+ "list_del corruption. prev->next should be %px, but was %px. (prev=%px)\n",
+ entry, prev->next, prev) ||
CHECK_DATA_CORRUPTION(next->prev != entry,
- "list_del corruption. next->prev should be %px, but was %px\n",
- entry, next->prev))
+ "list_del corruption. next->prev should be %px, but was %px. (next=%px)\n",
+ entry, next->prev, next))
return false;
return true;
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index 673bd206aa98..330aa539b46e 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -36,6 +36,8 @@
*/
#include <asm/unaligned.h>
+
+#include <linux/bitops.h>
#include <linux/string.h> /* memset, memcpy */
#define FORCE_INLINE __always_inline
diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
index 0ae2e66dcf0f..a6789c0c626b 100644
--- a/lib/ref_tracker.c
+++ b/lib/ref_tracker.c
@@ -69,9 +69,12 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir,
unsigned long entries[REF_TRACKER_STACK_ENTRIES];
struct ref_tracker *tracker;
unsigned int nr_entries;
+ gfp_t gfp_mask = gfp;
unsigned long flags;
- *trackerp = tracker = kzalloc(sizeof(*tracker), gfp | __GFP_NOFAIL);
+ if (gfp & __GFP_DIRECT_RECLAIM)
+ gfp_mask |= __GFP_NOFAIL;
+ *trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask);
if (unlikely(!tracker)) {
pr_err_once("memory allocation failure, unreliable refcount tracker.\n");
refcount_inc(&dir->untracked);
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2709ab825499..6220fa67fb7e 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
- unsigned int depth)
+static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int wake_batch)
{
- unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
if (sbq->wake_batch != wake_batch) {
@@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
}
}
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
+{
+ unsigned int wake_batch;
+
+ wake_batch = sbq_calc_wake_batch(sbq, depth);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users)
+{
+ unsigned int wake_batch;
+
+ wake_batch = clamp_val((sbq->sb.depth + users - 1) /
+ users, 4, SBQ_WAKE_BATCH);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
+
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
sbitmap_queue_update_wake_batch(sbq, depth);
diff --git a/lib/sha1.c b/lib/sha1.c
index 9bd1935a1472..0494766fc574 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/bitops.h>
+#include <linux/string.h>
#include <crypto/sha1.h>
#include <asm/unaligned.h>
@@ -55,7 +56,8 @@
#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
__u32 TEMP = input(t); setW(t, TEMP); \
E += TEMP + rol32(A,5) + (fn) + (constant); \
- B = ror32(B, 2); } while (0)
+ B = ror32(B, 2); \
+ TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0)
#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
@@ -84,6 +86,7 @@
void sha1_transform(__u32 *digest, const char *data, __u32 *array)
{
__u32 A, B, C, D, E;
+ unsigned int i = 0;
A = digest[0];
B = digest[1];
@@ -92,94 +95,24 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array)
E = digest[4];
/* Round 1 - iterations 0-16 take their input from 'data' */
- T_0_15( 0, A, B, C, D, E);
- T_0_15( 1, E, A, B, C, D);
- T_0_15( 2, D, E, A, B, C);
- T_0_15( 3, C, D, E, A, B);
- T_0_15( 4, B, C, D, E, A);
- T_0_15( 5, A, B, C, D, E);
- T_0_15( 6, E, A, B, C, D);
- T_0_15( 7, D, E, A, B, C);
- T_0_15( 8, C, D, E, A, B);
- T_0_15( 9, B, C, D, E, A);
- T_0_15(10, A, B, C, D, E);
- T_0_15(11, E, A, B, C, D);
- T_0_15(12, D, E, A, B, C);
- T_0_15(13, C, D, E, A, B);
- T_0_15(14, B, C, D, E, A);
- T_0_15(15, A, B, C, D, E);
+ for (; i < 16; ++i)
+ T_0_15(i, A, B, C, D, E);
/* Round 1 - tail. Input from 512-bit mixing array */
- T_16_19(16, E, A, B, C, D);
- T_16_19(17, D, E, A, B, C);
- T_16_19(18, C, D, E, A, B);
- T_16_19(19, B, C, D, E, A);
+ for (; i < 20; ++i)
+ T_16_19(i, A, B, C, D, E);
/* Round 2 */
- T_20_39(20, A, B, C, D, E);
- T_20_39(21, E, A, B, C, D);
- T_20_39(22, D, E, A, B, C);
- T_20_39(23, C, D, E, A, B);
- T_20_39(24, B, C, D, E, A);
- T_20_39(25, A, B, C, D, E);
- T_20_39(26, E, A, B, C, D);
- T_20_39(27, D, E, A, B, C);
- T_20_39(28, C, D, E, A, B);
- T_20_39(29, B, C, D, E, A);
- T_20_39(30, A, B, C, D, E);
- T_20_39(31, E, A, B, C, D);
- T_20_39(32, D, E, A, B, C);
- T_20_39(33, C, D, E, A, B);
- T_20_39(34, B, C, D, E, A);
- T_20_39(35, A, B, C, D, E);
- T_20_39(36, E, A, B, C, D);
- T_20_39(37, D, E, A, B, C);
- T_20_39(38, C, D, E, A, B);
- T_20_39(39, B, C, D, E, A);
+ for (; i < 40; ++i)
+ T_20_39(i, A, B, C, D, E);
/* Round 3 */
- T_40_59(40, A, B, C, D, E);
- T_40_59(41, E, A, B, C, D);
- T_40_59(42, D, E, A, B, C);
- T_40_59(43, C, D, E, A, B);
- T_40_59(44, B, C, D, E, A);
- T_40_59(45, A, B, C, D, E);
- T_40_59(46, E, A, B, C, D);
- T_40_59(47, D, E, A, B, C);
- T_40_59(48, C, D, E, A, B);
- T_40_59(49, B, C, D, E, A);
- T_40_59(50, A, B, C, D, E);
- T_40_59(51, E, A, B, C, D);
- T_40_59(52, D, E, A, B, C);
- T_40_59(53, C, D, E, A, B);
- T_40_59(54, B, C, D, E, A);
- T_40_59(55, A, B, C, D, E);
- T_40_59(56, E, A, B, C, D);
- T_40_59(57, D, E, A, B, C);
- T_40_59(58, C, D, E, A, B);
- T_40_59(59, B, C, D, E, A);
+ for (; i < 60; ++i)
+ T_40_59(i, A, B, C, D, E);
/* Round 4 */
- T_60_79(60, A, B, C, D, E);
- T_60_79(61, E, A, B, C, D);
- T_60_79(62, D, E, A, B, C);
- T_60_79(63, C, D, E, A, B);
- T_60_79(64, B, C, D, E, A);
- T_60_79(65, A, B, C, D, E);
- T_60_79(66, E, A, B, C, D);
- T_60_79(67, D, E, A, B, C);
- T_60_79(68, C, D, E, A, B);
- T_60_79(69, B, C, D, E, A);
- T_60_79(70, A, B, C, D, E);
- T_60_79(71, E, A, B, C, D);
- T_60_79(72, D, E, A, B, C);
- T_60_79(73, C, D, E, A, B);
- T_60_79(74, B, C, D, E, A);
- T_60_79(75, A, B, C, D, E);
- T_60_79(76, E, A, B, C, D);
- T_60_79(77, D, E, A, B, C);
- T_60_79(78, C, D, E, A, B);
- T_60_79(79, B, C, D, E, A);
+ for (; i < 80; ++i)
+ T_60_79(i, A, B, C, D, E);
digest[0] += A;
digest[1] += B;
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index b437ae79aca1..bf5ba9af0500 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -23,6 +23,7 @@
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
@@ -161,18 +162,40 @@ static int __init is_stack_depot_disabled(char *str)
}
early_param("stack_depot_disable", is_stack_depot_disabled);
-int __init stack_depot_init(void)
+/*
+ * __ref because of memblock_alloc(), which will not be actually called after
+ * the __init code is gone, because at that point slab_is_available() is true
+ */
+__ref int stack_depot_init(void)
{
- if (!stack_depot_disable) {
+ static DEFINE_MUTEX(stack_depot_init_mutex);
+
+ mutex_lock(&stack_depot_init_mutex);
+ if (!stack_depot_disable && !stack_table) {
size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
int i;
- stack_table = memblock_alloc(size, size);
- for (i = 0; i < STACK_HASH_SIZE; i++)
- stack_table[i] = NULL;
+ if (slab_is_available()) {
+ pr_info("Stack Depot allocating hash table with kvmalloc\n");
+ stack_table = kvmalloc(size, GFP_KERNEL);
+ } else {
+ pr_info("Stack Depot allocating hash table with memblock_alloc\n");
+ stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
+ }
+ if (stack_table) {
+ for (i = 0; i < STACK_HASH_SIZE; i++)
+ stack_table[i] = NULL;
+ } else {
+ pr_err("Stack Depot hash table allocation failed, disabling\n");
+ stack_depot_disable = true;
+ mutex_unlock(&stack_depot_init_mutex);
+ return -ENOMEM;
+ }
}
+ mutex_unlock(&stack_depot_init_mutex);
return 0;
}
+EXPORT_SYMBOL_GPL(stack_depot_init);
/* Calculate hash for a stack */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
@@ -305,6 +328,9 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
* (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids
* any allocations and will fail if no space is left to store the stack trace.
*
+ * If the stack trace in @entries is from an interrupt, only the portion up to
+ * interrupt entry is saved.
+ *
* Context: Any context, but setting @can_alloc to %false is required if
* alloc_pages() cannot be used from the current context. Currently
* this is the case from contexts where neither %GFP_ATOMIC nor
@@ -323,6 +349,16 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
unsigned long flags;
u32 hash;
+ /*
+ * If this stack trace is from an interrupt, including anything before
+ * interrupt entry usually leads to unbounded stackdepot growth.
+ *
+ * Because use of filter_irq_stacks() is a requirement to ensure
+ * stackdepot can efficiently deduplicate interrupt stacks, always
+ * filter_irq_stacks() to simplify all callers' use of stackdepot.
+ */
+ nr_entries = filter_irq_stacks(entries, nr_entries);
+
if (unlikely(nr_entries == 0) || stack_depot_disable)
goto fast_exit;
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index d33fa5a61b95..0c82f07f74fc 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -446,6 +446,42 @@ static void __init test_bitmap_parselist(void)
}
}
+static void __init test_bitmap_printlist(void)
+{
+ unsigned long *bmap = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ char expected[256];
+ int ret, slen;
+ ktime_t time;
+
+ if (!buf || !bmap)
+ goto out;
+
+ memset(bmap, -1, PAGE_SIZE);
+ slen = snprintf(expected, 256, "0-%ld", PAGE_SIZE * 8 - 1);
+ if (slen < 0)
+ goto out;
+
+ time = ktime_get();
+ ret = bitmap_print_to_pagebuf(true, buf, bmap, PAGE_SIZE * 8);
+ time = ktime_get() - time;
+
+ if (ret != slen + 1) {
+ pr_err("bitmap_print_to_pagebuf: result is %d, expected %d\n", ret, slen);
+ goto out;
+ }
+
+ if (strncmp(buf, expected, slen)) {
+ pr_err("bitmap_print_to_pagebuf: result is %s, expected %s\n", buf, expected);
+ goto out;
+ }
+
+ pr_err("bitmap_print_to_pagebuf: input is '%s', Time: %llu\n", buf, time);
+out:
+ kfree(buf);
+ kfree(bmap);
+}
+
static const unsigned long parse_test[] __initconst = {
BITMAP_FROM_U64(0),
BITMAP_FROM_U64(1),
@@ -818,6 +854,7 @@ static void __init selftest(void)
test_bitmap_arr32();
test_bitmap_parse();
test_bitmap_parselist();
+ test_bitmap_printlist();
test_mem_optimisations();
test_for_each_set_clump8();
test_bitmap_cut();
diff --git a/lib/test_hash.c b/lib/test_hash.c
index 0ee40b4a56dd..bb25fda34794 100644
--- a/lib/test_hash.c
+++ b/lib/test_hash.c
@@ -14,17 +14,15 @@
* and hash_64().
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n"
-
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/hash.h>
#include <linux/stringhash.h>
-#include <linux/printk.h>
+#include <kunit/test.h>
/* 32-bit XORSHIFT generator. Seed must not be zero. */
-static u32 __init __attribute_const__
+static u32 __attribute_const__
xorshift(u32 seed)
{
seed ^= seed << 13;
@@ -34,7 +32,7 @@ xorshift(u32 seed)
}
/* Given a non-zero x, returns a non-zero byte. */
-static u8 __init __attribute_const__
+static u8 __attribute_const__
mod255(u32 x)
{
x = (x & 0xffff) + (x >> 16); /* 1 <= x <= 0x1fffe */
@@ -45,8 +43,7 @@ mod255(u32 x)
}
/* Fill the buffer with non-zero bytes. */
-static void __init
-fill_buf(char *buf, size_t len, u32 seed)
+static void fill_buf(char *buf, size_t len, u32 seed)
{
size_t i;
@@ -56,6 +53,50 @@ fill_buf(char *buf, size_t len, u32 seed)
}
}
+/* Holds most testing variables for the int test. */
+struct test_hash_params {
+ /* Pointer to integer to be hashed. */
+ unsigned long long *h64;
+ /* Low 32-bits of integer to be hashed. */
+ u32 h0;
+ /* Arch-specific hash result. */
+ u32 h1;
+ /* Generic hash result. */
+ u32 h2;
+ /* ORed hashes of given size (in bits). */
+ u32 (*hash_or)[33];
+};
+
+#ifdef HAVE_ARCH__HASH_32
+static void
+test_int__hash_32(struct kunit *test, struct test_hash_params *params)
+{
+ params->hash_or[1][0] |= params->h2 = __hash_32_generic(params->h0);
+#if HAVE_ARCH__HASH_32 == 1
+ KUNIT_EXPECT_EQ_MSG(test, params->h1, params->h2,
+ "__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
+ params->h0, params->h1, params->h2);
+#endif
+}
+#endif
+
+#ifdef HAVE_ARCH_HASH_64
+static void
+test_int_hash_64(struct kunit *test, struct test_hash_params *params, u32 const *m, int *k)
+{
+ params->h2 = hash_64_generic(*params->h64, *k);
+#if HAVE_ARCH_HASH_64 == 1
+ KUNIT_EXPECT_EQ_MSG(test, params->h1, params->h2,
+ "hash_64(%#llx, %d) = %#x != hash_64_generic() = %#x",
+ *params->h64, *k, params->h1, params->h2);
+#else
+ KUNIT_EXPECT_LE_MSG(test, params->h1, params->h2,
+ "hash_64_generic(%#llx, %d) = %#x > %#x",
+ *params->h64, *k, params->h1, *m);
+#endif
+}
+#endif
+
/*
* Test the various integer hash functions. h64 (or its low-order bits)
* is the integer to hash. hash_or accumulates the OR of the hash values,
@@ -65,23 +106,16 @@ fill_buf(char *buf, size_t len, u32 seed)
* inline, the code being tested is actually in the module, and you can
* recompile and re-test the module without rebooting.
*/
-static bool __init
-test_int_hash(unsigned long long h64, u32 hash_or[2][33])
+static void
+test_int_hash(struct kunit *test, unsigned long long h64, u32 hash_or[2][33])
{
int k;
- u32 h0 = (u32)h64, h1, h2;
+ struct test_hash_params params = { &h64, (u32)h64, 0, 0, hash_or };
/* Test __hash32 */
- hash_or[0][0] |= h1 = __hash_32(h0);
+ hash_or[0][0] |= params.h1 = __hash_32(params.h0);
#ifdef HAVE_ARCH__HASH_32
- hash_or[1][0] |= h2 = __hash_32_generic(h0);
-#if HAVE_ARCH__HASH_32 == 1
- if (h1 != h2) {
- pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
- h0, h1, h2);
- return false;
- }
-#endif
+ test_int__hash_32(test, &params);
#endif
/* Test k = 1..32 bits */
@@ -89,63 +123,53 @@ test_int_hash(unsigned long long h64, u32 hash_or[2][33])
u32 const m = ((u32)2 << (k-1)) - 1; /* Low k bits set */
/* Test hash_32 */
- hash_or[0][k] |= h1 = hash_32(h0, k);
- if (h1 > m) {
- pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m);
- return false;
- }
-#ifdef HAVE_ARCH_HASH_32
- h2 = hash_32_generic(h0, k);
-#if HAVE_ARCH_HASH_32 == 1
- if (h1 != h2) {
- pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() "
- " = %#x", h0, k, h1, h2);
- return false;
- }
-#else
- if (h2 > m) {
- pr_err("hash_32_generic(%#x, %d) = %#x > %#x",
- h0, k, h1, m);
- return false;
- }
-#endif
-#endif
+ hash_or[0][k] |= params.h1 = hash_32(params.h0, k);
+ KUNIT_EXPECT_LE_MSG(test, params.h1, m,
+ "hash_32(%#x, %d) = %#x > %#x",
+ params.h0, k, params.h1, m);
+
/* Test hash_64 */
- hash_or[1][k] |= h1 = hash_64(h64, k);
- if (h1 > m) {
- pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m);
- return false;
- }
+ hash_or[1][k] |= params.h1 = hash_64(h64, k);
+ KUNIT_EXPECT_LE_MSG(test, params.h1, m,
+ "hash_64(%#llx, %d) = %#x > %#x",
+ h64, k, params.h1, m);
#ifdef HAVE_ARCH_HASH_64
- h2 = hash_64_generic(h64, k);
-#if HAVE_ARCH_HASH_64 == 1
- if (h1 != h2) {
- pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() "
- "= %#x", h64, k, h1, h2);
- return false;
- }
-#else
- if (h2 > m) {
- pr_err("hash_64_generic(%#llx, %d) = %#x > %#x",
- h64, k, h1, m);
- return false;
- }
-#endif
+ test_int_hash_64(test, &params, &m, &k);
#endif
}
-
- (void)h2; /* Suppress unused variable warning */
- return true;
}
#define SIZE 256 /* Run time is cubic in SIZE */
-static int __init
-test_hash_init(void)
+static void test_string_or(struct kunit *test)
{
char buf[SIZE+1];
- u32 string_or = 0, hash_or[2][33] = { { 0, } };
- unsigned tests = 0;
+ u32 string_or = 0;
+ int i, j;
+
+ fill_buf(buf, SIZE, 1);
+
+ /* Test every possible non-empty substring in the buffer. */
+ for (j = SIZE; j > 0; --j) {
+ buf[j] = '\0';
+
+ for (i = 0; i <= j; i++) {
+ u32 h0 = full_name_hash(buf+i, buf+i, j-i);
+
+ string_or |= h0;
+ } /* i */
+ } /* j */
+
+ /* The OR of all the hash values should cover all the bits */
+ KUNIT_EXPECT_EQ_MSG(test, string_or, -1u,
+ "OR of all string hash results = %#x != %#x",
+ string_or, -1u);
+}
+
+static void test_hash_or(struct kunit *test)
+{
+ char buf[SIZE+1];
+ u32 hash_or[2][33] = { { 0, } };
unsigned long long h64 = 0;
int i, j;
@@ -160,46 +184,27 @@ test_hash_init(void)
u32 h0 = full_name_hash(buf+i, buf+i, j-i);
/* Check that hashlen_string gets the length right */
- if (hashlen_len(hashlen) != j-i) {
- pr_err("hashlen_string(%d..%d) returned length"
- " %u, expected %d",
- i, j, hashlen_len(hashlen), j-i);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hashlen_len(hashlen), j-i,
+ "hashlen_string(%d..%d) returned length %u, expected %d",
+ i, j, hashlen_len(hashlen), j-i);
/* Check that the hashes match */
- if (hashlen_hash(hashlen) != h0) {
- pr_err("hashlen_string(%d..%d) = %08x != "
- "full_name_hash() = %08x",
- i, j, hashlen_hash(hashlen), h0);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hashlen_hash(hashlen), h0,
+ "hashlen_string(%d..%d) = %08x != full_name_hash() = %08x",
+ i, j, hashlen_hash(hashlen), h0);
- string_or |= h0;
h64 = h64 << 32 | h0; /* For use with hash_64 */
- if (!test_int_hash(h64, hash_or))
- return -EINVAL;
- tests++;
+ test_int_hash(test, h64, hash_or);
} /* i */
} /* j */
- /* The OR of all the hash values should cover all the bits */
- if (~string_or) {
- pr_err("OR of all string hash results = %#x != %#x",
- string_or, -1u);
- return -EINVAL;
- }
- if (~hash_or[0][0]) {
- pr_err("OR of all __hash_32 results = %#x != %#x",
- hash_or[0][0], -1u);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hash_or[0][0], -1u,
+ "OR of all __hash_32 results = %#x != %#x",
+ hash_or[0][0], -1u);
#ifdef HAVE_ARCH__HASH_32
#if HAVE_ARCH__HASH_32 != 1 /* Test is pointless if results match */
- if (~hash_or[1][0]) {
- pr_err("OR of all __hash_32_generic results = %#x != %#x",
- hash_or[1][0], -1u);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hash_or[1][0], -1u,
+ "OR of all __hash_32_generic results = %#x != %#x",
+ hash_or[1][0], -1u);
#endif
#endif
@@ -207,51 +212,27 @@ test_hash_init(void)
for (i = 1; i <= 32; i++) {
u32 const m = ((u32)2 << (i-1)) - 1; /* Low i bits set */
- if (hash_or[0][i] != m) {
- pr_err("OR of all hash_32(%d) results = %#x "
- "(%#x expected)", i, hash_or[0][i], m);
- return -EINVAL;
- }
- if (hash_or[1][i] != m) {
- pr_err("OR of all hash_64(%d) results = %#x "
- "(%#x expected)", i, hash_or[1][i], m);
- return -EINVAL;
- }
+ KUNIT_EXPECT_EQ_MSG(test, hash_or[0][i], m,
+ "OR of all hash_32(%d) results = %#x (%#x expected)",
+ i, hash_or[0][i], m);
+ KUNIT_EXPECT_EQ_MSG(test, hash_or[1][i], m,
+ "OR of all hash_64(%d) results = %#x (%#x expected)",
+ i, hash_or[1][i], m);
}
+}
- /* Issue notices about skipped tests. */
-#ifdef HAVE_ARCH__HASH_32
-#if HAVE_ARCH__HASH_32 != 1
- pr_info("__hash_32() is arch-specific; not compared to generic.");
-#endif
-#else
- pr_info("__hash_32() has no arch implementation to test.");
-#endif
-#ifdef HAVE_ARCH_HASH_32
-#if HAVE_ARCH_HASH_32 != 1
- pr_info("hash_32() is arch-specific; not compared to generic.");
-#endif
-#else
- pr_info("hash_32() has no arch implementation to test.");
-#endif
-#ifdef HAVE_ARCH_HASH_64
-#if HAVE_ARCH_HASH_64 != 1
- pr_info("hash_64() is arch-specific; not compared to generic.");
-#endif
-#else
- pr_info("hash_64() has no arch implementation to test.");
-#endif
-
- pr_notice("%u tests passed.", tests);
+static struct kunit_case hash_test_cases[] __refdata = {
+ KUNIT_CASE(test_string_or),
+ KUNIT_CASE(test_hash_or),
+ {}
+};
- return 0;
-}
+static struct kunit_suite hash_test_suite = {
+ .name = "hash",
+ .test_cases = hash_test_cases,
+};
-static void __exit test_hash_exit(void)
-{
-}
-module_init(test_hash_init); /* Does everything */
-module_exit(test_hash_exit); /* Does nothing */
+kunit_test_suite(hash_test_suite);
MODULE_LICENSE("GPL");
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e2ce8f9b7605..767538089a62 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1086,9 +1086,33 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
return 0;
}
+static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long addr;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ struct page *page;
+ int ret;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return -ENOMEM;
+
+ ret = vm_insert_page(vma, addr, page);
+ if (ret) {
+ __free_page(page);
+ return ret;
+ }
+ put_page(page);
+ }
+
+ return 0;
+}
+
static const struct file_operations dmirror_fops = {
.open = dmirror_fops_open,
.release = dmirror_fops_release,
+ .mmap = dmirror_fops_mmap,
.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
.llseek = default_llseek,
.owner = THIS_MODULE,
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 0643573f8686..847cdbefab46 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -700,7 +700,7 @@ static void kmem_cache_bulk(struct kunit *test)
static char global_array[10];
-static void kasan_global_oob(struct kunit *test)
+static void kasan_global_oob_right(struct kunit *test)
{
/*
* Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS
@@ -723,6 +723,20 @@ static void kasan_global_oob(struct kunit *test)
KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
}
+static void kasan_global_oob_left(struct kunit *test)
+{
+ char *volatile array = global_array;
+ char *p = array - 3;
+
+ /*
+ * GCC is known to fail this test, skip it.
+ * See https://bugzilla.kernel.org/show_bug.cgi?id=215051.
+ */
+ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_CC_IS_CLANG);
+ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
+ KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
+}
+
/* Check that ksize() makes the whole object accessible. */
static void ksize_unpoisons_memory(struct kunit *test)
{
@@ -852,6 +866,16 @@ static void kmem_cache_invalid_free(struct kunit *test)
kmem_cache_destroy(cache);
}
+static void kmem_cache_double_destroy(struct kunit *test)
+{
+ struct kmem_cache *cache;
+
+ cache = kmem_cache_create("test_cache", 200, 0, 0, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
+ kmem_cache_destroy(cache);
+ KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_destroy(cache));
+}
+
static void kasan_memchr(struct kunit *test)
{
char *ptr;
@@ -1162,7 +1186,8 @@ static struct kunit_case kasan_kunit_test_cases[] = {
KUNIT_CASE(kmem_cache_oob),
KUNIT_CASE(kmem_cache_accounted),
KUNIT_CASE(kmem_cache_bulk),
- KUNIT_CASE(kasan_global_oob),
+ KUNIT_CASE(kasan_global_oob_right),
+ KUNIT_CASE(kasan_global_oob_left),
KUNIT_CASE(kasan_stack_oob),
KUNIT_CASE(kasan_alloca_oob_left),
KUNIT_CASE(kasan_alloca_oob_right),
@@ -1170,6 +1195,7 @@ static struct kunit_case kasan_kunit_test_cases[] = {
KUNIT_CASE(ksize_uaf),
KUNIT_CASE(kmem_cache_double_free),
KUNIT_CASE(kmem_cache_invalid_free),
+ KUNIT_CASE(kmem_cache_double_destroy),
KUNIT_CASE(kasan_memchr),
KUNIT_CASE(kasan_memcmp),
KUNIT_CASE(kasan_strings),
diff --git a/lib/test_meminit.c b/lib/test_meminit.c
index e4f706a404b3..3ca717f11397 100644
--- a/lib/test_meminit.c
+++ b/lib/test_meminit.c
@@ -337,6 +337,7 @@ static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
if (num)
kmem_cache_free_bulk(c, num, objects);
}
+ kmem_cache_destroy(c);
*total_failures += fail;
return 1;
}
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 3750323973f4..a5a3d6c27e1f 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -128,26 +128,6 @@ static struct ctl_table test_table[] = {
{ }
};
-static struct ctl_table test_sysctl_table[] = {
- {
- .procname = "test_sysctl",
- .maxlen = 0,
- .mode = 0555,
- .child = test_table,
- },
- { }
-};
-
-static struct ctl_table test_sysctl_root_table[] = {
- {
- .procname = "debug",
- .maxlen = 0,
- .mode = 0555,
- .child = test_sysctl_table,
- },
- { }
-};
-
static struct ctl_table_header *test_sysctl_header;
static int __init test_sysctl_init(void)
@@ -155,7 +135,7 @@ static int __init test_sysctl_init(void)
test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL);
if (!test_data.bitmap_0001)
return -ENOMEM;
- test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
+ test_sysctl_header = register_sysctl("debug/test_sysctl", test_table);
if (!test_sysctl_header) {
kfree(test_data.bitmap_0001);
return -ENOMEM;
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
index 7e7bbd0f3fd2..2062be1f2e80 100644
--- a/lib/test_ubsan.c
+++ b/lib/test_ubsan.c
@@ -79,15 +79,6 @@ static void test_ubsan_load_invalid_value(void)
eval2 = eval;
}
-static void test_ubsan_null_ptr_deref(void)
-{
- volatile int *ptr = NULL;
- int val;
-
- UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE);
- val = *ptr;
-}
-
static void test_ubsan_misaligned_access(void)
{
volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5};
@@ -98,29 +89,16 @@ static void test_ubsan_misaligned_access(void)
*ptr = val;
}
-static void test_ubsan_object_size_mismatch(void)
-{
- /* "((aligned(8)))" helps this not into be misaligned for ptr-access. */
- volatile int val __aligned(8) = 4;
- volatile long long *ptr, val2;
-
- UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE);
- ptr = (long long *)&val;
- val2 = *ptr;
-}
-
static const test_ubsan_fp test_ubsan_array[] = {
test_ubsan_shift_out_of_bounds,
test_ubsan_out_of_bounds,
test_ubsan_load_invalid_value,
test_ubsan_misaligned_access,
- test_ubsan_object_size_mismatch,
};
/* Excluded because they Oops the module. */
static const test_ubsan_fp skip_ubsan_array[] = {
test_ubsan_divrem_overflow,
- test_ubsan_null_ptr_deref,
};
static int __init test_ubsan_init(void)
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 53d6081f9e8b..3b8129dd374c 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1241,20 +1241,13 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap,
struct printf_spec spec, const char *fmt)
{
int nr_bits = max_t(int, spec.field_width, 0);
- /* current bit is 'cur', most recently seen range is [rbot, rtop] */
- int cur, rbot, rtop;
bool first = true;
+ int rbot, rtop;
if (check_pointer(&buf, end, bitmap, spec))
return buf;
- rbot = cur = find_first_bit(bitmap, nr_bits);
- while (cur < nr_bits) {
- rtop = cur;
- cur = find_next_bit(bitmap, nr_bits, cur + 1);
- if (cur < nr_bits && cur <= rtop + 1)
- continue;
-
+ for_each_set_bitrange(rbot, rtop, bitmap, nr_bits) {
if (!first) {
if (buf < end)
*buf = ',';
@@ -1263,15 +1256,12 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap,
first = false;
buf = number(buf, end, rbot, default_dec_spec);
- if (rbot < rtop) {
- if (buf < end)
- *buf = '-';
- buf++;
-
- buf = number(buf, end, rtop, default_dec_spec);
- }
+ if (rtop == rbot + 1)
+ continue;
- rbot = cur;
+ if (buf < end)
+ *buf = '-';
+ buf = number(++buf, end, rtop - 1, default_dec_spec);
}
return buf;
}
diff --git a/mm/Kconfig b/mm/Kconfig
index 356f4f2c779e..3326ee3903f3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -432,43 +432,20 @@ config NEED_PER_CPU_KM
bool
default y
-config CLEANCACHE
- bool "Enable cleancache driver to cache clean pages if tmem is present"
- help
- Cleancache can be thought of as a page-granularity victim cache
- for clean pages that the kernel's pageframe replacement algorithm
- (PFRA) would like to keep around, but can't since there isn't enough
- memory. So when the PFRA "evicts" a page, it first attempts to use
- cleancache code to put the data contained in that page into
- "transcendent memory", memory that is not directly accessible or
- addressable by the kernel and is of unknown and possibly
- time-varying size. And when a cleancache-enabled
- filesystem wishes to access a page in a file on disk, it first
- checks cleancache to see if it already contains it; if it does,
- the page is copied into the kernel and a disk access is avoided.
- When a transcendent memory driver is available (such as zcache or
- Xen transcendent memory), a significant I/O reduction
- may be achieved. When none is available, all cleancache calls
- are reduced to a single pointer-compare-against-NULL resulting
- in a negligible performance hit.
-
- If unsure, say Y to enable cleancache
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+ bool
-config FRONTSWAP
- bool "Enable frontswap to cache swap pages if tmem is present"
- depends on SWAP
- help
- Frontswap is so named because it can be thought of as the opposite
- of a "backing" store for a swap device. The data is stored into
- "transcendent memory", memory that is not directly accessible or
- addressable by the kernel and is of unknown and possibly
- time-varying size. When space in transcendent memory is available,
- a significant swap I/O reduction may be achieved. When none is
- available, all frontswap calls are reduced to a single pointer-
- compare-against-NULL resulting in a negligible performance hit
- and swap data is stored as normal on the matching swap device.
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+ bool
+
+config USE_PERCPU_NUMA_NODE_ID
+ bool
+
+config HAVE_SETUP_PER_CPU_AREA
+ bool
- If unsure, say Y to enable frontswap.
+config FRONTSWAP
+ bool
config CMA
bool "Contiguous Memory Allocator"
@@ -533,7 +510,8 @@ config MEM_SOFT_DIRTY
config ZSWAP
bool "Compressed cache for swap pages (EXPERIMENTAL)"
- depends on FRONTSWAP && CRYPTO=y
+ depends on SWAP && CRYPTO=y
+ select FRONTSWAP
select ZPOOL
help
A lightweight compressed cache for swap pages. It takes
@@ -900,6 +878,20 @@ config IO_MAPPING
config SECRETMEM
def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
+config ANON_VMA_NAME
+ bool "Anonymous VMA name support"
+ depends on PROC_FS && ADVISE_SYSCALLS && MMU
+
+ help
+ Allow naming anonymous virtual memory areas.
+
+ This feature allows assigning names to virtual memory areas. Assigned
+ names can be later retrieved from /proc/pid/maps and /proc/pid/smaps
+ and help identifying individual anonymous memory areas.
+ Assigning a name to anonymous virtual memory area might prevent that
+ area from being merged with adjacent virtual memory areas due to the
+ difference in their name.
+
source "mm/damon/Kconfig"
endmenu
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 1e73717802f8..5bd5bb097252 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -62,6 +62,30 @@ config PAGE_OWNER
If unsure, say N.
+config PAGE_TABLE_CHECK
+ bool "Check for invalid mappings in user page tables"
+ depends on ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select PAGE_EXTENSION
+ help
+ Check that anonymous page is not being mapped twice with read write
+ permissions. Check that anonymous and file pages are not being
+ erroneously shared. Since the checking is performed at the time
+ entries are added and removed to user page tables, leaking, corruption
+ and double mapping problems are detected synchronously.
+
+ If unsure say "n".
+
+config PAGE_TABLE_CHECK_ENFORCED
+ bool "Enforce the page table checking by default"
+ depends on PAGE_TABLE_CHECK
+ help
+ Always enable page table checking. By default the page table checking
+ is disabled, and can be optionally enabled via page_table_check=on
+ kernel parameter. This config enforces that page table check is always
+ enabled.
+
+ If unsure say "n".
+
config PAGE_POISONING
bool "Poison pages after freeing"
help
diff --git a/mm/Makefile b/mm/Makefile
index 7919cd7f13f2..70d4309c9ce3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -104,7 +104,6 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o
obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o
obj-$(CONFIG_PAGE_OWNER) += page_owner.o
-obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
obj-$(CONFIG_ZPOOL) += zpool.o
obj-$(CONFIG_ZBUD) += zbud.o
@@ -114,6 +113,7 @@ obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
obj-$(CONFIG_CMA) += cma.o
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
+obj-$(CONFIG_PAGE_TABLE_CHECK) += page_table_check.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
obj-$(CONFIG_SECRETMEM) += secretmem.o
obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
diff --git a/mm/cleancache.c b/mm/cleancache.c
deleted file mode 100644
index db7eee9c0886..000000000000
--- a/mm/cleancache.c
+++ /dev/null
@@ -1,315 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Cleancache frontend
- *
- * This code provides the generic "frontend" layer to call a matching
- * "backend" driver implementation of cleancache. See
- * Documentation/vm/cleancache.rst for more information.
- *
- * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
- * Author: Dan Magenheimer
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/exportfs.h>
-#include <linux/mm.h>
-#include <linux/debugfs.h>
-#include <linux/cleancache.h>
-
-/*
- * cleancache_ops is set by cleancache_register_ops to contain the pointers
- * to the cleancache "backend" implementation functions.
- */
-static const struct cleancache_ops *cleancache_ops __read_mostly;
-
-/*
- * Counters available via /sys/kernel/debug/cleancache (if debugfs is
- * properly configured. These are for information only so are not protected
- * against increment races.
- */
-static u64 cleancache_succ_gets;
-static u64 cleancache_failed_gets;
-static u64 cleancache_puts;
-static u64 cleancache_invalidates;
-
-static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
-{
- switch (sb->cleancache_poolid) {
- case CLEANCACHE_NO_BACKEND:
- __cleancache_init_fs(sb);
- break;
- case CLEANCACHE_NO_BACKEND_SHARED:
- __cleancache_init_shared_fs(sb);
- break;
- }
-}
-
-/*
- * Register operations for cleancache. Returns 0 on success.
- */
-int cleancache_register_ops(const struct cleancache_ops *ops)
-{
- if (cmpxchg(&cleancache_ops, NULL, ops))
- return -EBUSY;
-
- /*
- * A cleancache backend can be built as a module and hence loaded after
- * a cleancache enabled filesystem has called cleancache_init_fs. To
- * handle such a scenario, here we call ->init_fs or ->init_shared_fs
- * for each active super block. To differentiate between local and
- * shared filesystems, we temporarily initialize sb->cleancache_poolid
- * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED
- * respectively in case there is no backend registered at the time
- * cleancache_init_fs or cleancache_init_shared_fs is called.
- *
- * Since filesystems can be mounted concurrently with cleancache
- * backend registration, we have to be careful to guarantee that all
- * cleancache enabled filesystems that has been mounted by the time
- * cleancache_register_ops is called has got and all mounted later will
- * get cleancache_poolid. This is assured by the following statements
- * tied together:
- *
- * a) iterate_supers skips only those super blocks that has started
- * ->kill_sb
- *
- * b) if iterate_supers encounters a super block that has not finished
- * ->mount yet, it waits until it is finished
- *
- * c) cleancache_init_fs is called from ->mount and
- * cleancache_invalidate_fs is called from ->kill_sb
- *
- * d) we call iterate_supers after cleancache_ops has been set
- *
- * From a) it follows that if iterate_supers skips a super block, then
- * either the super block is already dead, in which case we do not need
- * to bother initializing cleancache for it, or it was mounted after we
- * initiated iterate_supers. In the latter case, it must have seen
- * cleancache_ops set according to d) and initialized cleancache from
- * ->mount by itself according to c). This proves that we call
- * ->init_fs at least once for each active super block.
- *
- * From b) and c) it follows that if iterate_supers encounters a super
- * block that has already started ->init_fs, it will wait until ->mount
- * and hence ->init_fs has finished, then check cleancache_poolid, see
- * that it has already been set and therefore do nothing. This proves
- * that we call ->init_fs no more than once for each super block.
- *
- * Combined together, the last two paragraphs prove the function
- * correctness.
- *
- * Note that various cleancache callbacks may proceed before this
- * function is called or even concurrently with it, but since
- * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop
- * until the corresponding ->init_fs has been actually called and
- * cleancache_ops has been set.
- */
- iterate_supers(cleancache_register_ops_sb, NULL);
- return 0;
-}
-EXPORT_SYMBOL(cleancache_register_ops);
-
-/* Called by a cleancache-enabled filesystem at time of mount */
-void __cleancache_init_fs(struct super_block *sb)
-{
- int pool_id = CLEANCACHE_NO_BACKEND;
-
- if (cleancache_ops) {
- pool_id = cleancache_ops->init_fs(PAGE_SIZE);
- if (pool_id < 0)
- pool_id = CLEANCACHE_NO_POOL;
- }
- sb->cleancache_poolid = pool_id;
-}
-EXPORT_SYMBOL(__cleancache_init_fs);
-
-/* Called by a cleancache-enabled clustered filesystem at time of mount */
-void __cleancache_init_shared_fs(struct super_block *sb)
-{
- int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
-
- if (cleancache_ops) {
- pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
- if (pool_id < 0)
- pool_id = CLEANCACHE_NO_POOL;
- }
- sb->cleancache_poolid = pool_id;
-}
-EXPORT_SYMBOL(__cleancache_init_shared_fs);
-
-/*
- * If the filesystem uses exportable filehandles, use the filehandle as
- * the key, else use the inode number.
- */
-static int cleancache_get_key(struct inode *inode,
- struct cleancache_filekey *key)
-{
- int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *);
- int len = 0, maxlen = CLEANCACHE_KEY_MAX;
- struct super_block *sb = inode->i_sb;
-
- key->u.ino = inode->i_ino;
- if (sb->s_export_op != NULL) {
- fhfn = sb->s_export_op->encode_fh;
- if (fhfn) {
- len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL);
- if (len <= FILEID_ROOT || len == FILEID_INVALID)
- return -1;
- if (maxlen > CLEANCACHE_KEY_MAX)
- return -1;
- }
- }
- return 0;
-}
-
-/*
- * "Get" data from cleancache associated with the poolid/inode/index
- * that were specified when the data was put to cleanache and, if
- * successful, use it to fill the specified page with data and return 0.
- * The pageframe is unchanged and returns -1 if the get fails.
- * Page must be locked by caller.
- *
- * The function has two checks before any action is taken - whether
- * a backend is registered and whether the sb->cleancache_poolid
- * is correct.
- */
-int __cleancache_get_page(struct page *page)
-{
- int ret = -1;
- int pool_id;
- struct cleancache_filekey key = { .u.key = { 0 } };
-
- if (!cleancache_ops) {
- cleancache_failed_gets++;
- goto out;
- }
-
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- pool_id = page->mapping->host->i_sb->cleancache_poolid;
- if (pool_id < 0)
- goto out;
-
- if (cleancache_get_key(page->mapping->host, &key) < 0)
- goto out;
-
- ret = cleancache_ops->get_page(pool_id, key, page->index, page);
- if (ret == 0)
- cleancache_succ_gets++;
- else
- cleancache_failed_gets++;
-out:
- return ret;
-}
-EXPORT_SYMBOL(__cleancache_get_page);
-
-/*
- * "Put" data from a page to cleancache and associate it with the
- * (previously-obtained per-filesystem) poolid and the page's,
- * inode and page index. Page must be locked. Note that a put_page
- * always "succeeds", though a subsequent get_page may succeed or fail.
- *
- * The function has two checks before any action is taken - whether
- * a backend is registered and whether the sb->cleancache_poolid
- * is correct.
- */
-void __cleancache_put_page(struct page *page)
-{
- int pool_id;
- struct cleancache_filekey key = { .u.key = { 0 } };
-
- if (!cleancache_ops) {
- cleancache_puts++;
- return;
- }
-
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- pool_id = page->mapping->host->i_sb->cleancache_poolid;
- if (pool_id >= 0 &&
- cleancache_get_key(page->mapping->host, &key) >= 0) {
- cleancache_ops->put_page(pool_id, key, page->index, page);
- cleancache_puts++;
- }
-}
-EXPORT_SYMBOL(__cleancache_put_page);
-
-/*
- * Invalidate any data from cleancache associated with the poolid and the
- * page's inode and page index so that a subsequent "get" will fail.
- *
- * The function has two checks before any action is taken - whether
- * a backend is registered and whether the sb->cleancache_poolid
- * is correct.
- */
-void __cleancache_invalidate_page(struct address_space *mapping,
- struct page *page)
-{
- /* careful... page->mapping is NULL sometimes when this is called */
- int pool_id = mapping->host->i_sb->cleancache_poolid;
- struct cleancache_filekey key = { .u.key = { 0 } };
-
- if (!cleancache_ops)
- return;
-
- if (pool_id >= 0) {
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- if (cleancache_get_key(mapping->host, &key) >= 0) {
- cleancache_ops->invalidate_page(pool_id,
- key, page->index);
- cleancache_invalidates++;
- }
- }
-}
-EXPORT_SYMBOL(__cleancache_invalidate_page);
-
-/*
- * Invalidate all data from cleancache associated with the poolid and the
- * mappings's inode so that all subsequent gets to this poolid/inode
- * will fail.
- *
- * The function has two checks before any action is taken - whether
- * a backend is registered and whether the sb->cleancache_poolid
- * is correct.
- */
-void __cleancache_invalidate_inode(struct address_space *mapping)
-{
- int pool_id = mapping->host->i_sb->cleancache_poolid;
- struct cleancache_filekey key = { .u.key = { 0 } };
-
- if (!cleancache_ops)
- return;
-
- if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
- cleancache_ops->invalidate_inode(pool_id, key);
-}
-EXPORT_SYMBOL(__cleancache_invalidate_inode);
-
-/*
- * Called by any cleancache-enabled filesystem at time of unmount;
- * note that pool_id is surrendered and may be returned by a subsequent
- * cleancache_init_fs or cleancache_init_shared_fs.
- */
-void __cleancache_invalidate_fs(struct super_block *sb)
-{
- int pool_id;
-
- pool_id = sb->cleancache_poolid;
- sb->cleancache_poolid = CLEANCACHE_NO_POOL;
-
- if (cleancache_ops && pool_id >= 0)
- cleancache_ops->invalidate_fs(pool_id);
-}
-EXPORT_SYMBOL(__cleancache_invalidate_fs);
-
-static int __init init_cleancache(void)
-{
-#ifdef CONFIG_DEBUG_FS
- struct dentry *root = debugfs_create_dir("cleancache", NULL);
-
- debugfs_create_u64("succ_gets", 0444, root, &cleancache_succ_gets);
- debugfs_create_u64("failed_gets", 0444, root, &cleancache_failed_gets);
- debugfs_create_u64("puts", 0444, root, &cleancache_puts);
- debugfs_create_u64("invalidates", 0444, root, &cleancache_invalidates);
-#endif
- return 0;
-}
-module_init(init_cleancache)
diff --git a/mm/compaction.c b/mm/compaction.c
index 6e446094ce90..b4e94cda3019 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2280,6 +2280,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
unsigned long last_migrated_pfn;
const bool sync = cc->mode != MIGRATE_ASYNC;
bool update_cached;
+ unsigned int nr_succeeded = 0;
/*
* These counters track activities during zone compaction. Initialize
@@ -2398,10 +2399,10 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
err = migrate_pages(&cc->migratepages, compaction_alloc,
compaction_free, (unsigned long)cc, cc->mode,
- MR_COMPACTION, NULL);
+ MR_COMPACTION, &nr_succeeded);
- trace_mm_compaction_migratepages(cc->nr_migratepages, err,
- &cc->migratepages);
+ trace_mm_compaction_migratepages(cc->nr_migratepages,
+ nr_succeeded);
/* All pages were either migrated or will be released */
cc->nr_migratepages = 0;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index e92497895202..1dd153c31c9e 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -11,7 +11,6 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
-#include <linux/random.h>
#include <linux/slab.h>
#include <linux/string.h>
@@ -23,9 +22,6 @@
#define DAMON_MIN_REGION 1
#endif
-/* Get a random number in [l, r) */
-#define damon_rand(l, r) (l + prandom_u32_max(r - l))
-
static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
@@ -53,17 +49,6 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end)
return region;
}
-/*
- * Add a region between two other regions
- */
-inline void damon_insert_region(struct damon_region *r,
- struct damon_region *prev, struct damon_region *next,
- struct damon_target *t)
-{
- __list_add(&r->list, &prev->list, &next->list);
- t->nr_regions++;
-}
-
void damon_add_region(struct damon_region *r, struct damon_target *t)
{
list_add_tail(&r->list, &t->regions_list);
@@ -106,8 +91,7 @@ struct damos *damon_new_scheme(
scheme->min_age_region = min_age_region;
scheme->max_age_region = max_age_region;
scheme->action = action;
- scheme->stat_count = 0;
- scheme->stat_sz = 0;
+ scheme->stat = (struct damos_stat){};
INIT_LIST_HEAD(&scheme->list);
scheme->quota.ms = quota->ms;
@@ -530,15 +514,17 @@ static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
struct damon_target *t;
+ unsigned int ti = 0; /* target's index */
damon_for_each_target(t, c) {
struct damon_region *r;
damon_for_each_region(r, t) {
- trace_damon_aggregated(t, r, damon_nr_regions(t));
+ trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
r->last_nr_accesses = r->nr_accesses;
r->nr_accesses = 0;
}
+ ti++;
}
}
@@ -578,6 +564,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
struct damos_quota *quota = &s->quota;
unsigned long sz = r->ar.end - r->ar.start;
struct timespec64 begin, end;
+ unsigned long sz_applied = 0;
if (!s->wmarks.activated)
continue;
@@ -631,7 +618,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
damon_split_region_at(c, t, r, sz);
}
ktime_get_coarse_ts64(&begin);
- c->primitive.apply_scheme(c, t, r, s);
+ sz_applied = c->primitive.apply_scheme(c, t, r, s);
ktime_get_coarse_ts64(&end);
quota->total_charged_ns += timespec64_to_ns(&end) -
timespec64_to_ns(&begin);
@@ -645,8 +632,11 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
r->age = 0;
update_stat:
- s->stat_count++;
- s->stat_sz += sz;
+ s->stat.nr_tried++;
+ s->stat.sz_tried += sz;
+ if (sz_applied)
+ s->stat.nr_applied++;
+ s->stat.sz_applied += sz_applied;
}
}
@@ -694,6 +684,8 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
if (time_after_eq(jiffies, quota->charged_from +
msecs_to_jiffies(
quota->reset_interval))) {
+ if (quota->esz && quota->charged_sz >= quota->esz)
+ s->stat.qt_exceeds++;
quota->total_charged_sz += quota->charged_sz;
quota->charged_from = jiffies;
quota->charged_sz = 0;
@@ -733,7 +725,10 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
}
}
-#define sz_damon_region(r) (r->ar.end - r->ar.start)
+static inline unsigned long sz_damon_region(struct damon_region *r)
+{
+ return r->ar.end - r->ar.start;
+}
/*
* Merge two adjacent regions into one region
@@ -750,8 +745,6 @@ static void damon_merge_two_regions(struct damon_target *t,
damon_destroy_region(r, t);
}
-#define diff_of(a, b) (a > b ? a - b : b - a)
-
/*
* Merge adjacent regions having similar access frequencies
*
@@ -765,13 +758,13 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
struct damon_region *r, *prev = NULL, *next;
damon_for_each_region_safe(r, next, t) {
- if (diff_of(r->nr_accesses, r->last_nr_accesses) > thres)
+ if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
r->age = 0;
else
r->age++;
if (prev && prev->ar.end == r->ar.start &&
- diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
+ abs(prev->nr_accesses - r->nr_accesses) <= thres &&
sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
damon_merge_two_regions(t, prev, r);
else
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index ad65436756af..5b899601e56c 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -105,7 +105,7 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len)
damon_for_each_scheme(s, c) {
rc = scnprintf(&buf[written], len - written,
- "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu\n",
+ "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
s->min_sz_region, s->max_sz_region,
s->min_nr_accesses, s->max_nr_accesses,
s->min_age_region, s->max_age_region,
@@ -117,7 +117,9 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len)
s->quota.weight_age,
s->wmarks.metric, s->wmarks.interval,
s->wmarks.high, s->wmarks.mid, s->wmarks.low,
- s->stat_count, s->stat_sz);
+ s->stat.nr_tried, s->stat.sz_tried,
+ s->stat.nr_applied, s->stat.sz_applied,
+ s->stat.qt_exceeds);
if (!rc)
return -ENOMEM;
@@ -213,6 +215,13 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
if (!damos_action_valid(action))
goto fail;
+ if (min_sz > max_sz || min_nr_a > max_nr_a || min_age > max_age)
+ goto fail;
+
+ if (wmarks.high < wmarks.mid || wmarks.high < wmarks.low ||
+ wmarks.mid < wmarks.low)
+ goto fail;
+
pos += parsed;
scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a,
min_age, max_age, action, &quota, &wmarks);
@@ -355,7 +364,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
struct damon_ctx *ctx = file->private_data;
struct damon_target *t, *next_t;
bool id_is_pid = true;
- char *kbuf, *nrs;
+ char *kbuf;
unsigned long *targets;
ssize_t nr_targets;
ssize_t ret;
@@ -365,14 +374,13 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
- nrs = kbuf;
if (!strncmp(kbuf, "paddr\n", count)) {
id_is_pid = false;
/* target id is meaningless here, but we set it just for fun */
scnprintf(kbuf, count, "42 ");
}
- targets = str_to_target_ids(nrs, count, &nr_targets);
+ targets = str_to_target_ids(kbuf, count, &nr_targets);
if (!targets) {
ret = -ENOMEM;
goto out;
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index a496d6f203d6..5e8244f65a1a 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -73,7 +73,7 @@ static void __damon_pa_prepare_access_check(struct damon_ctx *ctx,
damon_pa_mkold(r->sampling_addr);
}
-void damon_pa_prepare_access_checks(struct damon_ctx *ctx)
+static void damon_pa_prepare_access_checks(struct damon_ctx *ctx)
{
struct damon_target *t;
struct damon_region *r;
@@ -192,7 +192,7 @@ static void __damon_pa_check_access(struct damon_ctx *ctx,
last_addr = r->sampling_addr;
}
-unsigned int damon_pa_check_accesses(struct damon_ctx *ctx)
+static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx)
{
struct damon_target *t;
struct damon_region *r;
@@ -213,14 +213,15 @@ bool damon_pa_target_valid(void *t)
return true;
}
-int damon_pa_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
- struct damon_region *r, struct damos *scheme)
+static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
{
- unsigned long addr;
+ unsigned long addr, applied;
LIST_HEAD(page_list);
if (scheme->action != DAMOS_PAGEOUT)
- return -EINVAL;
+ return 0;
for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
struct page *page = damon_get_page(PHYS_PFN(addr));
@@ -241,13 +242,14 @@ int damon_pa_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
put_page(page);
}
}
- reclaim_pages(&page_list);
+ applied = reclaim_pages(&page_list);
cond_resched();
- return 0;
+ return applied * PAGE_SIZE;
}
-int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t,
- struct damon_region *r, struct damos *scheme)
+static int damon_pa_scheme_score(struct damon_ctx *context,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
{
switch (scheme->action) {
case DAMOS_PAGEOUT:
diff --git a/mm/damon/prmtv-common.h b/mm/damon/prmtv-common.h
index 61f27037603e..e790cb5f8fe0 100644
--- a/mm/damon/prmtv-common.h
+++ b/mm/damon/prmtv-common.h
@@ -6,10 +6,6 @@
*/
#include <linux/damon.h>
-#include <linux/random.h>
-
-/* Get a random number in [l, r) */
-#define damon_rand(l, r) (l + prandom_u32_max(r - l))
struct page *damon_get_page(unsigned long pfn);
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index dc1485044eaf..bc476cef688e 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -185,6 +185,36 @@ module_param(monitor_region_end, ulong, 0600);
static int kdamond_pid __read_mostly = -1;
module_param(kdamond_pid, int, 0400);
+/*
+ * Number of memory regions that tried to be reclaimed.
+ */
+static unsigned long nr_reclaim_tried_regions __read_mostly;
+module_param(nr_reclaim_tried_regions, ulong, 0400);
+
+/*
+ * Total bytes of memory regions that tried to be reclaimed.
+ */
+static unsigned long bytes_reclaim_tried_regions __read_mostly;
+module_param(bytes_reclaim_tried_regions, ulong, 0400);
+
+/*
+ * Number of memory regions that successfully be reclaimed.
+ */
+static unsigned long nr_reclaimed_regions __read_mostly;
+module_param(nr_reclaimed_regions, ulong, 0400);
+
+/*
+ * Total bytes of memory regions that successfully be reclaimed.
+ */
+static unsigned long bytes_reclaimed_regions __read_mostly;
+module_param(bytes_reclaimed_regions, ulong, 0400);
+
+/*
+ * Number of times that the time/space quota limits have exceeded
+ */
+static unsigned long nr_quota_exceeds __read_mostly;
+module_param(nr_quota_exceeds, ulong, 0400);
+
static struct damon_ctx *ctx;
static struct damon_target *target;
@@ -333,6 +363,21 @@ static void damon_reclaim_timer_fn(struct work_struct *work)
}
static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn);
+static int damon_reclaim_after_aggregation(struct damon_ctx *c)
+{
+ struct damos *s;
+
+ /* update the stats parameter */
+ damon_for_each_scheme(s, c) {
+ nr_reclaim_tried_regions = s->stat.nr_tried;
+ bytes_reclaim_tried_regions = s->stat.sz_tried;
+ nr_reclaimed_regions = s->stat.nr_applied;
+ bytes_reclaimed_regions = s->stat.sz_applied;
+ nr_quota_exceeds = s->stat.qt_exceeds;
+ }
+ return 0;
+}
+
static int __init damon_reclaim_init(void)
{
ctx = damon_new_ctx();
@@ -340,6 +385,7 @@ static int __init damon_reclaim_init(void)
return -ENOMEM;
damon_pa_set_primitives(ctx);
+ ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
/* 4242 means nothing but fun */
target = damon_new_target(4242);
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 20a9a9d69eb1..89b6468da2b9 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -26,8 +26,10 @@
* 't->id' should be the pointer to the relevant 'struct pid' having reference
* count. Caller must put the returned task, unless it is NULL.
*/
-#define damon_get_task_struct(t) \
- (get_pid_task((struct pid *)t->id, PIDTYPE_PID))
+static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
+{
+ return get_pid_task((struct pid *)t->id, PIDTYPE_PID);
+}
/*
* Get the mm_struct of the given target
@@ -98,16 +100,6 @@ static unsigned long sz_range(struct damon_addr_range *r)
return r->end - r->start;
}
-static void swap_ranges(struct damon_addr_range *r1,
- struct damon_addr_range *r2)
-{
- struct damon_addr_range tmp;
-
- tmp = *r1;
- *r1 = *r2;
- *r2 = tmp;
-}
-
/*
* Find three regions separated by two biggest unmapped regions
*
@@ -146,9 +138,9 @@ static int __damon_va_three_regions(struct vm_area_struct *vma,
gap.start = last_vma->vm_end;
gap.end = vma->vm_start;
if (sz_range(&gap) > sz_range(&second_gap)) {
- swap_ranges(&gap, &second_gap);
+ swap(gap, second_gap);
if (sz_range(&second_gap) > sz_range(&first_gap))
- swap_ranges(&second_gap, &first_gap);
+ swap(second_gap, first_gap);
}
next:
last_vma = vma;
@@ -159,7 +151,7 @@ next:
/* Sort the two biggest gaps by address */
if (first_gap.start > second_gap.start)
- swap_ranges(&first_gap, &second_gap);
+ swap(first_gap, second_gap);
/* Store the result */
regions[0].start = ALIGN(start, DAMON_MIN_REGION);
@@ -240,13 +232,19 @@ static int damon_va_three_regions(struct damon_target *t,
static void __damon_va_init_regions(struct damon_ctx *ctx,
struct damon_target *t)
{
+ struct damon_target *ti;
struct damon_region *r;
struct damon_addr_range regions[3];
unsigned long sz = 0, nr_pieces;
- int i;
+ int i, tidx = 0;
if (damon_va_three_regions(t, regions)) {
- pr_err("Failed to get three regions of target %lu\n", t->id);
+ damon_for_each_target(ti, ctx) {
+ if (ti == t)
+ break;
+ tidx++;
+ }
+ pr_debug("Failed to get three regions of %dth target\n", tidx);
return;
}
@@ -272,7 +270,7 @@ static void __damon_va_init_regions(struct damon_ctx *ctx,
}
/* Initialize '->regions_list' of every target (task) */
-void damon_va_init(struct damon_ctx *ctx)
+static void damon_va_init(struct damon_ctx *ctx)
{
struct damon_target *t;
@@ -292,7 +290,8 @@ void damon_va_init(struct damon_ctx *ctx)
*
* Returns true if it is.
*/
-static bool damon_intersect(struct damon_region *r, struct damon_addr_range *re)
+static bool damon_intersect(struct damon_region *r,
+ struct damon_addr_range *re)
{
return !(r->ar.end <= re->start || re->end <= r->ar.start);
}
@@ -356,7 +355,7 @@ static void damon_va_apply_three_regions(struct damon_target *t,
/*
* Update regions for current memory mappings
*/
-void damon_va_update(struct damon_ctx *ctx)
+static void damon_va_update(struct damon_ctx *ctx)
{
struct damon_addr_range three_regions[3];
struct damon_target *t;
@@ -395,8 +394,65 @@ out:
return 0;
}
+#ifdef CONFIG_HUGETLB_PAGE
+static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ bool referenced = false;
+ pte_t entry = huge_ptep_get(pte);
+ struct page *page = pte_page(entry);
+
+ if (!page)
+ return;
+
+ get_page(page);
+
+ if (pte_young(entry)) {
+ referenced = true;
+ entry = pte_mkold(entry);
+ huge_ptep_set_access_flags(vma, addr, pte, entry,
+ vma->vm_flags & VM_WRITE);
+ }
+
+#ifdef CONFIG_MMU_NOTIFIER
+ if (mmu_notifier_clear_young(mm, addr,
+ addr + huge_page_size(hstate_vma(vma))))
+ referenced = true;
+#endif /* CONFIG_MMU_NOTIFIER */
+
+ if (referenced)
+ set_page_young(page);
+
+ set_page_idle(page);
+ put_page(page);
+}
+
+static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct hstate *h = hstate_vma(walk->vma);
+ spinlock_t *ptl;
+ pte_t entry;
+
+ ptl = huge_pte_lock(h, walk->mm, pte);
+ entry = huge_ptep_get(pte);
+ if (!pte_present(entry))
+ goto out;
+
+ damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);
+
+out:
+ spin_unlock(ptl);
+ return 0;
+}
+#else
+#define damon_mkold_hugetlb_entry NULL
+#endif /* CONFIG_HUGETLB_PAGE */
+
static const struct mm_walk_ops damon_mkold_ops = {
.pmd_entry = damon_mkold_pmd_entry,
+ .hugetlb_entry = damon_mkold_hugetlb_entry,
};
static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -410,7 +466,7 @@ static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
* Functions for the access checking of the regions
*/
-static void damon_va_prepare_access_check(struct damon_ctx *ctx,
+static void __damon_va_prepare_access_check(struct damon_ctx *ctx,
struct mm_struct *mm, struct damon_region *r)
{
r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
@@ -418,7 +474,7 @@ static void damon_va_prepare_access_check(struct damon_ctx *ctx,
damon_va_mkold(mm, r->sampling_addr);
}
-void damon_va_prepare_access_checks(struct damon_ctx *ctx)
+static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
{
struct damon_target *t;
struct mm_struct *mm;
@@ -429,7 +485,7 @@ void damon_va_prepare_access_checks(struct damon_ctx *ctx)
if (!mm)
continue;
damon_for_each_region(r, t)
- damon_va_prepare_access_check(ctx, mm, r);
+ __damon_va_prepare_access_check(ctx, mm, r);
mmput(mm);
}
}
@@ -491,8 +547,47 @@ out:
return 0;
}
+#ifdef CONFIG_HUGETLB_PAGE
+static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct damon_young_walk_private *priv = walk->private;
+ struct hstate *h = hstate_vma(walk->vma);
+ struct page *page;
+ spinlock_t *ptl;
+ pte_t entry;
+
+ ptl = huge_pte_lock(h, walk->mm, pte);
+ entry = huge_ptep_get(pte);
+ if (!pte_present(entry))
+ goto out;
+
+ page = pte_page(entry);
+ if (!page)
+ goto out;
+
+ get_page(page);
+
+ if (pte_young(entry) || !page_is_idle(page) ||
+ mmu_notifier_test_young(walk->mm, addr)) {
+ *priv->page_sz = huge_page_size(h);
+ priv->young = true;
+ }
+
+ put_page(page);
+
+out:
+ spin_unlock(ptl);
+ return 0;
+}
+#else
+#define damon_young_hugetlb_entry NULL
+#endif /* CONFIG_HUGETLB_PAGE */
+
static const struct mm_walk_ops damon_young_ops = {
.pmd_entry = damon_young_pmd_entry,
+ .hugetlb_entry = damon_young_hugetlb_entry,
};
static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
@@ -515,7 +610,7 @@ static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
* mm 'mm_struct' for the given virtual address space
* r the region to be checked
*/
-static void damon_va_check_access(struct damon_ctx *ctx,
+static void __damon_va_check_access(struct damon_ctx *ctx,
struct mm_struct *mm, struct damon_region *r)
{
static struct mm_struct *last_mm;
@@ -539,7 +634,7 @@ static void damon_va_check_access(struct damon_ctx *ctx,
last_addr = r->sampling_addr;
}
-unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
+static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
{
struct damon_target *t;
struct mm_struct *mm;
@@ -551,7 +646,7 @@ unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
if (!mm)
continue;
damon_for_each_region(r, t) {
- damon_va_check_access(ctx, mm, r);
+ __damon_va_check_access(ctx, mm, r);
max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
}
mmput(mm);
@@ -579,32 +674,34 @@ bool damon_va_target_valid(void *target)
}
#ifndef CONFIG_ADVISE_SYSCALLS
-static int damos_madvise(struct damon_target *target, struct damon_region *r,
- int behavior)
+static unsigned long damos_madvise(struct damon_target *target,
+ struct damon_region *r, int behavior)
{
- return -EINVAL;
+ return 0;
}
#else
-static int damos_madvise(struct damon_target *target, struct damon_region *r,
- int behavior)
+static unsigned long damos_madvise(struct damon_target *target,
+ struct damon_region *r, int behavior)
{
struct mm_struct *mm;
- int ret = -ENOMEM;
+ unsigned long start = PAGE_ALIGN(r->ar.start);
+ unsigned long len = PAGE_ALIGN(r->ar.end - r->ar.start);
+ unsigned long applied;
mm = damon_get_mm(target);
if (!mm)
- goto out;
+ return 0;
- ret = do_madvise(mm, PAGE_ALIGN(r->ar.start),
- PAGE_ALIGN(r->ar.end - r->ar.start), behavior);
+ applied = do_madvise(mm, start, len, behavior) ? 0 : len;
mmput(mm);
-out:
- return ret;
+
+ return applied;
}
#endif /* CONFIG_ADVISE_SYSCALLS */
-int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
- struct damon_region *r, struct damos *scheme)
+static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
{
int madv_action;
@@ -627,14 +724,15 @@ int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
case DAMOS_STAT:
return 0;
default:
- return -EINVAL;
+ return 0;
}
return damos_madvise(t, r, madv_action);
}
-int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t,
- struct damon_region *r, struct damos *scheme)
+static int damon_va_scheme_score(struct damon_ctx *context,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
{
switch (scheme->action) {
diff --git a/mm/debug.c b/mm/debug.c
index a05a39ff8fe4..bc9ac87f0e08 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -112,56 +112,8 @@ static void __dump_page(struct page *page)
type = "ksm ";
else if (PageAnon(page))
type = "anon ";
- else if (mapping) {
- struct inode *host;
- const struct address_space_operations *a_ops;
- struct hlist_node *dentry_first;
- struct dentry *dentry_ptr;
- struct dentry dentry;
- unsigned long ino;
-
- /*
- * mapping can be invalid pointer and we don't want to crash
- * accessing it, so probe everything depending on it carefully
- */
- if (get_kernel_nofault(host, &mapping->host) ||
- get_kernel_nofault(a_ops, &mapping->a_ops)) {
- pr_warn("failed to read mapping contents, not a valid kernel address?\n");
- goto out_mapping;
- }
-
- if (!host) {
- pr_warn("aops:%ps\n", a_ops);
- goto out_mapping;
- }
-
- if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
- get_kernel_nofault(ino, &host->i_ino)) {
- pr_warn("aops:%ps with invalid host inode %px\n",
- a_ops, host);
- goto out_mapping;
- }
-
- if (!dentry_first) {
- pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
- goto out_mapping;
- }
-
- dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
- if (get_kernel_nofault(dentry, dentry_ptr)) {
- pr_warn("aops:%ps ino:%lx with invalid dentry %px\n",
- a_ops, ino, dentry_ptr);
- } else {
- /*
- * if dentry is corrupted, the %pd handler may still
- * crash, but it's unlikely that we reach here with a
- * corrupted struct page
- */
- pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n",
- a_ops, ino, &dentry);
- }
- }
-out_mapping:
+ else if (mapping)
+ dump_mapping(mapping);
BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
pr_warn("%sflags: %pGp%s\n", type, &head->flags,
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 228e3954b90c..a7ac97c76762 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -652,7 +652,7 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args)
set_pte_at(args->mm, args->vaddr, args->ptep, pte);
flush_dcache_page(page);
barrier();
- pte_clear(args->mm, args->vaddr, args->ptep);
+ ptep_clear(args->mm, args->vaddr, args->ptep);
pte = ptep_get(args->ptep);
WARN_ON(!pte_none(pte));
}
@@ -888,8 +888,8 @@ static void __init swap_migration_tests(struct pgtable_debug_args *args)
pr_debug("Validating swap migration\n");
/*
- * make_migration_entry() expects given page to be
- * locked, otherwise it stumbles upon a BUG_ON().
+ * make_[readable|writable]_migration_entry() expects given page to
+ * be locked, otherwise it stumbles upon a BUG_ON().
*/
__SetPageLocked(page);
swp = make_writable_migration_entry(page_to_pfn(page));
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 64b537b3ccb0..a7eb5d0eb2da 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -152,7 +152,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
else if ((boundary < size) || (boundary & (boundary - 1)))
return NULL;
- retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, dev_to_node(dev));
+ retval = kmalloc(sizeof(*retval), GFP_KERNEL);
if (!retval)
return retval;
diff --git a/mm/filemap.c b/mm/filemap.c
index 2fd9b2f24025..ad8c39d90bf9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -21,6 +21,7 @@
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
@@ -34,13 +35,13 @@
#include <linux/cpuset.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
-#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
+#include <linux/migrate.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -149,16 +150,6 @@ static void filemap_unaccount_folio(struct address_space *mapping,
{
long nr;
- /*
- * if we're uptodate, flush out into the cleancache, otherwise
- * invalidate any existing cleancache entries. We can't leave
- * stale data around in the cleancache once our page is gone
- */
- if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio))
- cleancache_put_page(&folio->page);
- else
- cleancache_invalidate_page(mapping, &folio->page);
-
VM_BUG_ON_FOLIO(folio_mapped(folio), folio);
if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) {
int mapcount;
@@ -231,17 +222,15 @@ void __filemap_remove_folio(struct folio *folio, void *shadow)
void filemap_free_folio(struct address_space *mapping, struct folio *folio)
{
void (*freepage)(struct page *);
+ int refs = 1;
freepage = mapping->a_ops->freepage;
if (freepage)
freepage(&folio->page);
- if (folio_test_large(folio) && !folio_test_hugetlb(folio)) {
- folio_ref_sub(folio, folio_nr_pages(folio));
- VM_BUG_ON_FOLIO(folio_ref_count(folio) <= 0, folio);
- } else {
- folio_put(folio);
- }
+ if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+ refs = folio_nr_pages(folio);
+ folio_put_refs(folio, refs);
}
/**
@@ -1388,6 +1377,95 @@ repeat:
return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
}
+#ifdef CONFIG_MIGRATION
+/**
+ * migration_entry_wait_on_locked - Wait for a migration entry to be removed
+ * @entry: migration swap entry.
+ * @ptep: mapped pte pointer. Will return with the ptep unmapped. Only required
+ * for pte entries, pass NULL for pmd entries.
+ * @ptl: already locked ptl. This function will drop the lock.
+ *
+ * Wait for a migration entry referencing the given page to be removed. This is
+ * equivalent to put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE) except
+ * this can be called without taking a reference on the page. Instead this
+ * should be called while holding the ptl for the migration entry referencing
+ * the page.
+ *
+ * Returns after unmapping and unlocking the pte/ptl with pte_unmap_unlock().
+ *
+ * This follows the same logic as folio_wait_bit_common() so see the comments
+ * there.
+ */
+void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
+ spinlock_t *ptl)
+{
+ struct wait_page_queue wait_page;
+ wait_queue_entry_t *wait = &wait_page.wait;
+ bool thrashing = false;
+ bool delayacct = false;
+ unsigned long pflags;
+ wait_queue_head_t *q;
+ struct folio *folio = page_folio(pfn_swap_entry_to_page(entry));
+
+ q = folio_waitqueue(folio);
+ if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) {
+ if (!folio_test_swapbacked(folio)) {
+ delayacct_thrashing_start();
+ delayacct = true;
+ }
+ psi_memstall_enter(&pflags);
+ thrashing = true;
+ }
+
+ init_wait(wait);
+ wait->func = wake_page_function;
+ wait_page.folio = folio;
+ wait_page.bit_nr = PG_locked;
+ wait->flags = 0;
+
+ spin_lock_irq(&q->lock);
+ folio_set_waiters(folio);
+ if (!folio_trylock_flag(folio, PG_locked, wait))
+ __add_wait_queue_entry_tail(q, wait);
+ spin_unlock_irq(&q->lock);
+
+ /*
+ * If a migration entry exists for the page the migration path must hold
+ * a valid reference to the page, and it must take the ptl to remove the
+ * migration entry. So the page is valid until the ptl is dropped.
+ */
+ if (ptep)
+ pte_unmap_unlock(ptep, ptl);
+ else
+ spin_unlock(ptl);
+
+ for (;;) {
+ unsigned int flags;
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ /* Loop until we've been woken or interrupted */
+ flags = smp_load_acquire(&wait->flags);
+ if (!(flags & WQ_FLAG_WOKEN)) {
+ if (signal_pending_state(TASK_UNINTERRUPTIBLE, current))
+ break;
+
+ io_schedule();
+ continue;
+ }
+ break;
+ }
+
+ finish_wait(q, wait);
+
+ if (thrashing) {
+ if (delayacct)
+ delayacct_thrashing_end();
+ psi_memstall_leave(&pflags);
+ }
+}
+#endif
+
void folio_wait_bit(struct folio *folio, int bit_nr)
{
folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 130e301c5ac0..6f69b044a8cc 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -27,27 +27,7 @@ DEFINE_STATIC_KEY_FALSE(frontswap_enabled_key);
* may be registered, but implementations can never deregister. This
* is a simple singly-linked list of all registered implementations.
*/
-static struct frontswap_ops *frontswap_ops __read_mostly;
-
-#define for_each_frontswap_ops(ops) \
- for ((ops) = frontswap_ops; (ops); (ops) = (ops)->next)
-
-/*
- * If enabled, frontswap_store will return failure even on success. As
- * a result, the swap subsystem will always write the page to swap, in
- * effect converting frontswap into a writethrough cache. In this mode,
- * there is no direct reduction in swap writes, but a frontswap backend
- * can unilaterally "reclaim" any pages in use with no data loss, thus
- * providing increases control over maximum memory usage due to frontswap.
- */
-static bool frontswap_writethrough_enabled __read_mostly;
-
-/*
- * If enabled, the underlying tmem implementation is capable of doing
- * exclusive gets, so frontswap_load, on a successful tmem_get must
- * mark the page as no longer in frontswap AND mark it dirty.
- */
-static bool frontswap_tmem_exclusive_gets_enabled __read_mostly;
+static const struct frontswap_ops *frontswap_ops __read_mostly;
#ifdef CONFIG_DEBUG_FS
/*
@@ -114,87 +94,22 @@ static inline void inc_frontswap_invalidates(void) { }
/*
* Register operations for frontswap
*/
-void frontswap_register_ops(struct frontswap_ops *ops)
+int frontswap_register_ops(const struct frontswap_ops *ops)
{
- DECLARE_BITMAP(a, MAX_SWAPFILES);
- DECLARE_BITMAP(b, MAX_SWAPFILES);
- struct swap_info_struct *si;
- unsigned int i;
-
- bitmap_zero(a, MAX_SWAPFILES);
- bitmap_zero(b, MAX_SWAPFILES);
-
- spin_lock(&swap_lock);
- plist_for_each_entry(si, &swap_active_head, list) {
- if (!WARN_ON(!si->frontswap_map))
- set_bit(si->type, a);
- }
- spin_unlock(&swap_lock);
-
- /* the new ops needs to know the currently active swap devices */
- for_each_set_bit(i, a, MAX_SWAPFILES)
- ops->init(i);
-
- /*
- * Setting frontswap_ops must happen after the ops->init() calls
- * above; cmpxchg implies smp_mb() which will ensure the init is
- * complete at this point.
- */
- do {
- ops->next = frontswap_ops;
- } while (cmpxchg(&frontswap_ops, ops->next, ops) != ops->next);
+ if (frontswap_ops)
+ return -EINVAL;
+ frontswap_ops = ops;
static_branch_inc(&frontswap_enabled_key);
-
- spin_lock(&swap_lock);
- plist_for_each_entry(si, &swap_active_head, list) {
- if (si->frontswap_map)
- set_bit(si->type, b);
- }
- spin_unlock(&swap_lock);
-
- /*
- * On the very unlikely chance that a swap device was added or
- * removed between setting the "a" list bits and the ops init
- * calls, we re-check and do init or invalidate for any changed
- * bits.
- */
- if (unlikely(!bitmap_equal(a, b, MAX_SWAPFILES))) {
- for (i = 0; i < MAX_SWAPFILES; i++) {
- if (!test_bit(i, a) && test_bit(i, b))
- ops->init(i);
- else if (test_bit(i, a) && !test_bit(i, b))
- ops->invalidate_area(i);
- }
- }
-}
-EXPORT_SYMBOL(frontswap_register_ops);
-
-/*
- * Enable/disable frontswap writethrough (see above).
- */
-void frontswap_writethrough(bool enable)
-{
- frontswap_writethrough_enabled = enable;
-}
-EXPORT_SYMBOL(frontswap_writethrough);
-
-/*
- * Enable/disable frontswap exclusive gets (see above).
- */
-void frontswap_tmem_exclusive_gets(bool enable)
-{
- frontswap_tmem_exclusive_gets_enabled = enable;
+ return 0;
}
-EXPORT_SYMBOL(frontswap_tmem_exclusive_gets);
/*
* Called when a swap device is swapon'd.
*/
-void __frontswap_init(unsigned type, unsigned long *map)
+void frontswap_init(unsigned type, unsigned long *map)
{
struct swap_info_struct *sis = swap_info[type];
- struct frontswap_ops *ops;
VM_BUG_ON(sis == NULL);
@@ -210,20 +125,16 @@ void __frontswap_init(unsigned type, unsigned long *map)
* p->frontswap set to something valid to work properly.
*/
frontswap_map_set(sis, map);
-
- for_each_frontswap_ops(ops)
- ops->init(type);
+ frontswap_ops->init(type);
}
-EXPORT_SYMBOL(__frontswap_init);
-bool __frontswap_test(struct swap_info_struct *sis,
+static bool __frontswap_test(struct swap_info_struct *sis,
pgoff_t offset)
{
if (sis->frontswap_map)
return test_bit(offset, sis->frontswap_map);
return false;
}
-EXPORT_SYMBOL(__frontswap_test);
static inline void __frontswap_set(struct swap_info_struct *sis,
pgoff_t offset)
@@ -253,7 +164,6 @@ int __frontswap_store(struct page *page)
int type = swp_type(entry);
struct swap_info_struct *sis = swap_info[type];
pgoff_t offset = swp_offset(entry);
- struct frontswap_ops *ops;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(!PageLocked(page));
@@ -267,28 +177,19 @@ int __frontswap_store(struct page *page)
*/
if (__frontswap_test(sis, offset)) {
__frontswap_clear(sis, offset);
- for_each_frontswap_ops(ops)
- ops->invalidate_page(type, offset);
+ frontswap_ops->invalidate_page(type, offset);
}
- /* Try to store in each implementation, until one succeeds. */
- for_each_frontswap_ops(ops) {
- ret = ops->store(type, offset, page);
- if (!ret) /* successful store */
- break;
- }
+ ret = frontswap_ops->store(type, offset, page);
if (ret == 0) {
__frontswap_set(sis, offset);
inc_frontswap_succ_stores();
} else {
inc_frontswap_failed_stores();
}
- if (frontswap_writethrough_enabled)
- /* report failure so swap also writes to swap device */
- ret = -1;
+
return ret;
}
-EXPORT_SYMBOL(__frontswap_store);
/*
* "Get" data from frontswap associated with swaptype and offset that were
@@ -302,7 +203,6 @@ int __frontswap_load(struct page *page)
int type = swp_type(entry);
struct swap_info_struct *sis = swap_info[type];
pgoff_t offset = swp_offset(entry);
- struct frontswap_ops *ops;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(!PageLocked(page));
@@ -312,21 +212,11 @@ int __frontswap_load(struct page *page)
return -1;
/* Try loading from each implementation, until one succeeds. */
- for_each_frontswap_ops(ops) {
- ret = ops->load(type, offset, page);
- if (!ret) /* successful load */
- break;
- }
- if (ret == 0) {
+ ret = frontswap_ops->load(type, offset, page);
+ if (ret == 0)
inc_frontswap_loads();
- if (frontswap_tmem_exclusive_gets_enabled) {
- SetPageDirty(page);
- __frontswap_clear(sis, offset);
- }
- }
return ret;
}
-EXPORT_SYMBOL(__frontswap_load);
/*
* Invalidate any data from frontswap associated with the specified swaptype
@@ -335,7 +225,6 @@ EXPORT_SYMBOL(__frontswap_load);
void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
struct swap_info_struct *sis = swap_info[type];
- struct frontswap_ops *ops;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(sis == NULL);
@@ -343,12 +232,10 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
if (!__frontswap_test(sis, offset))
return;
- for_each_frontswap_ops(ops)
- ops->invalidate_page(type, offset);
+ frontswap_ops->invalidate_page(type, offset);
__frontswap_clear(sis, offset);
inc_frontswap_invalidates();
}
-EXPORT_SYMBOL(__frontswap_invalidate_page);
/*
* Invalidate all data from frontswap associated with all offsets for the
@@ -357,7 +244,6 @@ EXPORT_SYMBOL(__frontswap_invalidate_page);
void __frontswap_invalidate_area(unsigned type)
{
struct swap_info_struct *sis = swap_info[type];
- struct frontswap_ops *ops;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(sis == NULL);
@@ -365,123 +251,10 @@ void __frontswap_invalidate_area(unsigned type)
if (sis->frontswap_map == NULL)
return;
- for_each_frontswap_ops(ops)
- ops->invalidate_area(type);
+ frontswap_ops->invalidate_area(type);
atomic_set(&sis->frontswap_pages, 0);
bitmap_zero(sis->frontswap_map, sis->max);
}
-EXPORT_SYMBOL(__frontswap_invalidate_area);
-
-static unsigned long __frontswap_curr_pages(void)
-{
- unsigned long totalpages = 0;
- struct swap_info_struct *si = NULL;
-
- assert_spin_locked(&swap_lock);
- plist_for_each_entry(si, &swap_active_head, list)
- totalpages += atomic_read(&si->frontswap_pages);
- return totalpages;
-}
-
-static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
- int *swapid)
-{
- int ret = -EINVAL;
- struct swap_info_struct *si = NULL;
- int si_frontswap_pages;
- unsigned long total_pages_to_unuse = total;
- unsigned long pages = 0, pages_to_unuse = 0;
-
- assert_spin_locked(&swap_lock);
- plist_for_each_entry(si, &swap_active_head, list) {
- si_frontswap_pages = atomic_read(&si->frontswap_pages);
- if (total_pages_to_unuse < si_frontswap_pages) {
- pages = pages_to_unuse = total_pages_to_unuse;
- } else {
- pages = si_frontswap_pages;
- pages_to_unuse = 0; /* unuse all */
- }
- /* ensure there is enough RAM to fetch pages from frontswap */
- if (security_vm_enough_memory_mm(current->mm, pages)) {
- ret = -ENOMEM;
- continue;
- }
- vm_unacct_memory(pages);
- *unused = pages_to_unuse;
- *swapid = si->type;
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-/*
- * Used to check if it's necessary and feasible to unuse pages.
- * Return 1 when nothing to do, 0 when need to shrink pages,
- * error code when there is an error.
- */
-static int __frontswap_shrink(unsigned long target_pages,
- unsigned long *pages_to_unuse,
- int *type)
-{
- unsigned long total_pages = 0, total_pages_to_unuse;
-
- assert_spin_locked(&swap_lock);
-
- total_pages = __frontswap_curr_pages();
- if (total_pages <= target_pages) {
- /* Nothing to do */
- *pages_to_unuse = 0;
- return 1;
- }
- total_pages_to_unuse = total_pages - target_pages;
- return __frontswap_unuse_pages(total_pages_to_unuse, pages_to_unuse, type);
-}
-
-/*
- * Frontswap, like a true swap device, may unnecessarily retain pages
- * under certain circumstances; "shrink" frontswap is essentially a
- * "partial swapoff" and works by calling try_to_unuse to attempt to
- * unuse enough frontswap pages to attempt to -- subject to memory
- * constraints -- reduce the number of pages in frontswap to the
- * number given in the parameter target_pages.
- */
-void frontswap_shrink(unsigned long target_pages)
-{
- unsigned long pages_to_unuse = 0;
- int type, ret;
-
- /*
- * we don't want to hold swap_lock while doing a very
- * lengthy try_to_unuse, but swap_list may change
- * so restart scan from swap_active_head each time
- */
- spin_lock(&swap_lock);
- ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
- spin_unlock(&swap_lock);
- if (ret == 0)
- try_to_unuse(type, true, pages_to_unuse);
- return;
-}
-EXPORT_SYMBOL(frontswap_shrink);
-
-/*
- * Count and return the number of frontswap pages across all
- * swap devices. This is exported so that backend drivers can
- * determine current usage without reading debugfs.
- */
-unsigned long frontswap_curr_pages(void)
-{
- unsigned long totalpages = 0;
-
- spin_lock(&swap_lock);
- totalpages = __frontswap_curr_pages();
- spin_unlock(&swap_lock);
-
- return totalpages;
-}
-EXPORT_SYMBOL(frontswap_curr_pages);
static int __init init_frontswap(void)
{
diff --git a/mm/gup.c b/mm/gup.c
index 2c51e9748a6a..f0af462ac1e2 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -642,12 +642,17 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
}
retry:
if (!pmd_present(pmdval)) {
+ /*
+ * Should never reach here, if thp migration is not supported;
+ * Otherwise, it must be a thp migration entry.
+ */
+ VM_BUG_ON(!thp_migration_supported() ||
+ !is_pmd_migration_entry(pmdval));
+
if (likely(!(flags & FOLL_MIGRATION)))
return no_page_table(vma, flags);
- VM_BUG_ON(thp_migration_supported() &&
- !is_pmd_migration_entry(pmdval));
- if (is_pmd_migration_entry(pmdval))
- pmd_migration_entry_wait(mm, pmd);
+
+ pmd_migration_entry_wait(mm, pmd);
pmdval = READ_ONCE(*pmd);
/*
* MADV_DONTNEED may convert the pmd to null because
@@ -1672,21 +1677,22 @@ size_t fault_in_writeable(char __user *uaddr, size_t size)
if (unlikely(size == 0))
return 0;
+ if (!user_write_access_begin(uaddr, size))
+ return size;
if (!PAGE_ALIGNED(uaddr)) {
- if (unlikely(__put_user(0, uaddr) != 0))
- return size;
+ unsafe_put_user(0, uaddr, out);
uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
}
end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
if (unlikely(end < start))
end = NULL;
while (uaddr != end) {
- if (unlikely(__put_user(0, uaddr) != 0))
- goto out;
+ unsafe_put_user(0, uaddr, out);
uaddr += PAGE_SIZE;
}
out:
+ user_write_access_end();
if (size > uaddr - start)
return size - (uaddr - start);
return 0;
@@ -1771,21 +1777,22 @@ size_t fault_in_readable(const char __user *uaddr, size_t size)
if (unlikely(size == 0))
return 0;
+ if (!user_read_access_begin(uaddr, size))
+ return size;
if (!PAGE_ALIGNED(uaddr)) {
- if (unlikely(__get_user(c, uaddr) != 0))
- return size;
+ unsafe_get_user(c, uaddr, out);
uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
}
end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
if (unlikely(end < start))
end = NULL;
while (uaddr != end) {
- if (unlikely(__get_user(c, uaddr) != 0))
- goto out;
+ unsafe_get_user(c, uaddr, out);
uaddr += PAGE_SIZE;
}
out:
+ user_read_access_end();
(void)c;
if (size > uaddr - start)
return size - (uaddr - start);
diff --git a/mm/hmm.c b/mm/hmm.c
index 842e26599238..bd56641c79d4 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -300,7 +300,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
* Since each architecture defines a struct page for the zero page, just
* fall through and treat it like a normal page.
*/
- if (pte_special(pte) && !pte_devmap(pte) &&
+ if (!vm_normal_page(walk->vma, addr, pte) &&
+ !pte_devmap(pte) &&
!is_zero_pfn(pte_pfn(pte))) {
if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
pte_unmap(ptep);
@@ -518,7 +519,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
vma->vm_flags & VM_READ)
return 0;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f58524394dc1..406a3c28c026 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1322,7 +1322,7 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
* We can only reuse the page if nobody else maps the huge page or it's
* part.
*/
- if (reuse_swap_page(page, NULL)) {
+ if (reuse_swap_page(page)) {
pmd_t entry;
entry = pmd_mkyoung(orig_pmd);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@ -2542,38 +2542,28 @@ int total_mapcount(struct page *page)
* need full accuracy to avoid breaking page pinning, because
* page_trans_huge_mapcount() is slower than page_mapcount().
*/
-int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
+int page_trans_huge_mapcount(struct page *page)
{
- int i, ret, _total_mapcount, mapcount;
+ int i, ret;
/* hugetlbfs shouldn't call it */
VM_BUG_ON_PAGE(PageHuge(page), page);
- if (likely(!PageTransCompound(page))) {
- mapcount = atomic_read(&page->_mapcount) + 1;
- if (total_mapcount)
- *total_mapcount = mapcount;
- return mapcount;
- }
+ if (likely(!PageTransCompound(page)))
+ return atomic_read(&page->_mapcount) + 1;
page = compound_head(page);
- _total_mapcount = ret = 0;
+ ret = 0;
for (i = 0; i < thp_nr_pages(page); i++) {
- mapcount = atomic_read(&page[i]._mapcount) + 1;
+ int mapcount = atomic_read(&page[i]._mapcount) + 1;
ret = max(ret, mapcount);
- _total_mapcount += mapcount;
}
- if (PageDoubleMap(page)) {
+
+ if (PageDoubleMap(page))
ret -= 1;
- _total_mapcount -= thp_nr_pages(page);
- }
- mapcount = compound_mapcount(page);
- ret += mapcount;
- _total_mapcount += mapcount;
- if (total_mapcount)
- *total_mapcount = _total_mapcount;
- return ret;
+
+ return ret + compound_mapcount(page);
}
/* Racy check whether the huge page can be split */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a1baa198519a..61895cc01d09 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4684,8 +4684,8 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr
struct page *new_page)
{
__SetPageUptodate(new_page);
- set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
hugepage_add_new_anon_rmap(new_page, vma, addr);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
ClearHPageRestoreReserve(new_page);
SetHPageMigratable(new_page);
@@ -5259,10 +5259,10 @@ retry_avoidcopy:
/* Break COW */
huge_ptep_clear_flush(vma, haddr, ptep);
mmu_notifier_invalidate_range(mm, range.start, range.end);
- set_huge_pte_at(mm, haddr, ptep,
- make_huge_pte(vma, new_page, 1));
page_remove_rmap(old_page, true);
hugepage_add_new_anon_rmap(new_page, vma, haddr);
+ set_huge_pte_at(mm, haddr, ptep,
+ make_huge_pte(vma, new_page, 1));
SetHPageMigratable(new_page);
/* Make the old page be freed below */
new_page = old_page;
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 79d93534ef1e..f9942841df18 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -123,29 +123,58 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
}
}
+static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
+{
+ int node;
+
+ for_each_node(node)
+ kfree(h_cgroup->nodeinfo[node]);
+ kfree(h_cgroup);
+}
+
static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
struct hugetlb_cgroup *h_cgroup;
+ int node;
+
+ h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids),
+ GFP_KERNEL);
- h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
if (!h_cgroup)
return ERR_PTR(-ENOMEM);
if (!parent_h_cgroup)
root_h_cgroup = h_cgroup;
+ /*
+ * TODO: this routine can waste much memory for nodes which will
+ * never be onlined. It's better to use memory hotplug callback
+ * function.
+ */
+ for_each_node(node) {
+ /* Set node_to_alloc to -1 for offline nodes. */
+ int node_to_alloc =
+ node_state(node, N_NORMAL_MEMORY) ? node : -1;
+ h_cgroup->nodeinfo[node] =
+ kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
+ GFP_KERNEL, node_to_alloc);
+ if (!h_cgroup->nodeinfo[node])
+ goto fail_alloc_nodeinfo;
+ }
+
hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
return &h_cgroup->css;
+
+fail_alloc_nodeinfo:
+ hugetlb_cgroup_free(h_cgroup);
+ return ERR_PTR(-ENOMEM);
}
static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
- struct hugetlb_cgroup *h_cgroup;
-
- h_cgroup = hugetlb_cgroup_from_css(css);
- kfree(h_cgroup);
+ hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}
/*
@@ -289,7 +318,17 @@ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
return;
__set_hugetlb_cgroup(page, h_cg, rsvd);
- return;
+ if (!rsvd) {
+ unsigned long usage =
+ h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
+ /*
+ * This write is not atomic due to fetching usage and writing
+ * to it, but that's fine because we call this with
+ * hugetlb_lock held anyway.
+ */
+ WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
+ usage + nr_pages);
+ }
}
void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
@@ -328,8 +367,17 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
if (rsvd)
css_put(&h_cg->css);
-
- return;
+ else {
+ unsigned long usage =
+ h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
+ /*
+ * This write is not atomic due to fetching usage and writing
+ * to it, but that's fine because we call this with
+ * hugetlb_lock held anyway.
+ */
+ WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
+ usage - nr_pages);
+ }
}
void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
@@ -418,6 +466,59 @@ enum {
RES_RSVD_FAILCNT,
};
+static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
+{
+ int nid;
+ struct cftype *cft = seq_cft(seq);
+ int idx = MEMFILE_IDX(cft->private);
+ bool legacy = MEMFILE_ATTR(cft->private);
+ struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
+ struct cgroup_subsys_state *css;
+ unsigned long usage;
+
+ if (legacy) {
+ /* Add up usage across all nodes for the non-hierarchical total. */
+ usage = 0;
+ for_each_node_state(nid, N_MEMORY)
+ usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
+ seq_printf(seq, "total=%lu", usage * PAGE_SIZE);
+
+ /* Simply print the per-node usage for the non-hierarchical total. */
+ for_each_node_state(nid, N_MEMORY)
+ seq_printf(seq, " N%d=%lu", nid,
+ READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
+ PAGE_SIZE);
+ seq_putc(seq, '\n');
+ }
+
+ /*
+ * The hierarchical total is pretty much the value recorded by the
+ * counter, so use that.
+ */
+ seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
+ page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);
+
+ /*
+ * For each node, transverse the css tree to obtain the hierarchical
+ * node usage.
+ */
+ for_each_node_state(nid, N_MEMORY) {
+ usage = 0;
+ rcu_read_lock();
+ css_for_each_descendant_pre(css, &h_cg->css) {
+ usage += READ_ONCE(hugetlb_cgroup_from_css(css)
+ ->nodeinfo[nid]
+ ->usage[idx]);
+ }
+ rcu_read_unlock();
+ seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
struct cftype *cft)
{
@@ -668,8 +769,14 @@ static void __init __hugetlb_cgroup_file_dfl_init(int idx)
events_local_file[idx]);
cft->flags = CFTYPE_NOT_ON_ROOT;
- /* NULL terminate the last cft */
+ /* Add the numa stat file */
cft = &h->cgroup_files_dfl[6];
+ snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
+ cft->seq_show = hugetlb_cgroup_read_numa_stat;
+ cft->flags = CFTYPE_NOT_ON_ROOT;
+
+ /* NULL terminate the last cft */
+ cft = &h->cgroup_files_dfl[7];
memset(cft, 0, sizeof(*cft));
WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
@@ -739,8 +846,14 @@ static void __init __hugetlb_cgroup_file_legacy_init(int idx)
cft->write = hugetlb_cgroup_reset;
cft->read_u64 = hugetlb_cgroup_read_u64;
- /* NULL terminate the last cft */
+ /* Add the numa stat file */
cft = &h->cgroup_files_legacy[8];
+ snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
+ cft->private = MEMFILE_PRIVATE(idx, 1);
+ cft->seq_show = hugetlb_cgroup_read_numa_stat;
+
+ /* NULL terminate the last cft */
+ cft = &h->cgroup_files_legacy[9];
memset(cft, 0, sizeof(*cft));
WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
diff --git a/mm/internal.h b/mm/internal.h
index 26af8a5a5be3..d80300392a19 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -23,7 +23,7 @@ struct folio_batch;
#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
- __GFP_ATOMIC)
+ __GFP_ATOMIC|__GFP_NOLOCKDEP)
/* The GFP flags allowed during early boot */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
@@ -167,11 +167,6 @@ extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason
extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
/*
- * in mm/memcontrol.c:
- */
-extern bool cgroup_memory_nokmem;
-
-/*
* in mm/page_alloc.c
*/
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 7c06db78a76c..92196562687b 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,7 +36,6 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc)
unsigned int nr_entries;
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
- nr_entries = filter_irq_stacks(entries, nr_entries);
return __stack_depot_save(entries, nr_entries, flags, can_alloc);
}
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 587da8995f2d..08291ed33e93 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -132,12 +132,23 @@ static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
{
void *object = qlink_to_object(qlink, cache);
+ struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
unsigned long flags;
if (IS_ENABLED(CONFIG_SLAB))
local_irq_save(flags);
/*
+ * If init_on_free is enabled and KASAN's free metadata is stored in
+ * the object, zero the metadata. Otherwise, the object's memory will
+ * not be properly zeroed, as KASAN saves the metadata after the slab
+ * allocator zeroes the object.
+ */
+ if (slab_want_init_on_free(cache) &&
+ cache->kasan_info.free_meta_offset == 0)
+ memzero_explicit(meta, sizeof(*meta));
+
+ /*
* As the object now gets freed from the quarantine, assume that its
* free track is no longer valid.
*/
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 4a4929b29a23..94136f84b449 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -498,7 +498,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
#else /* CONFIG_KASAN_VMALLOC */
-int kasan_module_alloc(void *addr, size_t size)
+int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask)
{
void *ret;
size_t scaled_size;
@@ -520,9 +520,14 @@ int kasan_module_alloc(void *addr, size_t size)
__builtin_return_address(0));
if (ret) {
+ struct vm_struct *vm = find_vm_area(addr);
__memset(ret, KASAN_SHADOW_INIT, shadow_size);
- find_vm_area(addr)->flags |= VM_KASAN;
+ vm->flags |= VM_KASAN;
kmemleak_ignore(ret);
+
+ if (vm->flags & VM_DEFER_KMEMLEAK)
+ kmemleak_vmalloc(vm, size, gfp_mask);
+
return 0;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 2e1911cc3466..35f14d0a00a6 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -618,6 +618,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
continue;
} else {
result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
goto out;
}
}
@@ -636,6 +637,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
if (page_mapcount(page) > 1 &&
++shared > khugepaged_max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
+ count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out;
}
@@ -681,7 +683,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
goto out;
}
if (!pte_write(pteval) && PageSwapCache(page) &&
- !reuse_swap_page(page, NULL)) {
+ !reuse_swap_page(page)) {
/*
* Page is in the swap cache and cannot be re-used.
* It cannot be collapsed into a THP.
@@ -756,11 +758,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
* ptl mostly unnecessary.
*/
spin_lock(ptl);
- /*
- * paravirt calls inside pte_clear here are
- * superfluous.
- */
- pte_clear(vma->vm_mm, address, _pte);
+ ptep_clear(vma->vm_mm, address, _pte);
spin_unlock(ptl);
}
} else {
@@ -774,11 +772,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
* inside page_remove_rmap().
*/
spin_lock(ptl);
- /*
- * paravirt calls inside pte_clear here are
- * superfluous.
- */
- pte_clear(vma->vm_mm, address, _pte);
+ ptep_clear(vma->vm_mm, address, _pte);
page_remove_rmap(src_page, false);
spin_unlock(ptl);
free_page_and_swap_cache(src_page);
@@ -1261,6 +1255,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
continue;
} else {
result = SCAN_EXCEED_SWAP_PTE;
+ count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
goto out_unmap;
}
}
@@ -1270,6 +1265,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
continue;
} else {
result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
goto out_unmap;
}
}
@@ -1298,6 +1294,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
if (page_mapcount(page) > 1 &&
++shared > khugepaged_max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
+ count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out_unmap;
}
@@ -1306,7 +1303,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
/*
* Record which node the original page is from and save this
* information to khugepaged_node_load[].
- * Khupaged will allocate hugepage from the node has the max
+ * Khugepaged will allocate hugepage from the node has the max
* hit record.
*/
node = page_to_nid(page);
@@ -2014,6 +2011,7 @@ static void khugepaged_scan_file(struct mm_struct *mm,
if (xa_is_value(page)) {
if (++swap > khugepaged_max_ptes_swap) {
result = SCAN_EXCEED_SWAP_PTE;
+ count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
break;
}
continue;
@@ -2064,6 +2062,7 @@ static void khugepaged_scan_file(struct mm_struct *mm,
if (result == SCAN_SUCCEED) {
if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
} else {
node = khugepaged_find_target_node();
collapse_file(mm, file, start, hpage, node);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index b57383c17cf6..dc3758fdba68 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -381,15 +381,20 @@ static void dump_object_info(struct kmemleak_object *object)
static struct kmemleak_object *lookup_object(unsigned long ptr, int alias)
{
struct rb_node *rb = object_tree_root.rb_node;
+ unsigned long untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
while (rb) {
- struct kmemleak_object *object =
- rb_entry(rb, struct kmemleak_object, rb_node);
- if (ptr < object->pointer)
+ struct kmemleak_object *object;
+ unsigned long untagged_objp;
+
+ object = rb_entry(rb, struct kmemleak_object, rb_node);
+ untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer);
+
+ if (untagged_ptr < untagged_objp)
rb = object->rb_node.rb_left;
- else if (object->pointer + object->size <= ptr)
+ else if (untagged_objp + object->size <= untagged_ptr)
rb = object->rb_node.rb_right;
- else if (object->pointer == ptr || alias)
+ else if (untagged_objp == untagged_ptr || alias)
return object;
else {
kmemleak_warn("Found object by alias at 0x%08lx\n",
@@ -576,6 +581,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
struct kmemleak_object *object, *parent;
struct rb_node **link, *rb_parent;
unsigned long untagged_ptr;
+ unsigned long untagged_objp;
object = mem_pool_alloc(gfp);
if (!object) {
@@ -629,9 +635,10 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
while (*link) {
rb_parent = *link;
parent = rb_entry(rb_parent, struct kmemleak_object, rb_node);
- if (ptr + size <= parent->pointer)
+ untagged_objp = (unsigned long)kasan_reset_tag((void *)parent->pointer);
+ if (untagged_ptr + size <= untagged_objp)
link = &parent->rb_node.rb_left;
- else if (parent->pointer + parent->size <= ptr)
+ else if (untagged_objp + parent->size <= untagged_ptr)
link = &parent->rb_node.rb_right;
else {
kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n",
diff --git a/mm/ksm.c b/mm/ksm.c
index 0662093237e4..c20bd4d9a0d9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -15,6 +15,7 @@
#include <linux/errno.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
@@ -2575,8 +2576,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
return page; /* no need to copy it */
} else if (!anon_vma) {
return page; /* no need to copy it */
- } else if (anon_vma->root == vma->anon_vma->root &&
- page->index == linear_page_index(vma, address)) {
+ } else if (page->index == linear_page_index(vma, address) &&
+ anon_vma->root == vma->anon_vma->root) {
return page; /* still no need to copy it */
}
if (!PageUptodate(page))
diff --git a/mm/madvise.c b/mm/madvise.c
index 8c927202bbe6..5604064df464 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -18,6 +18,8 @@
#include <linux/fadvise.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
+#include <linux/mm_inline.h>
+#include <linux/string.h>
#include <linux/uio.h>
#include <linux/ksm.h>
#include <linux/fs.h>
@@ -62,83 +64,122 @@ static int madvise_need_mmap_write(int behavior)
}
}
+#ifdef CONFIG_ANON_VMA_NAME
+static struct anon_vma_name *anon_vma_name_alloc(const char *name)
+{
+ struct anon_vma_name *anon_name;
+ size_t count;
+
+ /* Add 1 for NUL terminator at the end of the anon_name->name */
+ count = strlen(name) + 1;
+ anon_name = kmalloc(struct_size(anon_name, name, count), GFP_KERNEL);
+ if (anon_name) {
+ kref_init(&anon_name->kref);
+ memcpy(anon_name->name, name, count);
+ }
+
+ return anon_name;
+}
+
+static void vma_anon_name_free(struct kref *kref)
+{
+ struct anon_vma_name *anon_name =
+ container_of(kref, struct anon_vma_name, kref);
+ kfree(anon_name);
+}
+
+static inline bool has_vma_anon_name(struct vm_area_struct *vma)
+{
+ return !vma->vm_file && vma->anon_name;
+}
+
+const char *vma_anon_name(struct vm_area_struct *vma)
+{
+ if (!has_vma_anon_name(vma))
+ return NULL;
+
+ mmap_assert_locked(vma->vm_mm);
+
+ return vma->anon_name->name;
+}
+
+void dup_vma_anon_name(struct vm_area_struct *orig_vma,
+ struct vm_area_struct *new_vma)
+{
+ if (!has_vma_anon_name(orig_vma))
+ return;
+
+ kref_get(&orig_vma->anon_name->kref);
+ new_vma->anon_name = orig_vma->anon_name;
+}
+
+void free_vma_anon_name(struct vm_area_struct *vma)
+{
+ struct anon_vma_name *anon_name;
+
+ if (!has_vma_anon_name(vma))
+ return;
+
+ anon_name = vma->anon_name;
+ vma->anon_name = NULL;
+ kref_put(&anon_name->kref, vma_anon_name_free);
+}
+
+/* mmap_lock should be write-locked */
+static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
+{
+ const char *anon_name;
+
+ if (!name) {
+ free_vma_anon_name(vma);
+ return 0;
+ }
+
+ anon_name = vma_anon_name(vma);
+ if (anon_name) {
+ /* Same name, nothing to do here */
+ if (!strcmp(name, anon_name))
+ return 0;
+
+ free_vma_anon_name(vma);
+ }
+ vma->anon_name = anon_vma_name_alloc(name);
+ if (!vma->anon_name)
+ return -ENOMEM;
+
+ return 0;
+}
+#else /* CONFIG_ANON_VMA_NAME */
+static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
+{
+ if (name)
+ return -EINVAL;
+
+ return 0;
+}
+#endif /* CONFIG_ANON_VMA_NAME */
/*
- * We can potentially split a vm area into separate
- * areas, each area with its own behavior.
+ * Update the vm_flags on region of a vma, splitting it or merging it as
+ * necessary. Must be called with mmap_sem held for writing;
*/
-static long madvise_behavior(struct vm_area_struct *vma,
- struct vm_area_struct **prev,
- unsigned long start, unsigned long end, int behavior)
+static int madvise_update_vma(struct vm_area_struct *vma,
+ struct vm_area_struct **prev, unsigned long start,
+ unsigned long end, unsigned long new_flags,
+ const char *name)
{
struct mm_struct *mm = vma->vm_mm;
- int error = 0;
+ int error;
pgoff_t pgoff;
- unsigned long new_flags = vma->vm_flags;
- switch (behavior) {
- case MADV_NORMAL:
- new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
- break;
- case MADV_SEQUENTIAL:
- new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
- break;
- case MADV_RANDOM:
- new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
- break;
- case MADV_DONTFORK:
- new_flags |= VM_DONTCOPY;
- break;
- case MADV_DOFORK:
- if (vma->vm_flags & VM_IO) {
- error = -EINVAL;
- goto out;
- }
- new_flags &= ~VM_DONTCOPY;
- break;
- case MADV_WIPEONFORK:
- /* MADV_WIPEONFORK is only supported on anonymous memory. */
- if (vma->vm_file || vma->vm_flags & VM_SHARED) {
- error = -EINVAL;
- goto out;
- }
- new_flags |= VM_WIPEONFORK;
- break;
- case MADV_KEEPONFORK:
- new_flags &= ~VM_WIPEONFORK;
- break;
- case MADV_DONTDUMP:
- new_flags |= VM_DONTDUMP;
- break;
- case MADV_DODUMP:
- if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) {
- error = -EINVAL;
- goto out;
- }
- new_flags &= ~VM_DONTDUMP;
- break;
- case MADV_MERGEABLE:
- case MADV_UNMERGEABLE:
- error = ksm_madvise(vma, start, end, behavior, &new_flags);
- if (error)
- goto out_convert_errno;
- break;
- case MADV_HUGEPAGE:
- case MADV_NOHUGEPAGE:
- error = hugepage_madvise(vma, &new_flags, behavior);
- if (error)
- goto out_convert_errno;
- break;
- }
-
- if (new_flags == vma->vm_flags) {
+ if (new_flags == vma->vm_flags && is_same_vma_anon_name(vma, name)) {
*prev = vma;
- goto out;
+ return 0;
}
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx);
+ vma->vm_userfaultfd_ctx, name);
if (*prev) {
vma = *prev;
goto success;
@@ -147,23 +188,19 @@ static long madvise_behavior(struct vm_area_struct *vma,
*prev = vma;
if (start != vma->vm_start) {
- if (unlikely(mm->map_count >= sysctl_max_map_count)) {
- error = -ENOMEM;
- goto out;
- }
+ if (unlikely(mm->map_count >= sysctl_max_map_count))
+ return -ENOMEM;
error = __split_vma(mm, vma, start, 1);
if (error)
- goto out_convert_errno;
+ return error;
}
if (end != vma->vm_end) {
- if (unlikely(mm->map_count >= sysctl_max_map_count)) {
- error = -ENOMEM;
- goto out;
- }
+ if (unlikely(mm->map_count >= sysctl_max_map_count))
+ return -ENOMEM;
error = __split_vma(mm, vma, end, 0);
if (error)
- goto out_convert_errno;
+ return error;
}
success:
@@ -171,16 +208,13 @@ success:
* vm_flags is protected by the mmap_lock held in write mode.
*/
vma->vm_flags = new_flags;
+ if (!vma->vm_file) {
+ error = replace_vma_anon_name(vma, name);
+ if (error)
+ return error;
+ }
-out_convert_errno:
- /*
- * madvise() returns EAGAIN if kernel resources, such as
- * slab, are temporarily unavailable.
- */
- if (error == -ENOMEM)
- error = -EAGAIN;
-out:
- return error;
+ return 0;
}
#ifdef CONFIG_SWAP
@@ -930,6 +964,95 @@ static long madvise_remove(struct vm_area_struct *vma,
return error;
}
+/*
+ * Apply an madvise behavior to a region of a vma. madvise_update_vma
+ * will handle splitting a vm area into separate areas, each area with its own
+ * behavior.
+ */
+static int madvise_vma_behavior(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end,
+ unsigned long behavior)
+{
+ int error;
+ unsigned long new_flags = vma->vm_flags;
+
+ switch (behavior) {
+ case MADV_REMOVE:
+ return madvise_remove(vma, prev, start, end);
+ case MADV_WILLNEED:
+ return madvise_willneed(vma, prev, start, end);
+ case MADV_COLD:
+ return madvise_cold(vma, prev, start, end);
+ case MADV_PAGEOUT:
+ return madvise_pageout(vma, prev, start, end);
+ case MADV_FREE:
+ case MADV_DONTNEED:
+ return madvise_dontneed_free(vma, prev, start, end, behavior);
+ case MADV_POPULATE_READ:
+ case MADV_POPULATE_WRITE:
+ return madvise_populate(vma, prev, start, end, behavior);
+ case MADV_NORMAL:
+ new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
+ break;
+ case MADV_SEQUENTIAL:
+ new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
+ break;
+ case MADV_RANDOM:
+ new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
+ break;
+ case MADV_DONTFORK:
+ new_flags |= VM_DONTCOPY;
+ break;
+ case MADV_DOFORK:
+ if (vma->vm_flags & VM_IO)
+ return -EINVAL;
+ new_flags &= ~VM_DONTCOPY;
+ break;
+ case MADV_WIPEONFORK:
+ /* MADV_WIPEONFORK is only supported on anonymous memory. */
+ if (vma->vm_file || vma->vm_flags & VM_SHARED)
+ return -EINVAL;
+ new_flags |= VM_WIPEONFORK;
+ break;
+ case MADV_KEEPONFORK:
+ new_flags &= ~VM_WIPEONFORK;
+ break;
+ case MADV_DONTDUMP:
+ new_flags |= VM_DONTDUMP;
+ break;
+ case MADV_DODUMP:
+ if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL)
+ return -EINVAL;
+ new_flags &= ~VM_DONTDUMP;
+ break;
+ case MADV_MERGEABLE:
+ case MADV_UNMERGEABLE:
+ error = ksm_madvise(vma, start, end, behavior, &new_flags);
+ if (error)
+ goto out;
+ break;
+ case MADV_HUGEPAGE:
+ case MADV_NOHUGEPAGE:
+ error = hugepage_madvise(vma, &new_flags, behavior);
+ if (error)
+ goto out;
+ break;
+ }
+
+ error = madvise_update_vma(vma, prev, start, end, new_flags,
+ vma_anon_name(vma));
+
+out:
+ /*
+ * madvise() returns EAGAIN if kernel resources, such as
+ * slab, are temporarily unavailable.
+ */
+ if (error == -ENOMEM)
+ error = -EAGAIN;
+ return error;
+}
+
#ifdef CONFIG_MEMORY_FAILURE
/*
* Error injection support for memory error handling.
@@ -978,30 +1101,6 @@ static int madvise_inject_error(int behavior,
}
#endif
-static long
-madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
- unsigned long start, unsigned long end, int behavior)
-{
- switch (behavior) {
- case MADV_REMOVE:
- return madvise_remove(vma, prev, start, end);
- case MADV_WILLNEED:
- return madvise_willneed(vma, prev, start, end);
- case MADV_COLD:
- return madvise_cold(vma, prev, start, end);
- case MADV_PAGEOUT:
- return madvise_pageout(vma, prev, start, end);
- case MADV_FREE:
- case MADV_DONTNEED:
- return madvise_dontneed_free(vma, prev, start, end, behavior);
- case MADV_POPULATE_READ:
- case MADV_POPULATE_WRITE:
- return madvise_populate(vma, prev, start, end, behavior);
- default:
- return madvise_behavior(vma, prev, start, end, behavior);
- }
-}
-
static bool
madvise_behavior_valid(int behavior)
{
@@ -1056,6 +1155,122 @@ process_madvise_behavior_valid(int behavior)
}
/*
+ * Walk the vmas in range [start,end), and call the visit function on each one.
+ * The visit function will get start and end parameters that cover the overlap
+ * between the current vma and the original range. Any unmapped regions in the
+ * original range will result in this function returning -ENOMEM while still
+ * calling the visit function on all of the existing vmas in the range.
+ * Must be called with the mmap_lock held for reading or writing.
+ */
+static
+int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long arg,
+ int (*visit)(struct vm_area_struct *vma,
+ struct vm_area_struct **prev, unsigned long start,
+ unsigned long end, unsigned long arg))
+{
+ struct vm_area_struct *vma;
+ struct vm_area_struct *prev;
+ unsigned long tmp;
+ int unmapped_error = 0;
+
+ /*
+ * If the interval [start,end) covers some unmapped address
+ * ranges, just ignore them, but return -ENOMEM at the end.
+ * - different from the way of handling in mlock etc.
+ */
+ vma = find_vma_prev(mm, start, &prev);
+ if (vma && start > vma->vm_start)
+ prev = vma;
+
+ for (;;) {
+ int error;
+
+ /* Still start < end. */
+ if (!vma)
+ return -ENOMEM;
+
+ /* Here start < (end|vma->vm_end). */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ start = vma->vm_start;
+ if (start >= end)
+ break;
+ }
+
+ /* Here vma->vm_start <= start < (end|vma->vm_end) */
+ tmp = vma->vm_end;
+ if (end < tmp)
+ tmp = end;
+
+ /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+ error = visit(vma, &prev, start, tmp, arg);
+ if (error)
+ return error;
+ start = tmp;
+ if (prev && start < prev->vm_end)
+ start = prev->vm_end;
+ if (start >= end)
+ break;
+ if (prev)
+ vma = prev->vm_next;
+ else /* madvise_remove dropped mmap_lock */
+ vma = find_vma(mm, start);
+ }
+
+ return unmapped_error;
+}
+
+#ifdef CONFIG_ANON_VMA_NAME
+static int madvise_vma_anon_name(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end,
+ unsigned long name)
+{
+ int error;
+
+ /* Only anonymous mappings can be named */
+ if (vma->vm_file)
+ return -EBADF;
+
+ error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
+ (const char *)name);
+
+ /*
+ * madvise() returns EAGAIN if kernel resources, such as
+ * slab, are temporarily unavailable.
+ */
+ if (error == -ENOMEM)
+ error = -EAGAIN;
+ return error;
+}
+
+int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+ unsigned long len_in, const char *name)
+{
+ unsigned long end;
+ unsigned long len;
+
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+ len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+ /* Check to see whether len was rounded up from small -ve to zero */
+ if (len_in && !len)
+ return -EINVAL;
+
+ end = start + len;
+ if (end < start)
+ return -EINVAL;
+
+ if (end == start)
+ return 0;
+
+ return madvise_walk_vmas(mm, start, end, (unsigned long)name,
+ madvise_vma_anon_name);
+}
+#endif /* CONFIG_ANON_VMA_NAME */
+/*
* The madvise(2) system call.
*
* Applications can use madvise() to advise the kernel how it should
@@ -1127,10 +1342,8 @@ process_madvise_behavior_valid(int behavior)
*/
int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior)
{
- unsigned long end, tmp;
- struct vm_area_struct *vma, *prev;
- int unmapped_error = 0;
- int error = -EINVAL;
+ unsigned long end;
+ int error;
int write;
size_t len;
struct blk_plug plug;
@@ -1138,23 +1351,22 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
start = untagged_addr(start);
if (!madvise_behavior_valid(behavior))
- return error;
+ return -EINVAL;
if (!PAGE_ALIGNED(start))
- return error;
+ return -EINVAL;
len = PAGE_ALIGN(len_in);
/* Check to see whether len was rounded up from small -ve to zero */
if (len_in && !len)
- return error;
+ return -EINVAL;
end = start + len;
if (end < start)
- return error;
+ return -EINVAL;
- error = 0;
if (end == start)
- return error;
+ return 0;
#ifdef CONFIG_MEMORY_FAILURE
if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
@@ -1169,51 +1381,9 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
mmap_read_lock(mm);
}
- /*
- * If the interval [start,end) covers some unmapped address
- * ranges, just ignore them, but return -ENOMEM at the end.
- * - different from the way of handling in mlock etc.
- */
- vma = find_vma_prev(mm, start, &prev);
- if (vma && start > vma->vm_start)
- prev = vma;
-
blk_start_plug(&plug);
- for (;;) {
- /* Still start < end. */
- error = -ENOMEM;
- if (!vma)
- goto out;
-
- /* Here start < (end|vma->vm_end). */
- if (start < vma->vm_start) {
- unmapped_error = -ENOMEM;
- start = vma->vm_start;
- if (start >= end)
- goto out;
- }
-
- /* Here vma->vm_start <= start < (end|vma->vm_end) */
- tmp = vma->vm_end;
- if (end < tmp)
- tmp = end;
-
- /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
- error = madvise_vma(vma, &prev, start, tmp, behavior);
- if (error)
- goto out;
- start = tmp;
- if (prev && start < prev->vm_end)
- start = prev->vm_end;
- error = unmapped_error;
- if (start >= end)
- goto out;
- if (prev)
- vma = prev->vm_next;
- else /* madvise_remove dropped mmap_lock */
- vma = find_vma(mm, start);
- }
-out:
+ error = madvise_walk_vmas(mm, start, end, behavior,
+ madvise_vma_behavior);
blk_finish_plug(&plug);
if (write)
mmap_write_unlock(mm);
diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c
index ea734f248fce..1b0ab8fcfd8b 100644
--- a/mm/mapping_dirty_helpers.c
+++ b/mm/mapping_dirty_helpers.c
@@ -3,6 +3,7 @@
#include <linux/hugetlb.h>
#include <linux/bitops.h>
#include <linux/mmu_notifier.h>
+#include <linux/mm_inline.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4a7b3ebf8e48..09d342c7cbd0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -84,7 +84,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
static bool cgroup_memory_nosocket __ro_after_init;
/* Kernel memory accounting disabled? */
-bool cgroup_memory_nokmem __ro_after_init;
+static bool cgroup_memory_nokmem __ro_after_init;
/* Whether the swap controller is active */
#ifdef CONFIG_MEMCG_SWAP
@@ -629,11 +629,17 @@ static DEFINE_SPINLOCK(stats_flush_lock);
static DEFINE_PER_CPU(unsigned int, stats_updates);
static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
-static inline void memcg_rstat_updated(struct mem_cgroup *memcg)
+static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
{
+ unsigned int x;
+
cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
- if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH))
- atomic_inc(&stats_flush_threshold);
+
+ x = __this_cpu_add_return(stats_updates, abs(val));
+ if (x > MEMCG_CHARGE_BATCH) {
+ atomic_add(x / MEMCG_CHARGE_BATCH, &stats_flush_threshold);
+ __this_cpu_write(stats_updates, 0);
+ }
}
static void __mem_cgroup_flush_stats(void)
@@ -656,7 +662,7 @@ void mem_cgroup_flush_stats(void)
static void flush_memcg_stats_dwork(struct work_struct *w)
{
- mem_cgroup_flush_stats();
+ __mem_cgroup_flush_stats();
queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
}
@@ -672,7 +678,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
return;
__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
- memcg_rstat_updated(memcg);
+ memcg_rstat_updated(memcg, val);
}
/* idx can be of type enum memcg_stat_item or node_stat_item. */
@@ -705,7 +711,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
/* Update lruvec */
__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
- memcg_rstat_updated(memcg);
+ memcg_rstat_updated(memcg, val);
}
/**
@@ -789,7 +795,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
return;
__this_cpu_add(memcg->vmstats_percpu->events[idx], count);
- memcg_rstat_updated(memcg);
+ memcg_rstat_updated(memcg, count);
}
static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
@@ -1369,6 +1375,7 @@ static const struct memory_stat memory_stats[] = {
{ "pagetables", NR_PAGETABLE },
{ "percpu", MEMCG_PERCPU_B },
{ "sock", MEMCG_SOCK },
+ { "vmalloc", MEMCG_VMALLOC },
{ "shmem", NR_SHMEM },
{ "file_mapped", NR_FILE_MAPPED },
{ "file_dirty", NR_FILE_DIRTY },
@@ -4850,6 +4857,17 @@ out_kfree:
return ret;
}
+#if defined(CONFIG_MEMCG_KMEM) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
+static int mem_cgroup_slab_show(struct seq_file *m, void *p)
+{
+ /*
+ * Deprecated.
+ * Please, take a look at tools/cgroup/slabinfo.py .
+ */
+ return 0;
+}
+#endif
+
static struct cftype mem_cgroup_legacy_files[] = {
{
.name = "usage_in_bytes",
@@ -4950,7 +4968,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
{
.name = "kmem.slabinfo",
- .seq_show = memcg_slab_show,
+ .seq_show = mem_cgroup_slab_show,
},
#endif
{
@@ -5110,15 +5128,11 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
static struct mem_cgroup *mem_cgroup_alloc(void)
{
struct mem_cgroup *memcg;
- unsigned int size;
int node;
int __maybe_unused i;
long error = -ENOMEM;
- size = sizeof(struct mem_cgroup);
- size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
-
- memcg = kzalloc(size, GFP_KERNEL);
+ memcg = kzalloc(struct_size(memcg, nodeinfo, nr_node_ids), GFP_KERNEL);
if (!memcg)
return ERR_PTR(error);
@@ -6312,6 +6326,8 @@ static void __memory_events_show(struct seq_file *m, atomic_long_t *events)
seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM]));
seq_printf(m, "oom_kill %lu\n",
atomic_long_read(&events[MEMCG_OOM_KILL]));
+ seq_printf(m, "oom_group_kill %lu\n",
+ atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]));
}
static int memory_events_show(struct seq_file *m, void *v)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f1c389f7e669..14ae5c18e776 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -58,6 +58,7 @@
#include <linux/ratelimit.h>
#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
+#include <linux/shmem_fs.h>
#include "internal.h"
#include "ras/ras_event.h"
@@ -722,7 +723,6 @@ static const char * const action_page_types[] = {
[MF_MSG_KERNEL_HIGH_ORDER] = "high-order kernel page",
[MF_MSG_SLAB] = "kernel slab page",
[MF_MSG_DIFFERENT_COMPOUND] = "different compound page after locking",
- [MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned",
[MF_MSG_HUGE] = "huge page",
[MF_MSG_FREE_HUGE] = "free huge page",
[MF_MSG_NON_PMD_HUGE] = "non-pmd-sized huge page",
@@ -737,7 +737,6 @@ static const char * const action_page_types[] = {
[MF_MSG_CLEAN_LRU] = "clean LRU page",
[MF_MSG_TRUNCATED_LRU] = "already truncated LRU page",
[MF_MSG_BUDDY] = "free buddy page",
- [MF_MSG_BUDDY_2ND] = "free buddy page (2nd try)",
[MF_MSG_DAX] = "dax page",
[MF_MSG_UNSPLIT_THP] = "unsplit thp",
[MF_MSG_UNKNOWN] = "unknown page",
@@ -867,6 +866,7 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)
{
int ret;
struct address_space *mapping;
+ bool extra_pins;
delete_from_lru_cache(p);
@@ -896,17 +896,23 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)
}
/*
+ * The shmem page is kept in page cache instead of truncating
+ * so is expected to have an extra refcount after error-handling.
+ */
+ extra_pins = shmem_mapping(mapping);
+
+ /*
* Truncation is a bit tricky. Enable it per file system for now.
*
* Open: to take i_rwsem or not for this? Right now we don't.
*/
ret = truncate_error_page(p, page_to_pfn(p), mapping);
+ if (has_extra_refcount(ps, p, extra_pins))
+ ret = MF_FAILED;
+
out:
unlock_page(p);
- if (has_extra_refcount(ps, p, false))
- ret = MF_FAILED;
-
return ret;
}
@@ -1154,6 +1160,22 @@ static int page_action(struct page_state *ps, struct page *p,
return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}
+static inline bool PageHWPoisonTakenOff(struct page *page)
+{
+ return PageHWPoison(page) && page_private(page) == MAGIC_HWPOISON;
+}
+
+void SetPageHWPoisonTakenOff(struct page *page)
+{
+ set_page_private(page, MAGIC_HWPOISON);
+}
+
+void ClearPageHWPoisonTakenOff(struct page *page)
+{
+ if (PageHWPoison(page))
+ set_page_private(page, 0);
+}
+
/*
* Return true if a page type of a given page is supported by hwpoison
* mechanism (while handling could fail), otherwise false. This function
@@ -1256,6 +1278,27 @@ out:
return ret;
}
+static int __get_unpoison_page(struct page *page)
+{
+ struct page *head = compound_head(page);
+ int ret = 0;
+ bool hugetlb = false;
+
+ ret = get_hwpoison_huge_page(head, &hugetlb);
+ if (hugetlb)
+ return ret;
+
+ /*
+ * PageHWPoisonTakenOff pages are not only marked as PG_hwpoison,
+ * but also isolated from buddy freelist, so need to identify the
+ * state and have to cancel both operations to unpoison.
+ */
+ if (PageHWPoisonTakenOff(page))
+ return -EHWPOISON;
+
+ return get_page_unless_zero(page) ? 1 : 0;
+}
+
/**
* get_hwpoison_page() - Get refcount for memory error handling
* @p: Raw error page (hit by memory error)
@@ -1263,7 +1306,7 @@ out:
*
* get_hwpoison_page() takes a page refcount of an error page to handle memory
* error on it, after checking that the error page is in a well-defined state
- * (defined as a page-type we can successfully handle the memor error on it,
+ * (defined as a page-type we can successfully handle the memory error on it,
* such as LRU page and hugetlb page).
*
* Memory error handling could be triggered at any time on any type of page,
@@ -1272,18 +1315,26 @@ out:
* extra care for the error page's state (as done in __get_hwpoison_page()),
* and has some retry logic in get_any_page().
*
+ * When called from unpoison_memory(), the caller should already ensure that
+ * the given page has PG_hwpoison. So it's never reused for other page
+ * allocations, and __get_unpoison_page() never races with them.
+ *
* Return: 0 on failure,
* 1 on success for in-use pages in a well-defined state,
* -EIO for pages on which we can not handle memory errors,
* -EBUSY when get_hwpoison_page() has raced with page lifecycle
- * operations like allocation and free.
+ * operations like allocation and free,
+ * -EHWPOISON when the page is hwpoisoned and taken off from buddy.
*/
static int get_hwpoison_page(struct page *p, unsigned long flags)
{
int ret;
zone_pcp_disable(page_zone(p));
- ret = get_any_page(p, flags);
+ if (flags & MF_UNPOISON)
+ ret = __get_unpoison_page(p);
+ else
+ ret = get_any_page(p, flags);
zone_pcp_enable(page_zone(p));
return ret;
@@ -1494,14 +1545,6 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
lock_page(head);
page_flags = head->flags;
- if (!PageHWPoison(head)) {
- pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
- num_poisoned_pages_dec();
- unlock_page(head);
- put_page(head);
- return 0;
- }
-
/*
* TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
* simply disable it. In order to make it work properly, we need
@@ -1615,6 +1658,8 @@ out:
return rc;
}
+static DEFINE_MUTEX(mf_mutex);
+
/**
* memory_failure - Handle memory failure of a page.
* @pfn: Page Number of the corrupted page
@@ -1641,7 +1686,6 @@ int memory_failure(unsigned long pfn, int flags)
int res = 0;
unsigned long page_flags;
bool retry = true;
- static DEFINE_MUTEX(mf_mutex);
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
@@ -1782,16 +1826,6 @@ try_again:
*/
page_flags = p->flags;
- /*
- * unpoison always clear PG_hwpoison inside page lock
- */
- if (!PageHWPoison(p)) {
- pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
- num_poisoned_pages_dec();
- unlock_page(p);
- put_page(p);
- goto unlock_mutex;
- }
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
@@ -1955,6 +1989,28 @@ core_initcall(memory_failure_init);
pr_info(fmt, pfn); \
})
+static inline int clear_page_hwpoison(struct ratelimit_state *rs, struct page *p)
+{
+ if (TestClearPageHWPoison(p)) {
+ unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+ page_to_pfn(p), rs);
+ num_poisoned_pages_dec();
+ return 1;
+ }
+ return 0;
+}
+
+static inline int unpoison_taken_off_page(struct ratelimit_state *rs,
+ struct page *p)
+{
+ if (put_page_back_buddy(p)) {
+ unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+ page_to_pfn(p), rs);
+ return 0;
+ }
+ return -EBUSY;
+}
+
/**
* unpoison_memory - Unpoison a previously poisoned page
* @pfn: Page number of the to be unpoisoned page
@@ -1971,8 +2027,7 @@ int unpoison_memory(unsigned long pfn)
{
struct page *page;
struct page *p;
- int freeit = 0;
- unsigned long flags = 0;
+ int ret = -EBUSY;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
@@ -1982,69 +2037,60 @@ int unpoison_memory(unsigned long pfn)
p = pfn_to_page(pfn);
page = compound_head(p);
+ mutex_lock(&mf_mutex);
+
if (!PageHWPoison(p)) {
unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
if (page_count(page) > 1) {
unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",
pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
if (page_mapped(page)) {
unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
if (page_mapping(page)) {
unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",
pfn, &unpoison_rs);
- return 0;
- }
-
- /*
- * unpoison_memory() can encounter thp only when the thp is being
- * worked by memory_failure() and the page lock is not held yet.
- * In such case, we yield to memory_failure() and make unpoison fail.
- */
- if (!PageHuge(page) && PageTransHuge(page)) {
- unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
- pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
- if (!get_hwpoison_page(p, flags)) {
- if (TestClearPageHWPoison(p))
- num_poisoned_pages_dec();
- unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
- pfn, &unpoison_rs);
- return 0;
- }
+ if (PageSlab(page) || PageTable(page))
+ goto unlock_mutex;
- lock_page(page);
- /*
- * This test is racy because PG_hwpoison is set outside of page lock.
- * That's acceptable because that won't trigger kernel panic. Instead,
- * the PG_hwpoison page will be caught and isolated on the entrance to
- * the free buddy page pool.
- */
- if (TestClearPageHWPoison(page)) {
- unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
- pfn, &unpoison_rs);
- num_poisoned_pages_dec();
- freeit = 1;
- }
- unlock_page(page);
+ ret = get_hwpoison_page(p, MF_UNPOISON);
+ if (!ret) {
+ if (clear_page_hwpoison(&unpoison_rs, page))
+ ret = 0;
+ else
+ ret = -EBUSY;
+ } else if (ret < 0) {
+ if (ret == -EHWPOISON) {
+ ret = unpoison_taken_off_page(&unpoison_rs, p);
+ } else
+ unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
+ pfn, &unpoison_rs);
+ } else {
+ int freeit = clear_page_hwpoison(&unpoison_rs, p);
- put_page(page);
- if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
put_page(page);
+ if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) {
+ put_page(page);
+ ret = 0;
+ }
+ }
- return 0;
+unlock_mutex:
+ mutex_unlock(&mf_mutex);
+ return ret;
}
EXPORT_SYMBOL(unpoison_memory);
@@ -2225,9 +2271,12 @@ int soft_offline_page(unsigned long pfn, int flags)
return -EIO;
}
+ mutex_lock(&mf_mutex);
+
if (PageHWPoison(page)) {
pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
put_ref_page(ref_page);
+ mutex_unlock(&mf_mutex);
return 0;
}
@@ -2246,5 +2295,7 @@ retry:
}
}
+ mutex_unlock(&mf_mutex);
+
return ret;
}
diff --git a/mm/memory.c b/mm/memory.c
index 23f2f1300d42..c125c4969913 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -41,6 +41,7 @@
#include <linux/kernel_stat.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/numa_balancing.h>
@@ -719,8 +720,6 @@ static void restore_exclusive_pte(struct vm_area_struct *vma,
else if (is_writable_device_exclusive_entry(entry))
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
- set_pte_at(vma->vm_mm, address, ptep, pte);
-
/*
* No need to take a page reference as one was already
* created when the swap entry was made.
@@ -734,6 +733,8 @@ static void restore_exclusive_pte(struct vm_area_struct *vma,
*/
WARN_ON_ONCE(!PageAnon(page));
+ set_pte_at(vma->vm_mm, address, ptep, pte);
+
if (vma->vm_flags & VM_LOCKED)
mlock_vma_page(page);
@@ -3528,7 +3529,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!si))
goto out;
- delayacct_set_flag(current, DELAYACCT_PF_SWAPIN);
page = lookup_swap_cache(entry, vma, vmf->address);
swapcache = page;
@@ -3576,7 +3576,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vmf->address, &vmf->ptl);
if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
ret = VM_FAULT_OOM;
- delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto unlock;
}
@@ -3590,13 +3589,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* owner processes (which may be unknown at hwpoison time)
*/
ret = VM_FAULT_HWPOISON;
- delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto out_release;
}
locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
- delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
if (!locked) {
ret |= VM_FAULT_RETRY;
goto out_release;
@@ -3647,7 +3644,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
pte = mk_pte(page, vma->vm_page_prot);
- if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
+ if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
vmf->flags &= ~FAULT_FLAG_WRITE;
ret |= VM_FAULT_WRITE;
@@ -3660,8 +3657,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
pte = pte_mkuffd_wp(pte);
pte = pte_wrprotect(pte);
}
- set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
- arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
vmf->orig_pte = pte;
/* ksm created a completely new copy */
@@ -3672,6 +3667,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
}
+ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+ arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
+
swap_free(entry);
if (mem_cgroup_swap_full(page) ||
(vma->vm_flags & VM_LOCKED) || PageMlocked(page))
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f6248affaf38..028e8dd82b44 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -134,6 +134,8 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES];
* @node: Node id to start the search
*
* Lookup the next closest node by distance if @nid is not online.
+ *
+ * Return: this @node if it is online, otherwise the closest node by distance
*/
int numa_map_to_online_node(int node)
{
@@ -296,6 +298,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
atomic_set(&policy->refcnt, 1);
policy->mode = mode;
policy->flags = flags;
+ policy->home_node = NUMA_NO_NODE;
return policy;
}
@@ -810,7 +813,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
((vmstart - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff,
- new_pol, vma->vm_userfaultfd_ctx);
+ new_pol, vma->vm_userfaultfd_ctx,
+ vma_anon_name(vma));
if (prev) {
vma = prev;
next = vma->vm_next;
@@ -1477,6 +1481,77 @@ static long kernel_mbind(unsigned long start, unsigned long len,
return do_mbind(start, len, lmode, mode_flags, &nodes, flags);
}
+SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len,
+ unsigned long, home_node, unsigned long, flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct mempolicy *new;
+ unsigned long vmstart;
+ unsigned long vmend;
+ unsigned long end;
+ int err = -ENOENT;
+
+ start = untagged_addr(start);
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+ /*
+ * flags is used for future extension if any.
+ */
+ if (flags != 0)
+ return -EINVAL;
+
+ /*
+ * Check home_node is online to avoid accessing uninitialized
+ * NODE_DATA.
+ */
+ if (home_node >= MAX_NUMNODES || !node_online(home_node))
+ return -EINVAL;
+
+ len = (len + PAGE_SIZE - 1) & PAGE_MASK;
+ end = start + len;
+
+ if (end < start)
+ return -EINVAL;
+ if (end == start)
+ return 0;
+ mmap_write_lock(mm);
+ vma = find_vma(mm, start);
+ for (; vma && vma->vm_start < end; vma = vma->vm_next) {
+
+ vmstart = max(start, vma->vm_start);
+ vmend = min(end, vma->vm_end);
+ new = mpol_dup(vma_policy(vma));
+ if (IS_ERR(new)) {
+ err = PTR_ERR(new);
+ break;
+ }
+ /*
+ * Only update home node if there is an existing vma policy
+ */
+ if (!new)
+ continue;
+
+ /*
+ * If any vma in the range got policy other than MPOL_BIND
+ * or MPOL_PREFERRED_MANY we return error. We don't reset
+ * the home node for vmas we already updated before.
+ */
+ if (new->mode != MPOL_BIND && new->mode != MPOL_PREFERRED_MANY) {
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ new->home_node = home_node;
+ err = mbind_range(mm, vmstart, vmend, new);
+ mpol_put(new);
+ if (err)
+ break;
+ }
+ mmap_write_unlock(mm);
+ return err;
+}
+
SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
unsigned long, mode, const unsigned long __user *, nmask,
unsigned long, maxnode, unsigned int, flags)
@@ -1801,6 +1876,11 @@ static int policy_node(gfp_t gfp, struct mempolicy *policy, int nd)
WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE));
}
+ if ((policy->mode == MPOL_BIND ||
+ policy->mode == MPOL_PREFERRED_MANY) &&
+ policy->home_node != NUMA_NO_NODE)
+ return policy->home_node;
+
return nd;
}
@@ -2061,7 +2141,7 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
page = __alloc_pages(preferred_gfp, order, nid, &pol->nodes);
if (!page)
- page = __alloc_pages(gfp, order, numa_node_id(), NULL);
+ page = __alloc_pages(gfp, order, nid, NULL);
return page;
}
@@ -2072,7 +2152,6 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
* @order: Order of the GFP allocation.
* @vma: Pointer to VMA or NULL if not available.
* @addr: Virtual address of the allocation. Must be inside @vma.
- * @node: Which node to prefer for allocation (modulo policy).
* @hugepage: For hugepages try only the preferred node if possible.
*
* Allocate a page for a specific address in @vma, using the appropriate
@@ -2083,9 +2162,10 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
* Return: The page on success or NULL if allocation fails.
*/
struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
- unsigned long addr, int node, bool hugepage)
+ unsigned long addr, bool hugepage)
{
struct mempolicy *pol;
+ int node = numa_node_id();
struct page *page;
int preferred_nid;
nodemask_t *nmask;
@@ -2102,6 +2182,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
}
if (pol->mode == MPOL_PREFERRED_MANY) {
+ node = policy_node(gfp, pol, node);
page = alloc_pages_preferred_many(gfp, order, node, pol);
mpol_cond_put(pol);
goto out;
@@ -2185,7 +2266,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)
page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
else if (pol->mode == MPOL_PREFERRED_MANY)
page = alloc_pages_preferred_many(gfp, order,
- numa_node_id(), pol);
+ policy_node(gfp, pol, numa_node_id()), pol);
else
page = __alloc_pages(gfp, order,
policy_node(gfp, pol, numa_node_id()),
@@ -2341,6 +2422,8 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
return false;
if (a->flags != b->flags)
return false;
+ if (a->home_node != b->home_node)
+ return false;
if (mpol_store_user_nodemask(a))
if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask))
return false;
@@ -2884,7 +2967,7 @@ static const char * const policy_modes[] =
* Format of input:
* <mode>[=<flags>][:<nodelist>]
*
- * On success, returns 0, else 1
+ * Return: %0 on success, else %1
*/
int mpol_parse_str(char *str, struct mempolicy **mpol)
{
diff --git a/mm/memremap.c b/mm/memremap.c
index 643965da13a6..6aa5f0c2d11f 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -102,15 +102,22 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id)
return (range->start + range_len(range)) >> PAGE_SHIFT;
}
-static unsigned long pfn_next(unsigned long pfn)
+static unsigned long pfn_next(struct dev_pagemap *pgmap, unsigned long pfn)
{
- if (pfn % 1024 == 0)
+ if (pfn % (1024 << pgmap->vmemmap_shift))
cond_resched();
- return pfn + 1;
+ return pfn + pgmap_vmemmap_nr(pgmap);
+}
+
+static unsigned long pfn_len(struct dev_pagemap *pgmap, unsigned long range_id)
+{
+ return (pfn_end(pgmap, range_id) -
+ pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift;
}
#define for_each_device_pfn(pfn, map, i) \
- for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn))
+ for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); \
+ pfn = pfn_next(map, pfn))
static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
{
@@ -271,8 +278,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
PHYS_PFN(range->start),
PHYS_PFN(range_len(range)), pgmap);
- percpu_ref_get_many(&pgmap->ref,
- pfn_end(pgmap, range_id) - pfn_first(pgmap, range_id));
+ percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
return 0;
err_add_memory:
diff --git a/mm/migrate.c b/mm/migrate.c
index 7079e6b7dbe7..c7da064b4781 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -50,6 +50,7 @@
#include <linux/ptrace.h>
#include <linux/oom.h>
#include <linux/memory.h>
+#include <linux/random.h>
#include <asm/tlbflush.h>
@@ -236,20 +237,19 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
pte = pte_mkhuge(pte);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
- set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
if (PageAnon(new))
hugepage_add_anon_rmap(new, vma, pvmw.address);
else
page_dup_rmap(new, true);
+ set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
} else
#endif
{
- set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
-
if (PageAnon(new))
page_add_anon_rmap(new, vma, pvmw.address, false);
else
page_add_file_rmap(new, false);
+ set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
}
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
mlock_vma_page(new);
@@ -291,7 +291,6 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
{
pte_t pte;
swp_entry_t entry;
- struct folio *folio;
spin_lock(ptl);
pte = *ptep;
@@ -302,17 +301,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
if (!is_migration_entry(entry))
goto out;
- folio = page_folio(pfn_swap_entry_to_page(entry));
-
- /*
- * Once page cache replacement of page migration started, page_count
- * is zero; but we must not call folio_put_wait_locked() without
- * a ref. Use folio_try_get(), and just fault again if it fails.
- */
- if (!folio_try_get(folio))
- goto out;
- pte_unmap_unlock(ptep, ptl);
- folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
+ migration_entry_wait_on_locked(entry, ptep, ptl);
return;
out:
pte_unmap_unlock(ptep, ptl);
@@ -337,16 +326,11 @@ void migration_entry_wait_huge(struct vm_area_struct *vma,
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
spinlock_t *ptl;
- struct folio *folio;
ptl = pmd_lock(mm, pmd);
if (!is_pmd_migration_entry(*pmd))
goto unlock;
- folio = page_folio(pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd)));
- if (!folio_try_get(folio))
- goto unlock;
- spin_unlock(ptl);
- folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
+ migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), NULL, ptl);
return;
unlock:
spin_unlock(ptl);
@@ -1084,80 +1068,6 @@ out:
return rc;
}
-
-/*
- * node_demotion[] example:
- *
- * Consider a system with two sockets. Each socket has
- * three classes of memory attached: fast, medium and slow.
- * Each memory class is placed in its own NUMA node. The
- * CPUs are placed in the node with the "fast" memory. The
- * 6 NUMA nodes (0-5) might be split among the sockets like
- * this:
- *
- * Socket A: 0, 1, 2
- * Socket B: 3, 4, 5
- *
- * When Node 0 fills up, its memory should be migrated to
- * Node 1. When Node 1 fills up, it should be migrated to
- * Node 2. The migration path start on the nodes with the
- * processors (since allocations default to this node) and
- * fast memory, progress through medium and end with the
- * slow memory:
- *
- * 0 -> 1 -> 2 -> stop
- * 3 -> 4 -> 5 -> stop
- *
- * This is represented in the node_demotion[] like this:
- *
- * { 1, // Node 0 migrates to 1
- * 2, // Node 1 migrates to 2
- * -1, // Node 2 does not migrate
- * 4, // Node 3 migrates to 4
- * 5, // Node 4 migrates to 5
- * -1} // Node 5 does not migrate
- */
-
-/*
- * Writes to this array occur without locking. Cycles are
- * not allowed: Node X demotes to Y which demotes to X...
- *
- * If multiple reads are performed, a single rcu_read_lock()
- * must be held over all reads to ensure that no cycles are
- * observed.
- */
-static int node_demotion[MAX_NUMNODES] __read_mostly =
- {[0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE};
-
-/**
- * next_demotion_node() - Get the next node in the demotion path
- * @node: The starting node to lookup the next node
- *
- * Return: node id for next memory node in the demotion path hierarchy
- * from @node; NUMA_NO_NODE if @node is terminal. This does not keep
- * @node online or guarantee that it *continues* to be the next demotion
- * target.
- */
-int next_demotion_node(int node)
-{
- int target;
-
- /*
- * node_demotion[] is updated without excluding this
- * function from running. RCU doesn't provide any
- * compiler barriers, so the READ_ONCE() is required
- * to avoid compiler reordering or read merging.
- *
- * Make sure to use RCU over entire code blocks if
- * node_demotion[] reads need to be consistent.
- */
- rcu_read_lock();
- target = READ_ONCE(node_demotion[node]);
- rcu_read_unlock();
-
- return target;
-}
-
/*
* Obtain the lock on page, remove all ptes and migrate the page
* to the newly allocated page in newpage.
@@ -1413,7 +1323,7 @@ static inline int try_split_thp(struct page *page, struct page **page2,
* @mode: The migration mode that specifies the constraints for
* page migration, if any.
* @reason: The reason for page migration.
- * @ret_succeeded: Set to the number of pages migrated successfully if
+ * @ret_succeeded: Set to the number of normal pages migrated successfully if
* the caller passes a non-NULL pointer.
*
* The function returns after 10 attempts or if no pages are movable any more
@@ -1421,7 +1331,9 @@ static inline int try_split_thp(struct page *page, struct page **page2,
* It is caller's responsibility to call putback_movable_pages() to return pages
* to the LRU or free list only if ret != 0.
*
- * Returns the number of pages that were not migrated, or an error code.
+ * Returns the number of {normal page, THP, hugetlb} that were not migrated, or
+ * an error code. The number of THP splits will be considered as the number of
+ * non-migrated THP, no matter how many subpages of the THP are migrated successfully.
*/
int migrate_pages(struct list_head *from, new_page_t get_new_page,
free_page_t put_new_page, unsigned long private,
@@ -1430,6 +1342,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
int retry = 1;
int thp_retry = 1;
int nr_failed = 0;
+ int nr_failed_pages = 0;
int nr_succeeded = 0;
int nr_thp_succeeded = 0;
int nr_thp_failed = 0;
@@ -1441,13 +1354,16 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
int swapwrite = current->flags & PF_SWAPWRITE;
int rc, nr_subpages;
LIST_HEAD(ret_pages);
+ LIST_HEAD(thp_split_pages);
bool nosplit = (reason == MR_NUMA_MISPLACED);
+ bool no_subpage_counting = false;
trace_mm_migrate_pages_start(mode, reason);
if (!swapwrite)
current->flags |= PF_SWAPWRITE;
+thp_subpage_migration:
for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
retry = 0;
thp_retry = 0;
@@ -1460,7 +1376,7 @@ retry:
* during migration.
*/
is_thp = PageTransHuge(page) && !PageHuge(page);
- nr_subpages = thp_nr_pages(page);
+ nr_subpages = compound_nr(page);
cond_resched();
if (PageHuge(page))
@@ -1496,18 +1412,20 @@ retry:
case -ENOSYS:
/* THP migration is unsupported */
if (is_thp) {
- if (!try_split_thp(page, &page2, from)) {
+ nr_thp_failed++;
+ if (!try_split_thp(page, &page2, &thp_split_pages)) {
nr_thp_split++;
goto retry;
}
- nr_thp_failed++;
- nr_failed += nr_subpages;
+ nr_failed_pages += nr_subpages;
break;
}
/* Hugetlb migration is unsupported */
- nr_failed++;
+ if (!no_subpage_counting)
+ nr_failed++;
+ nr_failed_pages += nr_subpages;
break;
case -ENOMEM:
/*
@@ -1516,16 +1434,19 @@ retry:
* THP NUMA faulting doesn't split THP to retry.
*/
if (is_thp && !nosplit) {
- if (!try_split_thp(page, &page2, from)) {
+ nr_thp_failed++;
+ if (!try_split_thp(page, &page2, &thp_split_pages)) {
nr_thp_split++;
goto retry;
}
- nr_thp_failed++;
- nr_failed += nr_subpages;
+ nr_failed_pages += nr_subpages;
goto out;
}
- nr_failed++;
+
+ if (!no_subpage_counting)
+ nr_failed++;
+ nr_failed_pages += nr_subpages;
goto out;
case -EAGAIN:
if (is_thp) {
@@ -1535,12 +1456,11 @@ retry:
retry++;
break;
case MIGRATEPAGE_SUCCESS:
+ nr_succeeded += nr_subpages;
if (is_thp) {
nr_thp_succeeded++;
- nr_succeeded += nr_subpages;
break;
}
- nr_succeeded++;
break;
default:
/*
@@ -1551,17 +1471,37 @@ retry:
*/
if (is_thp) {
nr_thp_failed++;
- nr_failed += nr_subpages;
+ nr_failed_pages += nr_subpages;
break;
}
- nr_failed++;
+
+ if (!no_subpage_counting)
+ nr_failed++;
+ nr_failed_pages += nr_subpages;
break;
}
}
}
- nr_failed += retry + thp_retry;
+ nr_failed += retry;
nr_thp_failed += thp_retry;
- rc = nr_failed;
+ /*
+ * Try to migrate subpages of fail-to-migrate THPs, no nr_failed
+ * counting in this round, since all subpages of a THP is counted
+ * as 1 failure in the first round.
+ */
+ if (!list_empty(&thp_split_pages)) {
+ /*
+ * Move non-migrated pages (after 10 retries) to ret_pages
+ * to avoid migrating them again.
+ */
+ list_splice_init(from, &ret_pages);
+ list_splice_init(&thp_split_pages, from);
+ no_subpage_counting = true;
+ retry = 1;
+ goto thp_subpage_migration;
+ }
+
+ rc = nr_failed + nr_thp_failed;
out:
/*
* Put the permanent failure page back to migration list, they
@@ -1570,11 +1510,11 @@ out:
list_splice(&ret_pages, from);
count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
- count_vm_events(PGMIGRATE_FAIL, nr_failed);
+ count_vm_events(PGMIGRATE_FAIL, nr_failed_pages);
count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
- trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded,
+ trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded,
nr_thp_failed, nr_thp_split, mode, reason);
if (!swapwrite)
@@ -2475,22 +2415,8 @@ static bool migrate_vma_check_page(struct page *page)
return false;
/* Page from ZONE_DEVICE have one extra reference */
- if (is_zone_device_page(page)) {
- /*
- * Private page can never be pin as they have no valid pte and
- * GUP will fail for those. Yet if there is a pending migration
- * a thread might try to wait on the pte migration entry and
- * will bump the page reference count. Sadly there is no way to
- * differentiate a regular pin from migration wait. Hence to
- * avoid 2 racing thread trying to migrate back to CPU to enter
- * infinite loop (one stopping migration because the other is
- * waiting on pte migration entry). We always return true here.
- *
- * FIXME proper solution is to rework migration_entry_wait() so
- * it does not need to take a reference on page.
- */
- return is_device_private_page(page);
- }
+ if (is_zone_device_page(page))
+ extra++;
/* For file back page */
if (page_mapping(page))
@@ -2516,8 +2442,7 @@ static bool migrate_vma_check_page(struct page *page)
static void migrate_vma_unmap(struct migrate_vma *migrate)
{
const unsigned long npages = migrate->npages;
- const unsigned long start = migrate->start;
- unsigned long addr, i, restore = 0;
+ unsigned long i, restore = 0;
bool allow_drain = true;
lru_add_drain();
@@ -2563,7 +2488,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
}
}
- for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
+ for (i = 0; i < npages && restore; i++) {
struct page *page = migrate_pfn_to_page(migrate->src[i]);
if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
@@ -2961,14 +2886,152 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
EXPORT_SYMBOL(migrate_vma_finalize);
#endif /* CONFIG_DEVICE_PRIVATE */
+/*
+ * node_demotion[] example:
+ *
+ * Consider a system with two sockets. Each socket has
+ * three classes of memory attached: fast, medium and slow.
+ * Each memory class is placed in its own NUMA node. The
+ * CPUs are placed in the node with the "fast" memory. The
+ * 6 NUMA nodes (0-5) might be split among the sockets like
+ * this:
+ *
+ * Socket A: 0, 1, 2
+ * Socket B: 3, 4, 5
+ *
+ * When Node 0 fills up, its memory should be migrated to
+ * Node 1. When Node 1 fills up, it should be migrated to
+ * Node 2. The migration path start on the nodes with the
+ * processors (since allocations default to this node) and
+ * fast memory, progress through medium and end with the
+ * slow memory:
+ *
+ * 0 -> 1 -> 2 -> stop
+ * 3 -> 4 -> 5 -> stop
+ *
+ * This is represented in the node_demotion[] like this:
+ *
+ * { nr=1, nodes[0]=1 }, // Node 0 migrates to 1
+ * { nr=1, nodes[0]=2 }, // Node 1 migrates to 2
+ * { nr=0, nodes[0]=-1 }, // Node 2 does not migrate
+ * { nr=1, nodes[0]=4 }, // Node 3 migrates to 4
+ * { nr=1, nodes[0]=5 }, // Node 4 migrates to 5
+ * { nr=0, nodes[0]=-1 }, // Node 5 does not migrate
+ *
+ * Moreover some systems may have multiple slow memory nodes.
+ * Suppose a system has one socket with 3 memory nodes, node 0
+ * is fast memory type, and node 1/2 both are slow memory
+ * type, and the distance between fast memory node and slow
+ * memory node is same. So the migration path should be:
+ *
+ * 0 -> 1/2 -> stop
+ *
+ * This is represented in the node_demotion[] like this:
+ * { nr=2, {nodes[0]=1, nodes[1]=2} }, // Node 0 migrates to node 1 and node 2
+ * { nr=0, nodes[0]=-1, }, // Node 1 dose not migrate
+ * { nr=0, nodes[0]=-1, }, // Node 2 does not migrate
+ */
+
+/*
+ * Writes to this array occur without locking. Cycles are
+ * not allowed: Node X demotes to Y which demotes to X...
+ *
+ * If multiple reads are performed, a single rcu_read_lock()
+ * must be held over all reads to ensure that no cycles are
+ * observed.
+ */
+#define DEFAULT_DEMOTION_TARGET_NODES 15
+
+#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
+#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1)
+#else
+#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES
+#endif
+
+struct demotion_nodes {
+ unsigned short nr;
+ short nodes[DEMOTION_TARGET_NODES];
+};
+
+static struct demotion_nodes *node_demotion __read_mostly;
+
+/**
+ * next_demotion_node() - Get the next node in the demotion path
+ * @node: The starting node to lookup the next node
+ *
+ * Return: node id for next memory node in the demotion path hierarchy
+ * from @node; NUMA_NO_NODE if @node is terminal. This does not keep
+ * @node online or guarantee that it *continues* to be the next demotion
+ * target.
+ */
+int next_demotion_node(int node)
+{
+ struct demotion_nodes *nd;
+ unsigned short target_nr, index;
+ int target;
+
+ if (!node_demotion)
+ return NUMA_NO_NODE;
+
+ nd = &node_demotion[node];
+
+ /*
+ * node_demotion[] is updated without excluding this
+ * function from running. RCU doesn't provide any
+ * compiler barriers, so the READ_ONCE() is required
+ * to avoid compiler reordering or read merging.
+ *
+ * Make sure to use RCU over entire code blocks if
+ * node_demotion[] reads need to be consistent.
+ */
+ rcu_read_lock();
+ target_nr = READ_ONCE(nd->nr);
+
+ switch (target_nr) {
+ case 0:
+ target = NUMA_NO_NODE;
+ goto out;
+ case 1:
+ index = 0;
+ break;
+ default:
+ /*
+ * If there are multiple target nodes, just select one
+ * target node randomly.
+ *
+ * In addition, we can also use round-robin to select
+ * target node, but we should introduce another variable
+ * for node_demotion[] to record last selected target node,
+ * that may cause cache ping-pong due to the changing of
+ * last target node. Or introducing per-cpu data to avoid
+ * caching issue, which seems more complicated. So selecting
+ * target node randomly seems better until now.
+ */
+ index = get_random_int() % target_nr;
+ break;
+ }
+
+ target = READ_ONCE(nd->nodes[index]);
+
+out:
+ rcu_read_unlock();
+ return target;
+}
+
#if defined(CONFIG_HOTPLUG_CPU)
/* Disable reclaim-based migration. */
static void __disable_all_migrate_targets(void)
{
- int node;
+ int node, i;
- for_each_online_node(node)
- node_demotion[node] = NUMA_NO_NODE;
+ if (!node_demotion)
+ return;
+
+ for_each_online_node(node) {
+ node_demotion[node].nr = 0;
+ for (i = 0; i < DEMOTION_TARGET_NODES; i++)
+ node_demotion[node].nodes[i] = NUMA_NO_NODE;
+ }
}
static void disable_all_migrate_targets(void)
@@ -2995,26 +3058,40 @@ static void disable_all_migrate_targets(void)
* Failing here is OK. It might just indicate
* being at the end of a chain.
*/
-static int establish_migrate_target(int node, nodemask_t *used)
+static int establish_migrate_target(int node, nodemask_t *used,
+ int best_distance)
{
- int migration_target;
+ int migration_target, index, val;
+ struct demotion_nodes *nd;
- /*
- * Can not set a migration target on a
- * node with it already set.
- *
- * No need for READ_ONCE() here since this
- * in the write path for node_demotion[].
- * This should be the only thread writing.
- */
- if (node_demotion[node] != NUMA_NO_NODE)
+ if (!node_demotion)
return NUMA_NO_NODE;
+ nd = &node_demotion[node];
+
migration_target = find_next_best_node(node, used);
if (migration_target == NUMA_NO_NODE)
return NUMA_NO_NODE;
- node_demotion[node] = migration_target;
+ /*
+ * If the node has been set a migration target node before,
+ * which means it's the best distance between them. Still
+ * check if this node can be demoted to other target nodes
+ * if they have a same best distance.
+ */
+ if (best_distance != -1) {
+ val = node_distance(node, migration_target);
+ if (val > best_distance)
+ return NUMA_NO_NODE;
+ }
+
+ index = nd->nr;
+ if (WARN_ONCE(index >= DEMOTION_TARGET_NODES,
+ "Exceeds maximum demotion target nodes\n"))
+ return NUMA_NO_NODE;
+
+ nd->nodes[index] = migration_target;
+ nd->nr++;
return migration_target;
}
@@ -3030,7 +3107,9 @@ static int establish_migrate_target(int node, nodemask_t *used)
*
* The difference here is that cycles must be avoided. If
* node0 migrates to node1, then neither node1, nor anything
- * node1 migrates to can migrate to node0.
+ * node1 migrates to can migrate to node0. Also one node can
+ * be migrated to multiple nodes if the target nodes all have
+ * a same best-distance against the source node.
*
* This function can run simultaneously with readers of
* node_demotion[]. However, it can not run simultaneously
@@ -3042,7 +3121,7 @@ static void __set_migration_target_nodes(void)
nodemask_t next_pass = NODE_MASK_NONE;
nodemask_t this_pass = NODE_MASK_NONE;
nodemask_t used_targets = NODE_MASK_NONE;
- int node;
+ int node, best_distance;
/*
* Avoid any oddities like cycles that could occur
@@ -3071,18 +3150,33 @@ again:
* multiple source nodes to share a destination.
*/
nodes_or(used_targets, used_targets, this_pass);
- for_each_node_mask(node, this_pass) {
- int target_node = establish_migrate_target(node, &used_targets);
- if (target_node == NUMA_NO_NODE)
- continue;
+ for_each_node_mask(node, this_pass) {
+ best_distance = -1;
/*
- * Visit targets from this pass in the next pass.
- * Eventually, every node will have been part of
- * a pass, and will become set in 'used_targets'.
+ * Try to set up the migration path for the node, and the target
+ * migration nodes can be multiple, so doing a loop to find all
+ * the target nodes if they all have a best node distance.
*/
- node_set(target_node, next_pass);
+ do {
+ int target_node =
+ establish_migrate_target(node, &used_targets,
+ best_distance);
+
+ if (target_node == NUMA_NO_NODE)
+ break;
+
+ if (best_distance == -1)
+ best_distance = node_distance(node, target_node);
+
+ /*
+ * Visit targets from this pass in the next pass.
+ * Eventually, every node will have been part of
+ * a pass, and will become set in 'used_targets'.
+ */
+ node_set(target_node, next_pass);
+ } while (1);
}
/*
* 'next_pass' contains nodes which became migration
@@ -3183,6 +3277,11 @@ static int __init migrate_on_reclaim_init(void)
{
int ret;
+ node_demotion = kmalloc_array(nr_node_ids,
+ sizeof(struct demotion_nodes),
+ GFP_KERNEL);
+ WARN_ON(!node_demotion);
+
ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
NULL, migration_offline_cpu);
/*
diff --git a/mm/mlock.c b/mm/mlock.c
index e263d62ae2d0..8f584eddd305 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -512,7 +512,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx);
+ vma->vm_userfaultfd_ctx, vma_anon_name(vma));
if (*prev) {
vma = *prev;
goto success;
diff --git a/mm/mmap.c b/mm/mmap.c
index bfb0ea164a90..1e8fdb0b51ed 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
@@ -1029,7 +1030,8 @@ again:
*/
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags,
- struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+ struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+ const char *anon_name)
{
/*
* VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -1047,6 +1049,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
return 0;
if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
return 0;
+ if (!is_same_vma_anon_name(vma, anon_name))
+ return 0;
return 1;
}
@@ -1079,9 +1083,10 @@ static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
struct anon_vma *anon_vma, struct file *file,
pgoff_t vm_pgoff,
- struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+ struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+ const char *anon_name)
{
- if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
+ if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
if (vma->vm_pgoff == vm_pgoff)
return 1;
@@ -1100,9 +1105,10 @@ static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
struct anon_vma *anon_vma, struct file *file,
pgoff_t vm_pgoff,
- struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+ struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+ const char *anon_name)
{
- if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
+ if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
pgoff_t vm_pglen;
vm_pglen = vma_pages(vma);
@@ -1113,9 +1119,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
}
/*
- * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
- * whether that can be merged with its predecessor or its successor.
- * Or both (it neatly fills a hole).
+ * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
+ * figure out whether that can be merged with its predecessor or its
+ * successor. Or both (it neatly fills a hole).
*
* In most cases - when called for mmap, brk or mremap - [addr,end) is
* certain not to be mapped by the time vma_merge is called; but when
@@ -1160,7 +1166,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
unsigned long end, unsigned long vm_flags,
struct anon_vma *anon_vma, struct file *file,
pgoff_t pgoff, struct mempolicy *policy,
- struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+ struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+ const char *anon_name)
{
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
struct vm_area_struct *area, *next;
@@ -1190,7 +1197,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
mpol_equal(vma_policy(prev), policy) &&
can_vma_merge_after(prev, vm_flags,
anon_vma, file, pgoff,
- vm_userfaultfd_ctx)) {
+ vm_userfaultfd_ctx, anon_name)) {
/*
* OK, it can. Can we now merge in the successor as well?
*/
@@ -1199,7 +1206,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
can_vma_merge_before(next, vm_flags,
anon_vma, file,
pgoff+pglen,
- vm_userfaultfd_ctx) &&
+ vm_userfaultfd_ctx, anon_name) &&
is_mergeable_anon_vma(prev->anon_vma,
next->anon_vma, NULL)) {
/* cases 1, 6 */
@@ -1222,7 +1229,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
mpol_equal(policy, vma_policy(next)) &&
can_vma_merge_before(next, vm_flags,
anon_vma, file, pgoff+pglen,
- vm_userfaultfd_ctx)) {
+ vm_userfaultfd_ctx, anon_name)) {
if (prev && addr < prev->vm_end) /* case 4 */
err = __vma_adjust(prev, prev->vm_start,
addr, prev->vm_pgoff, NULL, next);
@@ -1754,7 +1761,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
* Can we just expand an old mapping?
*/
vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
- NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
+ NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
if (vma)
goto out;
@@ -1803,7 +1810,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
*/
if (unlikely(vm_flags != vma->vm_flags && prev)) {
merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
- NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
+ NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
if (merge) {
/* ->mmap() can change vma->vm_file and fput the original file. So
* fput the vma->vm_file here or we would add an extra fput for file
@@ -2928,7 +2935,6 @@ EXPORT_SYMBOL(vm_munmap);
SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
addr = untagged_addr(addr);
- profile_munmap(addr);
return __vm_munmap(addr, len, true);
}
@@ -3056,7 +3062,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
/* Can we just expand an old private anonymous mapping? */
vma = vma_merge(mm, prev, addr, addr + len, flags,
- NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
+ NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
if (vma)
goto out;
@@ -3142,25 +3148,27 @@ void exit_mmap(struct mm_struct *mm)
* to mmu_notifier_release(mm) ensures mmu notifier callbacks in
* __oom_reap_task_mm() will not block.
*
- * This needs to be done before calling munlock_vma_pages_all(),
+ * This needs to be done before calling unlock_range(),
* which clears VM_LOCKED, otherwise the oom reaper cannot
* reliably test it.
*/
(void)__oom_reap_task_mm(mm);
set_bit(MMF_OOM_SKIP, &mm->flags);
- mmap_write_lock(mm);
- mmap_write_unlock(mm);
}
+ mmap_write_lock(mm);
if (mm->locked_vm)
unlock_range(mm->mmap, ULONG_MAX);
arch_exit_mmap(mm);
vma = mm->mmap;
- if (!vma) /* Can happen if dup_mmap() received an OOM */
+ if (!vma) {
+ /* Can happen if dup_mmap() received an OOM */
+ mmap_write_unlock(mm);
return;
+ }
lru_add_drain();
flush_cache_mm(mm);
@@ -3171,16 +3179,14 @@ void exit_mmap(struct mm_struct *mm)
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb);
- /*
- * Walk the list again, actually closing and freeing it,
- * with preemption enabled, without holding any MM locks.
- */
+ /* Walk the list again, actually closing and freeing it. */
while (vma) {
if (vma->vm_flags & VM_ACCOUNT)
nr_accounted += vma_pages(vma);
vma = remove_vma(vma);
cond_resched();
}
+ mmap_write_unlock(mm);
vm_unacct_memory(nr_accounted);
}
@@ -3249,7 +3255,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return NULL; /* should never get here */
new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx);
+ vma->vm_userfaultfd_ctx, vma_anon_name(vma));
if (new_vma) {
/*
* Source vma may have been merged into new_vma
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 1b9837419bf9..afb7185ffdc4 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -3,6 +3,7 @@
#include <linux/kernel.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
+#include <linux/mm_inline.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e552f5e0ccbd..0138dfcdb1d8 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -464,7 +464,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*pprev = vma_merge(mm, *pprev, start, end, newflags,
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx);
+ vma->vm_userfaultfd_ctx, vma_anon_name(vma));
if (*pprev) {
vma = *pprev;
VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 1ddabefcfb5a..832fb330376e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -793,7 +793,7 @@ static inline bool __task_will_free_mem(struct task_struct *task)
* coredump_task_exit(), so the oom killer cannot assume that
* the process will promptly exit and release memory.
*/
- if (sig->flags & SIGNAL_GROUP_COREDUMP)
+ if (sig->core_state)
return false;
if (sig->flags & SIGNAL_GROUP_EXIT)
@@ -994,6 +994,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
* If necessary, kill all tasks in the selected memory cgroup.
*/
if (oom_group) {
+ memcg_memory_event(oom_group, MEMCG_OOM_GROUP_KILL);
mem_cgroup_print_oom_group(oom_group);
mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member,
(void *)message);
@@ -1057,7 +1058,7 @@ bool out_of_memory(struct oom_control *oc)
if (!is_memcg_oom(oc)) {
blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
- if (freed > 0)
+ if (freed > 0 && !is_sysrq_oom(oc))
/* Got some memory back in the last second. */
return true;
}
@@ -1169,15 +1170,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
goto put_task;
}
- if (mmget_not_zero(p->mm)) {
- mm = p->mm;
- if (task_will_free_mem(p))
- reap = true;
- else {
- /* Error only if the work has not been done already */
- if (!test_bit(MMF_OOM_SKIP, &mm->flags))
- ret = -EINVAL;
- }
+ mm = p->mm;
+ mmgrab(mm);
+
+ if (task_will_free_mem(p))
+ reap = true;
+ else {
+ /* Error only if the work has not been done already */
+ if (!test_bit(MMF_OOM_SKIP, &mm->flags))
+ ret = -EINVAL;
}
task_unlock(p);
@@ -1188,13 +1189,16 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
ret = -EINTR;
goto drop_mm;
}
- if (!__oom_reap_task_mm(mm))
+ /*
+ * Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure
+ * possible change in exit_mmap is seen
+ */
+ if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm))
ret = -EAGAIN;
mmap_read_unlock(mm);
drop_mm:
- if (mm)
- mmput(mm);
+ mmdrop(mm);
put_task:
put_task_struct(task);
return ret;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c5952749ad40..3589febc6d31 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/swap.h>
+#include <linux/swapops.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/jiffies.h>
@@ -63,6 +64,7 @@
#include <linux/sched/rt.h>
#include <linux/sched/mm.h>
#include <linux/page_owner.h>
+#include <linux/page_table_check.h>
#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
@@ -72,6 +74,7 @@
#include <linux/padata.h>
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
+#include <linux/delayacct.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -726,23 +729,33 @@ void free_compound_page(struct page *page)
free_the_page(page, compound_order(page));
}
+static void prep_compound_head(struct page *page, unsigned int order)
+{
+ set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
+ set_compound_order(page, order);
+ atomic_set(compound_mapcount_ptr(page), -1);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+}
+
+static void prep_compound_tail(struct page *head, int tail_idx)
+{
+ struct page *p = head + tail_idx;
+
+ p->mapping = TAIL_MAPPING;
+ set_compound_head(p, head);
+}
+
void prep_compound_page(struct page *page, unsigned int order)
{
int i;
int nr_pages = 1 << order;
__SetPageHead(page);
- for (i = 1; i < nr_pages; i++) {
- struct page *p = page + i;
- p->mapping = TAIL_MAPPING;
- set_compound_head(p, page);
- }
+ for (i = 1; i < nr_pages; i++)
+ prep_compound_tail(page, i);
- set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
- set_compound_order(page, order);
- atomic_set(compound_mapcount_ptr(page), -1);
- if (hpage_pincount_available(page))
- atomic_set(compound_pincount_ptr(page), 0);
+ prep_compound_head(page, order);
}
#ifdef CONFIG_DEBUG_PAGEALLOC
@@ -1297,6 +1310,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
if (memcg_kmem_enabled() && PageMemcgKmem(page))
__memcg_kmem_uncharge_page(page, order);
reset_page_owner(page, order);
+ page_table_check_free(page, order);
return false;
}
@@ -1336,6 +1350,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
page_cpupid_reset_last(page);
page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
reset_page_owner(page, order);
+ page_table_check_free(page, order);
if (!PageHighMem(page)) {
debug_check_no_locks_freed(page_address(page),
@@ -2410,6 +2425,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
}
set_page_owner(page, order, gfp_flags);
+ page_table_check_alloc(page, order);
}
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
@@ -4204,7 +4220,9 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
va_list args;
static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1);
- if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+ if ((gfp_mask & __GFP_NOWARN) ||
+ !__ratelimit(&nopage_rs) ||
+ ((gfp_mask & __GFP_DMA) && !has_managed_dma()))
return;
va_start(args, fmt);
@@ -4348,6 +4366,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
return NULL;
psi_memstall_enter(&pflags);
+ delayacct_compact_start();
noreclaim_flag = memalloc_noreclaim_save();
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
@@ -4355,6 +4374,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
+ delayacct_compact_end();
if (*compact_result == COMPACT_SKIPPED)
return NULL;
@@ -6562,6 +6582,75 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
}
#ifdef CONFIG_ZONE_DEVICE
+static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
+ unsigned long zone_idx, int nid,
+ struct dev_pagemap *pgmap)
+{
+
+ __init_single_page(page, pfn, zone_idx, nid);
+
+ /*
+ * Mark page reserved as it will need to wait for onlining
+ * phase for it to be fully associated with a zone.
+ *
+ * We can use the non-atomic __set_bit operation for setting
+ * the flag as we are still initializing the pages.
+ */
+ __SetPageReserved(page);
+
+ /*
+ * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
+ * and zone_device_data. It is a bug if a ZONE_DEVICE page is
+ * ever freed or placed on a driver-private list.
+ */
+ page->pgmap = pgmap;
+ page->zone_device_data = NULL;
+
+ /*
+ * Mark the block movable so that blocks are reserved for
+ * movable at startup. This will force kernel allocations
+ * to reserve their blocks rather than leaking throughout
+ * the address space during boot when many long-lived
+ * kernel allocations are made.
+ *
+ * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
+ * because this is done early in section_activate()
+ */
+ if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+ set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+ cond_resched();
+ }
+}
+
+static void __ref memmap_init_compound(struct page *head,
+ unsigned long head_pfn,
+ unsigned long zone_idx, int nid,
+ struct dev_pagemap *pgmap,
+ unsigned long nr_pages)
+{
+ unsigned long pfn, end_pfn = head_pfn + nr_pages;
+ unsigned int order = pgmap->vmemmap_shift;
+
+ __SetPageHead(head);
+ for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) {
+ struct page *page = pfn_to_page(pfn);
+
+ __init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
+ prep_compound_tail(head, pfn - head_pfn);
+ set_page_count(page, 0);
+
+ /*
+ * The first tail page stores compound_mapcount_ptr() and
+ * compound_order() and the second tail page stores
+ * compound_pincount_ptr(). Call prep_compound_head() after
+ * the first and second tail pages have been initialized to
+ * not have the data overwritten.
+ */
+ if (pfn == head_pfn + 2)
+ prep_compound_head(head, order);
+ }
+}
+
void __ref memmap_init_zone_device(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages,
@@ -6570,6 +6659,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
unsigned long pfn, end_pfn = start_pfn + nr_pages;
struct pglist_data *pgdat = zone->zone_pgdat;
struct vmem_altmap *altmap = pgmap_altmap(pgmap);
+ unsigned int pfns_per_compound = pgmap_vmemmap_nr(pgmap);
unsigned long zone_idx = zone_idx(zone);
unsigned long start = jiffies;
int nid = pgdat->node_id;
@@ -6587,42 +6677,16 @@ void __ref memmap_init_zone_device(struct zone *zone,
nr_pages = end_pfn - start_pfn;
}
- for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) {
struct page *page = pfn_to_page(pfn);
- __init_single_page(page, pfn, zone_idx, nid);
-
- /*
- * Mark page reserved as it will need to wait for onlining
- * phase for it to be fully associated with a zone.
- *
- * We can use the non-atomic __set_bit operation for setting
- * the flag as we are still initializing the pages.
- */
- __SetPageReserved(page);
+ __init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
- /*
- * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
- * and zone_device_data. It is a bug if a ZONE_DEVICE page is
- * ever freed or placed on a driver-private list.
- */
- page->pgmap = pgmap;
- page->zone_device_data = NULL;
+ if (pfns_per_compound == 1)
+ continue;
- /*
- * Mark the block movable so that blocks are reserved for
- * movable at startup. This will force kernel allocations
- * to reserve their blocks rather than leaking throughout
- * the address space during boot when many long-lived
- * kernel allocations are made.
- *
- * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
- * because this is done early in section_activate()
- */
- if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
- set_pageblock_migratetype(page, MIGRATE_MOVABLE);
- cond_resched();
- }
+ memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
+ pfns_per_compound);
}
pr_info("%s initialised %lu pages in %ums\n", __func__,
@@ -8170,7 +8234,7 @@ void __init mem_init_print_info(void)
*/
#define adj_init_size(start, end, size, pos, adj) \
do { \
- if (start <= pos && pos < end && size > adj) \
+ if (&start[0] <= &pos[0] && &pos[0] < &end[0] && size > adj) \
size -= adj; \
} while (0)
@@ -9214,8 +9278,8 @@ static bool zone_spans_last_pfn(const struct zone *zone,
* for allocation requests which can not be fulfilled with the buddy allocator.
*
* The allocated memory is always aligned to a page boundary. If nr_pages is a
- * power of two then the alignment is guaranteed to be to the given nr_pages
- * (e.g. 1GB request would be aligned to 1GB).
+ * power of two, then allocated range is also guaranteed to be aligned to same
+ * nr_pages (e.g. 1GB request would be aligned to 1GB).
*
* Allocated pages can be freed with free_contig_range() or by manually calling
* __free_page() on each allocated page.
@@ -9448,6 +9512,7 @@ bool take_page_off_buddy(struct page *page)
del_page_from_free_list(page_head, zone, page_order);
break_down_buddy_pages(zone, page_head, page, 0,
page_order, migratetype);
+ SetPageHWPoisonTakenOff(page);
if (!is_migrate_isolate(migratetype))
__mod_zone_freepage_state(zone, -1, migratetype);
ret = true;
@@ -9459,4 +9524,44 @@ bool take_page_off_buddy(struct page *page)
spin_unlock_irqrestore(&zone->lock, flags);
return ret;
}
+
+/*
+ * Cancel takeoff done by take_page_off_buddy().
+ */
+bool put_page_back_buddy(struct page *page)
+{
+ struct zone *zone = page_zone(page);
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long flags;
+ int migratetype = get_pfnblock_migratetype(page, pfn);
+ bool ret = false;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ if (put_page_testzero(page)) {
+ ClearPageHWPoisonTakenOff(page);
+ __free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE);
+ if (TestClearPageHWPoison(page)) {
+ num_poisoned_pages_dec();
+ ret = true;
+ }
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+
+ return ret;
+}
#endif
+
+#ifdef CONFIG_ZONE_DMA
+bool has_managed_dma(void)
+{
+ struct pglist_data *pgdat;
+
+ for_each_online_pgdat(pgdat) {
+ struct zone *zone = &pgdat->node_zones[ZONE_DMA];
+
+ if (managed_zone(zone))
+ return true;
+ }
+ return false;
+}
+#endif /* CONFIG_ZONE_DMA */
diff --git a/mm/page_counter.c b/mm/page_counter.c
index 7d83641eb86b..eb156ff5d603 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -120,7 +120,6 @@ bool page_counter_try_charge(struct page_counter *counter,
new = atomic_long_add_return(nr_pages, &c->usage);
if (new > c->max) {
atomic_long_sub(nr_pages, &c->usage);
- propagate_protected_usage(c, new);
/*
* This is racy, but we can live with some
* inaccuracy in the failcnt which is only used
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 6242afb24d84..2e66d934d63f 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -8,6 +8,7 @@
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
#include <linux/page_idle.h>
+#include <linux/page_table_check.h>
/*
* struct page extension
@@ -63,18 +64,21 @@ static bool need_page_idle(void)
{
return true;
}
-struct page_ext_operations page_idle_ops = {
+static struct page_ext_operations page_idle_ops __initdata = {
.need = need_page_idle,
};
#endif
-static struct page_ext_operations *page_ext_ops[] = {
+static struct page_ext_operations *page_ext_ops[] __initdata = {
#ifdef CONFIG_PAGE_OWNER
&page_owner_ops,
#endif
#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
&page_idle_ops,
#endif
+#ifdef CONFIG_PAGE_TABLE_CHECK
+ &page_table_check_ops,
+#endif
};
unsigned long page_ext_size = sizeof(struct page_ext);
diff --git a/mm/page_io.c b/mm/page_io.c
index 9725c7e1eeea..0bf8e40f4e57 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -25,6 +25,7 @@
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
+#include <linux/delayacct.h>
void end_swap_bio_write(struct bio *bio)
{
@@ -370,6 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
* significant part of overall IO time.
*/
psi_memstall_enter(&pflags);
+ delayacct_swapin_start();
if (frontswap_load(page) == 0) {
SetPageUptodate(page);
@@ -432,6 +434,7 @@ int swap_readpage(struct page *page, bool synchronous)
out:
psi_memstall_leave(&pflags);
+ delayacct_swapin_end();
return ret;
}
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index f67c4c70f17f..6a0ddda6b3c5 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -115,7 +115,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
* onlining - just onlined memory won't immediately be considered for
* allocation.
*/
- if (!isolated_page) {
+ if (!isolated_page && PageBuddy(page)) {
nr_pages = move_freepages_block(zone, page, migratetype, NULL);
__mod_zone_freepage_state(zone, nr_pages, migratetype);
}
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 4f924957ce7a..99e360df9465 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -46,7 +46,7 @@ static int __init early_page_owner_param(char *buf)
}
early_param("page_owner", early_page_owner_param);
-static bool need_page_owner(void)
+static __init bool need_page_owner(void)
{
return page_owner_enabled;
}
@@ -75,11 +75,13 @@ static noinline void register_early_stack(void)
early_handle = create_dummy_stack();
}
-static void init_page_owner(void)
+static __init void init_page_owner(void)
{
if (!page_owner_enabled)
return;
+ stack_depot_init();
+
register_dummy_stack();
register_failure_stack();
register_early_stack();
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
new file mode 100644
index 000000000000..7504e7caa2a1
--- /dev/null
+++ b/mm/page_table_check.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2021, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+#include <linux/mm.h>
+#include <linux/page_table_check.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "page_table_check: " fmt
+
+struct page_table_check {
+ atomic_t anon_map_count;
+ atomic_t file_map_count;
+};
+
+static bool __page_table_check_enabled __initdata =
+ IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED);
+
+DEFINE_STATIC_KEY_TRUE(page_table_check_disabled);
+EXPORT_SYMBOL(page_table_check_disabled);
+
+static int __init early_page_table_check_param(char *buf)
+{
+ if (!buf)
+ return -EINVAL;
+
+ if (strcmp(buf, "on") == 0)
+ __page_table_check_enabled = true;
+ else if (strcmp(buf, "off") == 0)
+ __page_table_check_enabled = false;
+
+ return 0;
+}
+
+early_param("page_table_check", early_page_table_check_param);
+
+static bool __init need_page_table_check(void)
+{
+ return __page_table_check_enabled;
+}
+
+static void __init init_page_table_check(void)
+{
+ if (!__page_table_check_enabled)
+ return;
+ static_branch_disable(&page_table_check_disabled);
+}
+
+struct page_ext_operations page_table_check_ops = {
+ .size = sizeof(struct page_table_check),
+ .need = need_page_table_check,
+ .init = init_page_table_check,
+};
+
+static struct page_table_check *get_page_table_check(struct page_ext *page_ext)
+{
+ BUG_ON(!page_ext);
+ return (void *)(page_ext) + page_table_check_ops.offset;
+}
+
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) &&
+ (pmd_val(pmd) & _PAGE_USER);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) &&
+ (pud_val(pud) & _PAGE_USER);
+}
+
+/*
+ * An enty is removed from the page table, decrement the counters for that page
+ * verify that it is of correct type and counters do not become negative.
+ */
+static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
+ unsigned long pfn, unsigned long pgcnt)
+{
+ struct page_ext *page_ext;
+ struct page *page;
+ bool anon;
+ int i;
+
+ if (!pfn_valid(pfn))
+ return;
+
+ page = pfn_to_page(pfn);
+ page_ext = lookup_page_ext(page);
+ anon = PageAnon(page);
+
+ for (i = 0; i < pgcnt; i++) {
+ struct page_table_check *ptc = get_page_table_check(page_ext);
+
+ if (anon) {
+ BUG_ON(atomic_read(&ptc->file_map_count));
+ BUG_ON(atomic_dec_return(&ptc->anon_map_count) < 0);
+ } else {
+ BUG_ON(atomic_read(&ptc->anon_map_count));
+ BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0);
+ }
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+/*
+ * A new enty is added to the page table, increment the counters for that page
+ * verify that it is of correct type and is not being mapped with a different
+ * type to a different process.
+ */
+static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
+ unsigned long pfn, unsigned long pgcnt,
+ bool rw)
+{
+ struct page_ext *page_ext;
+ struct page *page;
+ bool anon;
+ int i;
+
+ if (!pfn_valid(pfn))
+ return;
+
+ page = pfn_to_page(pfn);
+ page_ext = lookup_page_ext(page);
+ anon = PageAnon(page);
+
+ for (i = 0; i < pgcnt; i++) {
+ struct page_table_check *ptc = get_page_table_check(page_ext);
+
+ if (anon) {
+ BUG_ON(atomic_read(&ptc->file_map_count));
+ BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw);
+ } else {
+ BUG_ON(atomic_read(&ptc->anon_map_count));
+ BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0);
+ }
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+/*
+ * page is on free list, or is being allocated, verify that counters are zeroes
+ * crash if they are not.
+ */
+void __page_table_check_zero(struct page *page, unsigned int order)
+{
+ struct page_ext *page_ext = lookup_page_ext(page);
+ int i;
+
+ BUG_ON(!page_ext);
+ for (i = 0; i < (1 << order); i++) {
+ struct page_table_check *ptc = get_page_table_check(page_ext);
+
+ BUG_ON(atomic_read(&ptc->anon_map_count));
+ BUG_ON(atomic_read(&ptc->file_map_count));
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t pte)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (pte_user_accessible_page(pte)) {
+ page_table_check_clear(mm, addr, pte_pfn(pte),
+ PAGE_SIZE >> PAGE_SHIFT);
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pte_clear);
+
+void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (pmd_user_accessible_page(pmd)) {
+ page_table_check_clear(mm, addr, pmd_pfn(pmd),
+ PMD_PAGE_SIZE >> PAGE_SHIFT);
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pmd_clear);
+
+void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
+ pud_t pud)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (pud_user_accessible_page(pud)) {
+ page_table_check_clear(mm, addr, pud_pfn(pud),
+ PUD_PAGE_SIZE >> PAGE_SHIFT);
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pud_clear);
+
+void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ pte_t old_pte;
+
+ if (&init_mm == mm)
+ return;
+
+ old_pte = *ptep;
+ if (pte_user_accessible_page(old_pte)) {
+ page_table_check_clear(mm, addr, pte_pfn(old_pte),
+ PAGE_SIZE >> PAGE_SHIFT);
+ }
+
+ if (pte_user_accessible_page(pte)) {
+ page_table_check_set(mm, addr, pte_pfn(pte),
+ PAGE_SIZE >> PAGE_SHIFT,
+ pte_write(pte));
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pte_set);
+
+void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ pmd_t old_pmd;
+
+ if (&init_mm == mm)
+ return;
+
+ old_pmd = *pmdp;
+ if (pmd_user_accessible_page(old_pmd)) {
+ page_table_check_clear(mm, addr, pmd_pfn(old_pmd),
+ PMD_PAGE_SIZE >> PAGE_SHIFT);
+ }
+
+ if (pmd_user_accessible_page(pmd)) {
+ page_table_check_set(mm, addr, pmd_pfn(pmd),
+ PMD_PAGE_SIZE >> PAGE_SHIFT,
+ pmd_write(pmd));
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pmd_set);
+
+void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ pud_t old_pud;
+
+ if (&init_mm == mm)
+ return;
+
+ old_pud = *pudp;
+ if (pud_user_accessible_page(old_pud)) {
+ page_table_check_clear(mm, addr, pud_pfn(old_pud),
+ PUD_PAGE_SIZE >> PAGE_SHIFT);
+ }
+
+ if (pud_user_accessible_page(pud)) {
+ page_table_check_set(mm, addr, pud_pfn(pud),
+ PUD_PAGE_SIZE >> PAGE_SHIFT,
+ pud_write(pud));
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pud_set);
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 639662c20c82..411d1593ef23 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -113,6 +113,24 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk)
return pcpu_nr_pages_to_map_bits(chunk->nr_pages);
}
+#ifdef CONFIG_MEMCG_KMEM
+/**
+ * pcpu_obj_full_size - helper to calculate size of each accounted object
+ * @size: size of area to allocate in bytes
+ *
+ * For each accounted object there is an extra space which is used to store
+ * obj_cgroup membership. Charge it too.
+ */
+static inline size_t pcpu_obj_full_size(size_t size)
+{
+ size_t extra_size;
+
+ extra_size = size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *);
+
+ return size * num_possible_cpus() + extra_size;
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
#ifdef CONFIG_PERCPU_STATS
#include <linux/spinlock.h>
diff --git a/mm/percpu.c b/mm/percpu.c
index f5b2c2ea5a54..ea28db283044 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -779,7 +779,7 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
{
struct pcpu_block_md *block = chunk->md_blocks + index;
unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
- unsigned int rs, re, start; /* region start, region end */
+ unsigned int start, end; /* region start, region end */
/* promote scan_hint to contig_hint */
if (block->scan_hint) {
@@ -795,9 +795,8 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
block->right_free = 0;
/* iterate over free areas and update the contig hints */
- bitmap_for_each_clear_region(alloc_map, rs, re, start,
- PCPU_BITMAP_BLOCK_BITS)
- pcpu_block_update(block, rs, re);
+ for_each_clear_bitrange_from(start, end, alloc_map, PCPU_BITMAP_BLOCK_BITS)
+ pcpu_block_update(block, start, end);
}
/**
@@ -1070,17 +1069,18 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits,
int *next_off)
{
- unsigned int page_start, page_end, rs, re;
+ unsigned int start, end;
- page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
- page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
+ start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
+ end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
- rs = page_start;
- bitmap_next_clear_region(chunk->populated, &rs, &re, page_end);
- if (rs >= page_end)
+ start = find_next_zero_bit(chunk->populated, end, start);
+ if (start >= end)
return true;
- *next_off = re * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE;
+ end = find_next_bit(chunk->populated, end, start + 1);
+
+ *next_off = end * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE;
return false;
}
@@ -1635,7 +1635,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
if (!objcg)
return true;
- if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) {
+ if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size))) {
obj_cgroup_put(objcg);
return false;
}
@@ -1656,10 +1656,10 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
rcu_read_lock();
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
- size * num_possible_cpus());
+ pcpu_obj_full_size(size));
rcu_read_unlock();
} else {
- obj_cgroup_uncharge(objcg, size * num_possible_cpus());
+ obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
obj_cgroup_put(objcg);
}
}
@@ -1676,11 +1676,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
return;
chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
- obj_cgroup_uncharge(objcg, size * num_possible_cpus());
+ obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
rcu_read_lock();
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
- -(size * num_possible_cpus()));
+ -pcpu_obj_full_size(size));
rcu_read_unlock();
obj_cgroup_put(objcg);
@@ -1851,13 +1851,12 @@ area_found:
/* populate if not all pages are already there */
if (!is_atomic) {
- unsigned int page_start, page_end, rs, re;
+ unsigned int page_end, rs, re;
- page_start = PFN_DOWN(off);
+ rs = PFN_DOWN(off);
page_end = PFN_UP(off + size);
- bitmap_for_each_clear_region(chunk->populated, rs, re,
- page_start, page_end) {
+ for_each_clear_bitrange_from(rs, re, chunk->populated, page_end) {
WARN_ON(chunk->immutable);
ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
@@ -2013,8 +2012,7 @@ static void pcpu_balance_free(bool empty_only)
list_for_each_entry_safe(chunk, next, &to_free, list) {
unsigned int rs, re;
- bitmap_for_each_set_region(chunk->populated, rs, re, 0,
- chunk->nr_pages) {
+ for_each_set_bitrange(rs, re, chunk->populated, chunk->nr_pages) {
pcpu_depopulate_chunk(chunk, rs, re);
spin_lock_irq(&pcpu_lock);
pcpu_chunk_depopulated(chunk, rs, re);
@@ -2084,8 +2082,7 @@ retry_pop:
continue;
/* @chunk can't go away while pcpu_alloc_mutex is held */
- bitmap_for_each_clear_region(chunk->populated, rs, re, 0,
- chunk->nr_pages) {
+ for_each_clear_bitrange(rs, re, chunk->populated, chunk->nr_pages) {
int nr = min_t(int, re - rs, nr_to_pop);
spin_unlock_irq(&pcpu_lock);
@@ -2992,6 +2989,42 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
return ai;
}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
+{
+ const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NUMA
+ int node = NUMA_NO_NODE;
+ void *ptr;
+
+ if (cpu_to_nd_fn)
+ node = cpu_to_nd_fn(cpu);
+
+ if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
+ ptr = memblock_alloc_from(size, align, goal);
+ pr_info("cpu %d has no node %d or node-local memory\n",
+ cpu, node);
+ pr_debug("per cpu data for cpu%d %zu bytes at 0x%llx\n",
+ cpu, size, (u64)__pa(ptr));
+ } else {
+ ptr = memblock_alloc_try_nid(size, align, goal,
+ MEMBLOCK_ALLOC_ACCESSIBLE,
+ node);
+
+ pr_debug("per cpu data for cpu%d %zu bytes on node%d at 0x%llx\n",
+ cpu, size, node, (u64)__pa(ptr));
+ }
+ return ptr;
+#else
+ return memblock_alloc_from(size, align, goal);
+#endif
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+ memblock_free(ptr, size);
+}
#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
#if defined(BUILD_EMBED_FIRST_CHUNK)
@@ -3001,14 +3034,13 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
* @dyn_size: minimum free size for dynamic allocation in bytes
* @atom_size: allocation atom size
* @cpu_distance_fn: callback to determine distance between cpus, optional
- * @alloc_fn: function to allocate percpu page
- * @free_fn: function to free percpu page
+ * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
*
* This is a helper to ease setting up embedded first percpu chunk and
* can be called where pcpu_setup_first_chunk() is expected.
*
* If this function is used to setup the first chunk, it is allocated
- * by calling @alloc_fn and used as-is without being mapped into
+ * by calling pcpu_fc_alloc and used as-is without being mapped into
* vmalloc area. Allocations are always whole multiples of @atom_size
* aligned to @atom_size.
*
@@ -3022,7 +3054,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
* @dyn_size specifies the minimum dynamic area size.
*
* If the needed size is smaller than the minimum or specified unit
- * size, the leftover is returned using @free_fn.
+ * size, the leftover is returned using pcpu_fc_free.
*
* RETURNS:
* 0 on success, -errno on failure.
@@ -3030,8 +3062,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn)
+ pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
void *base = (void *)ULONG_MAX;
void **areas = NULL;
@@ -3066,7 +3097,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
BUG_ON(cpu == NR_CPUS);
/* allocate space for the whole group */
- ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
+ ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
if (!ptr) {
rc = -ENOMEM;
goto out_free_areas;
@@ -3105,12 +3136,12 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
if (gi->cpu_map[i] == NR_CPUS) {
/* unused unit, free whole */
- free_fn(ptr, ai->unit_size);
+ pcpu_fc_free(ptr, ai->unit_size);
continue;
}
/* copy and return the unused part */
memcpy(ptr, __per_cpu_load, ai->static_size);
- free_fn(ptr + size_sum, ai->unit_size - size_sum);
+ pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
}
}
@@ -3129,7 +3160,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
out_free_areas:
for (group = 0; group < ai->nr_groups; group++)
if (areas[group])
- free_fn(areas[group],
+ pcpu_fc_free(areas[group],
ai->groups[group].nr_units * ai->unit_size);
out_free:
pcpu_free_alloc_info(ai);
@@ -3140,12 +3171,79 @@ out_free:
#endif /* BUILD_EMBED_FIRST_CHUNK */
#ifdef BUILD_PAGE_FIRST_CHUNK
+#include <asm/pgalloc.h>
+
+#ifndef P4D_TABLE_SIZE
+#define P4D_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PUD_TABLE_SIZE
+#define PUD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PMD_TABLE_SIZE
+#define PMD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PTE_TABLE_SIZE
+#define PTE_TABLE_SIZE PAGE_SIZE
+#endif
+void __init __weak pcpu_populate_pte(unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (pgd_none(*pgd)) {
+ p4d_t *new;
+
+ new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pgd_populate(&init_mm, pgd, new);
+ }
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ pud_t *new;
+
+ new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ p4d_populate(&init_mm, p4d, new);
+ }
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ pmd_t *new;
+
+ new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pud_populate(&init_mm, pud, new);
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd)) {
+ pte_t *new;
+
+ new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+
+ return;
+
+err_alloc:
+ panic("%s: Failed to allocate memory\n", __func__);
+}
+
/**
* pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
* @reserved_size: the size of reserved percpu area in bytes
- * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
- * @free_fn: function to free percpu page, always called with PAGE_SIZE
- * @populate_pte_fn: function to populate pte
+ * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
*
* This is a helper to ease setting up page-remapped first percpu
* chunk and can be called where pcpu_setup_first_chunk() is expected.
@@ -3156,10 +3254,7 @@ out_free:
* RETURNS:
* 0 on success, -errno on failure.
*/
-int __init pcpu_page_first_chunk(size_t reserved_size,
- pcpu_fc_alloc_fn_t alloc_fn,
- pcpu_fc_free_fn_t free_fn,
- pcpu_fc_populate_pte_fn_t populate_pte_fn)
+int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
static struct vm_struct vm;
struct pcpu_alloc_info *ai;
@@ -3201,7 +3296,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
for (i = 0; i < unit_pages; i++) {
void *ptr;
- ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
+ ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
if (!ptr) {
pr_warn("failed to allocate %s page for cpu%u\n",
psize_str, cpu);
@@ -3223,7 +3318,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
(unsigned long)vm.addr + unit * ai->unit_size;
for (i = 0; i < unit_pages; i++)
- populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
+ pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
/* pte already populated, the following shouldn't fail */
rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
@@ -3253,7 +3348,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
enomem:
while (--j >= 0)
- free_fn(page_address(pages[j]), PAGE_SIZE);
+ pcpu_fc_free(page_address(pages[j]), PAGE_SIZE);
rc = -ENOMEM;
out_free_ar:
memblock_free(pages, pages_size);
@@ -3278,17 +3373,6 @@ out_free_ar:
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
-static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
- size_t align)
-{
- return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
-}
-
-static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
@@ -3299,9 +3383,8 @@ void __init setup_per_cpu_areas(void)
* Always reserve area for module percpu variables. That's
* what the legacy allocator did.
*/
- rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
- PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
- pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
+ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE,
+ PAGE_SIZE, NULL, NULL);
if (rc < 0)
panic("Failed to initialize percpu areas.");
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 4e640baf9794..6523fda274e5 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -10,6 +10,7 @@
#include <linux/pagemap.h>
#include <linux/hugetlb.h>
#include <linux/pgtable.h>
+#include <linux/mm_inline.h>
#include <asm/tlb.h>
/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 163ac4e6bcee..6a1e8c7f6213 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -621,9 +621,20 @@ void try_to_unmap_flush_dirty(void)
try_to_unmap_flush();
}
+/*
+ * Bits 0-14 of mm->tlb_flush_batched record pending generations.
+ * Bits 16-30 of mm->tlb_flush_batched bit record flushed generations.
+ */
+#define TLB_FLUSH_BATCH_FLUSHED_SHIFT 16
+#define TLB_FLUSH_BATCH_PENDING_MASK \
+ ((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT - 1)) - 1)
+#define TLB_FLUSH_BATCH_PENDING_LARGE \
+ (TLB_FLUSH_BATCH_PENDING_MASK / 2)
+
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+ int batch, nbatch;
arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
tlb_ubc->flush_required = true;
@@ -633,7 +644,22 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
* before the PTE is cleared.
*/
barrier();
- mm->tlb_flush_batched = true;
+ batch = atomic_read(&mm->tlb_flush_batched);
+retry:
+ if ((batch & TLB_FLUSH_BATCH_PENDING_MASK) > TLB_FLUSH_BATCH_PENDING_LARGE) {
+ /*
+ * Prevent `pending' from catching up with `flushed' because of
+ * overflow. Reset `pending' and `flushed' to be 1 and 0 if
+ * `pending' becomes large.
+ */
+ nbatch = atomic_cmpxchg(&mm->tlb_flush_batched, batch, 1);
+ if (nbatch != batch) {
+ batch = nbatch;
+ goto retry;
+ }
+ } else {
+ atomic_inc(&mm->tlb_flush_batched);
+ }
/*
* If the PTE was dirty then it's best to assume it's writable. The
@@ -680,15 +706,18 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
*/
void flush_tlb_batched_pending(struct mm_struct *mm)
{
- if (data_race(mm->tlb_flush_batched)) {
- flush_tlb_mm(mm);
+ int batch = atomic_read(&mm->tlb_flush_batched);
+ int pending = batch & TLB_FLUSH_BATCH_PENDING_MASK;
+ int flushed = batch >> TLB_FLUSH_BATCH_FLUSHED_SHIFT;
+ if (pending != flushed) {
+ flush_tlb_mm(mm);
/*
- * Do not allow the compiler to re-order the clearing of
- * tlb_flush_batched before the tlb is flushed.
+ * If the new TLB flushing is pending during flushing, leave
+ * mm->tlb_flush_batched as is, to avoid losing flushing.
*/
- barrier();
- mm->tlb_flush_batched = false;
+ atomic_cmpxchg(&mm->tlb_flush_batched, batch,
+ pending | (pending << TLB_FLUSH_BATCH_FLUSHED_SHIFT));
}
}
#else
diff --git a/mm/shmem.c b/mm/shmem.c
index 28d627444a24..a09b29ec2b45 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -36,7 +36,6 @@
#include <linux/uio.h>
#include <linux/khugepaged.h>
#include <linux/hugetlb.h>
-#include <linux/frontswap.h>
#include <linux/fs_parser.h>
#include <linux/swapfile.h>
@@ -554,7 +553,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
struct shmem_inode_info *info;
struct page *page;
unsigned long batch = sc ? sc->nr_to_scan : 128;
- int removed = 0, split = 0;
+ int split = 0;
if (list_empty(&sbinfo->shrinklist))
return SHRINK_STOP;
@@ -569,7 +568,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
/* inode is about to be evicted */
if (!inode) {
list_del_init(&info->shrinklist);
- removed++;
goto next;
}
@@ -577,12 +575,12 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
if (round_up(inode->i_size, PAGE_SIZE) ==
round_up(inode->i_size, HPAGE_PMD_SIZE)) {
list_move(&info->shrinklist, &to_remove);
- removed++;
goto next;
}
list_move(&info->shrinklist, &list);
next:
+ sbinfo->shrinklist_len--;
if (!--batch)
break;
}
@@ -602,7 +600,7 @@ next:
inode = &info->vfs_inode;
if (nr_to_split && split >= nr_to_split)
- goto leave;
+ goto move_back;
page = find_get_page(inode->i_mapping,
(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
@@ -616,38 +614,44 @@ next:
}
/*
- * Leave the inode on the list if we failed to lock
- * the page at this time.
+ * Move the inode on the list back to shrinklist if we failed
+ * to lock the page at this time.
*
* Waiting for the lock may lead to deadlock in the
* reclaim path.
*/
if (!trylock_page(page)) {
put_page(page);
- goto leave;
+ goto move_back;
}
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
- /* If split failed leave the inode on the list */
+ /* If split failed move the inode on the list back to shrinklist */
if (ret)
- goto leave;
+ goto move_back;
split++;
drop:
list_del_init(&info->shrinklist);
- removed++;
-leave:
+ goto put;
+move_back:
+ /*
+ * Make sure the inode is either on the global list or deleted
+ * from any local list before iput() since it could be deleted
+ * in another thread once we put the inode (then the local list
+ * is corrupted).
+ */
+ spin_lock(&sbinfo->shrinklist_lock);
+ list_move(&info->shrinklist, &sbinfo->shrinklist);
+ sbinfo->shrinklist_len++;
+ spin_unlock(&sbinfo->shrinklist_lock);
+put:
iput(inode);
}
- spin_lock(&sbinfo->shrinklist_lock);
- list_splice_tail(&list, &sbinfo->shrinklist);
- sbinfo->shrinklist_len -= removed;
- spin_unlock(&sbinfo->shrinklist_lock);
-
return split;
}
@@ -1147,7 +1151,7 @@ static void shmem_evict_inode(struct inode *inode)
static int shmem_find_swap_entries(struct address_space *mapping,
pgoff_t start, unsigned int nr_entries,
struct page **entries, pgoff_t *indices,
- unsigned int type, bool frontswap)
+ unsigned int type)
{
XA_STATE(xas, &mapping->i_pages, start);
struct page *page;
@@ -1168,9 +1172,6 @@ static int shmem_find_swap_entries(struct address_space *mapping,
entry = radix_to_swp_entry(page);
if (swp_type(entry) != type)
continue;
- if (frontswap &&
- !frontswap_test(swap_info[type], swp_offset(entry)))
- continue;
indices[ret] = xas.xa_index;
entries[ret] = page;
@@ -1223,26 +1224,20 @@ static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec,
/*
* If swap found in inode, free it and move page from swapcache to filecache.
*/
-static int shmem_unuse_inode(struct inode *inode, unsigned int type,
- bool frontswap, unsigned long *fs_pages_to_unuse)
+static int shmem_unuse_inode(struct inode *inode, unsigned int type)
{
struct address_space *mapping = inode->i_mapping;
pgoff_t start = 0;
struct pagevec pvec;
pgoff_t indices[PAGEVEC_SIZE];
- bool frontswap_partial = (frontswap && *fs_pages_to_unuse > 0);
int ret = 0;
pagevec_init(&pvec);
do {
unsigned int nr_entries = PAGEVEC_SIZE;
- if (frontswap_partial && *fs_pages_to_unuse < PAGEVEC_SIZE)
- nr_entries = *fs_pages_to_unuse;
-
pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries,
- pvec.pages, indices,
- type, frontswap);
+ pvec.pages, indices, type);
if (pvec.nr == 0) {
ret = 0;
break;
@@ -1252,14 +1247,6 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type,
if (ret < 0)
break;
- if (frontswap_partial) {
- *fs_pages_to_unuse -= ret;
- if (*fs_pages_to_unuse == 0) {
- ret = FRONTSWAP_PAGES_UNUSED;
- break;
- }
- }
-
start = indices[pvec.nr - 1];
} while (true);
@@ -1271,8 +1258,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type,
* device 'type' back into memory, so the swap device can be
* unused.
*/
-int shmem_unuse(unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+int shmem_unuse(unsigned int type)
{
struct shmem_inode_info *info, *next;
int error = 0;
@@ -1295,8 +1281,7 @@ int shmem_unuse(unsigned int type, bool frontswap,
atomic_inc(&info->stop_eviction);
mutex_unlock(&shmem_swaplist_mutex);
- error = shmem_unuse_inode(&info->vfs_inode, type, frontswap,
- fs_pages_to_unuse);
+ error = shmem_unuse_inode(&info->vfs_inode, type);
cond_resched();
mutex_lock(&shmem_swaplist_mutex);
@@ -1541,8 +1526,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
return NULL;
shmem_pseudo_vma_init(&pvma, info, hindex);
- page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(),
- true);
+ page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, true);
shmem_pseudo_vma_destroy(&pvma);
if (page)
prep_transhuge_page(page);
@@ -2439,6 +2423,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t index = pos >> PAGE_SHIFT;
+ int ret = 0;
/* i_rwsem is held by caller */
if (unlikely(info->seals & (F_SEAL_GROW |
@@ -2449,7 +2434,19 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
return -EPERM;
}
- return shmem_getpage(inode, index, pagep, SGP_WRITE);
+ ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
+
+ if (ret)
+ return ret;
+
+ if (PageHWPoison(*pagep)) {
+ unlock_page(*pagep);
+ put_page(*pagep);
+ *pagep = NULL;
+ return -EIO;
+ }
+
+ return 0;
}
static int
@@ -2536,6 +2533,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (sgp == SGP_CACHE)
set_page_dirty(page);
unlock_page(page);
+
+ if (PageHWPoison(page)) {
+ put_page(page);
+ error = -EIO;
+ break;
+ }
}
/*
@@ -3075,7 +3078,8 @@ static const char *shmem_get_link(struct dentry *dentry,
page = find_get_page(inode->i_mapping, 0);
if (!page)
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
+ if (PageHWPoison(page) ||
+ !PageUptodate(page)) {
put_page(page);
return ERR_PTR(-ECHILD);
}
@@ -3083,6 +3087,13 @@ static const char *shmem_get_link(struct dentry *dentry,
error = shmem_getpage(inode, 0, &page, SGP_READ);
if (error)
return ERR_PTR(error);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (PageHWPoison(page)) {
+ unlock_page(page);
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
unlock_page(page);
}
set_delayed_call(done, shmem_put_link, page);
@@ -3733,6 +3744,13 @@ static void shmem_destroy_inodecache(void)
kmem_cache_destroy(shmem_inode_cachep);
}
+/* Keep the page in page cache instead of truncating it */
+static int shmem_error_remove_page(struct address_space *mapping,
+ struct page *page)
+{
+ return 0;
+}
+
const struct address_space_operations shmem_aops = {
.writepage = shmem_writepage,
.set_page_dirty = __set_page_dirty_no_writeback,
@@ -3743,7 +3761,7 @@ const struct address_space_operations shmem_aops = {
#ifdef CONFIG_MIGRATION
.migratepage = migrate_page,
#endif
- .error_remove_page = generic_error_remove_page,
+ .error_remove_page = shmem_error_remove_page,
};
EXPORT_SYMBOL(shmem_aops);
@@ -3977,8 +3995,7 @@ int __init shmem_init(void)
return 0;
}
-int shmem_unuse(unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+int shmem_unuse(unsigned int type)
{
return 0;
}
@@ -4151,9 +4168,14 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
gfp, NULL, NULL, NULL);
if (error)
- page = ERR_PTR(error);
- else
- unlock_page(page);
+ return ERR_PTR(error);
+
+ unlock_page(page);
+ if (PageHWPoison(page)) {
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+
return page;
#else
/*
diff --git a/mm/slab.h b/mm/slab.h
index 95b9a74a2d51..c7f2abc2b154 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -67,14 +67,8 @@ struct slab {
static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
-SLAB_MATCH(slab_list, slab_list);
#ifndef CONFIG_SLOB
SLAB_MATCH(rcu_head, rcu_head);
-SLAB_MATCH(slab_cache, slab_cache);
-#endif
-#ifdef CONFIG_SLAB
-SLAB_MATCH(s_mem, s_mem);
-SLAB_MATCH(active, active);
#endif
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
@@ -794,11 +788,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
#endif
-void *slab_start(struct seq_file *m, loff_t *pos);
-void *slab_next(struct seq_file *m, void *p, loff_t *pos);
-void slab_stop(struct seq_file *m, void *p);
-int memcg_slab_show(struct seq_file *m, void *p);
-
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
void dump_unreclaimable_slab(void);
#else
diff --git a/mm/slab_common.c b/mm/slab_common.c
index dc15566141d4..23f2ab0713b7 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -489,9 +489,7 @@ void slab_kmem_cache_release(struct kmem_cache *s)
void kmem_cache_destroy(struct kmem_cache *s)
{
- int err;
-
- if (unlikely(!s))
+ if (unlikely(!s) || !kasan_check_byte(s))
return;
cpus_read_lock();
@@ -501,12 +499,9 @@ void kmem_cache_destroy(struct kmem_cache *s)
if (s->refcount)
goto out_unlock;
- err = shutdown_cache(s);
- if (err) {
- pr_err("%s %s: Slab cache still has objects\n",
- __func__, s->name);
- dump_stack();
- }
+ WARN(shutdown_cache(s),
+ "%s %s: Slab cache still has objects when called from %pS",
+ __func__, s->name, (void *)_RET_IP_);
out_unlock:
mutex_unlock(&slab_mutex);
cpus_read_unlock();
@@ -824,7 +819,7 @@ void __init setup_kmalloc_cache_index_table(void)
if (KMALLOC_MIN_SIZE >= 64) {
/*
- * The 96 byte size cache is not used if the alignment
+ * The 96 byte sized cache is not used if the alignment
* is 64 byte.
*/
for (i = 64 + 8; i <= 96; i += 8)
@@ -849,7 +844,7 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
if (type == KMALLOC_RECLAIM) {
flags |= SLAB_RECLAIM_ACCOUNT;
} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
- if (cgroup_memory_nokmem) {
+ if (mem_cgroup_kmem_disabled()) {
kmalloc_caches[type][idx] = kmalloc_caches[KMALLOC_NORMAL][idx];
return;
}
@@ -1044,18 +1039,18 @@ static void print_slabinfo_header(struct seq_file *m)
seq_putc(m, '\n');
}
-void *slab_start(struct seq_file *m, loff_t *pos)
+static void *slab_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&slab_mutex);
return seq_list_start(&slab_caches, *pos);
}
-void *slab_next(struct seq_file *m, void *p, loff_t *pos)
+static void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
return seq_list_next(p, &slab_caches, pos);
}
-void slab_stop(struct seq_file *m, void *p)
+static void slab_stop(struct seq_file *m, void *p)
{
mutex_unlock(&slab_mutex);
}
@@ -1123,17 +1118,6 @@ void dump_unreclaimable_slab(void)
mutex_unlock(&slab_mutex);
}
-#if defined(CONFIG_MEMCG_KMEM)
-int memcg_slab_show(struct seq_file *m, void *p)
-{
- /*
- * Deprecated.
- * Please, take a look at tools/cgroup/slabinfo.py .
- */
- return 0;
-}
-#endif
-
/*
* slabinfo_op - iterator that generates /proc/slabinfo
*
diff --git a/mm/swap.c b/mm/swap.c
index 74f6b311d7ee..bcf3ac288b56 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -882,7 +882,7 @@ void lru_cache_disable(void)
* all online CPUs so any calls of lru_cache_disabled wrapped by
* local_lock or preemption disabled would be ordered by that.
* The atomic operation doesn't need to have stronger ordering
- * requirements because that is enforeced by the scheduling
+ * requirements because that is enforced by the scheduling
* guarantees.
*/
__lru_add_drain_all(true);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e59e08ef46e1..bf0df7aa7158 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -49,7 +49,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
unsigned char);
static void free_swap_count_continuations(struct swap_info_struct *);
-DEFINE_SPINLOCK(swap_lock);
+static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
atomic_long_t nr_swap_pages;
/*
@@ -71,7 +71,7 @@ static const char Unused_offset[] = "Unused swap offset entry ";
* all active swap_info_structs
* protected with swap_lock, and ordered by priority.
*/
-PLIST_HEAD(swap_active_head);
+static PLIST_HEAD(swap_active_head);
/*
* all available (active, not full) swap_info_structs
@@ -1601,31 +1601,30 @@ static bool page_swapped(struct page *page)
return false;
}
-static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
+static int page_trans_huge_map_swapcount(struct page *page,
int *total_swapcount)
{
- int i, map_swapcount, _total_mapcount, _total_swapcount;
+ int i, map_swapcount, _total_swapcount;
unsigned long offset = 0;
struct swap_info_struct *si;
struct swap_cluster_info *ci = NULL;
unsigned char *map = NULL;
- int mapcount, swapcount = 0;
+ int swapcount = 0;
/* hugetlbfs shouldn't call it */
VM_BUG_ON_PAGE(PageHuge(page), page);
if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) {
- mapcount = page_trans_huge_mapcount(page, total_mapcount);
if (PageSwapCache(page))
swapcount = page_swapcount(page);
if (total_swapcount)
*total_swapcount = swapcount;
- return mapcount + swapcount;
+ return swapcount + page_trans_huge_mapcount(page);
}
page = compound_head(page);
- _total_mapcount = _total_swapcount = map_swapcount = 0;
+ _total_swapcount = map_swapcount = 0;
if (PageSwapCache(page)) {
swp_entry_t entry;
@@ -1639,8 +1638,7 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
if (map)
ci = lock_cluster(si, offset);
for (i = 0; i < HPAGE_PMD_NR; i++) {
- mapcount = atomic_read(&page[i]._mapcount) + 1;
- _total_mapcount += mapcount;
+ int mapcount = atomic_read(&page[i]._mapcount) + 1;
if (map) {
swapcount = swap_count(map[offset + i]);
_total_swapcount += swapcount;
@@ -1648,19 +1646,14 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
map_swapcount = max(map_swapcount, mapcount + swapcount);
}
unlock_cluster(ci);
- if (PageDoubleMap(page)) {
+
+ if (PageDoubleMap(page))
map_swapcount -= 1;
- _total_mapcount -= HPAGE_PMD_NR;
- }
- mapcount = compound_mapcount(page);
- map_swapcount += mapcount;
- _total_mapcount += mapcount;
- if (total_mapcount)
- *total_mapcount = _total_mapcount;
+
if (total_swapcount)
*total_swapcount = _total_swapcount;
- return map_swapcount;
+ return map_swapcount + compound_mapcount(page);
}
/*
@@ -1668,22 +1661,15 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
* to it. And as a side-effect, free up its swap: because the old content
* on disk will never be read, and seeking back there to write new content
* later would only waste time away from clustering.
- *
- * NOTE: total_map_swapcount should not be relied upon by the caller if
- * reuse_swap_page() returns false, but it may be always overwritten
- * (see the other implementation for CONFIG_SWAP=n).
*/
-bool reuse_swap_page(struct page *page, int *total_map_swapcount)
+bool reuse_swap_page(struct page *page)
{
- int count, total_mapcount, total_swapcount;
+ int count, total_swapcount;
VM_BUG_ON_PAGE(!PageLocked(page), page);
if (unlikely(PageKsm(page)))
return false;
- count = page_trans_huge_map_swapcount(page, &total_mapcount,
- &total_swapcount);
- if (total_map_swapcount)
- *total_map_swapcount = total_mapcount + total_swapcount;
+ count = page_trans_huge_map_swapcount(page, &total_swapcount);
if (count == 1 && PageSwapCache(page) &&
(likely(!PageTransCompound(page)) ||
/* The remaining swap count will be freed soon */
@@ -1917,14 +1903,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
get_page(page);
- set_pte_at(vma->vm_mm, addr, pte,
- pte_mkold(mk_pte(page, vma->vm_page_prot)));
if (page == swapcache) {
page_add_anon_rmap(page, vma, addr, false);
} else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, addr, false);
lru_cache_add_inactive_or_unevictable(page, vma);
}
+ set_pte_at(vma->vm_mm, addr, pte,
+ pte_mkold(mk_pte(page, vma->vm_page_prot)));
swap_free(entry);
out:
pte_unmap_unlock(pte, ptl);
@@ -1937,8 +1923,7 @@ out:
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
- unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+ unsigned int type)
{
struct page *page;
swp_entry_t entry;
@@ -1959,9 +1944,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
continue;
offset = swp_offset(entry);
- if (frontswap && !frontswap_test(si, offset))
- continue;
-
pte_unmap(pte);
swap_map = &si->swap_map[offset];
page = lookup_swap_cache(entry, vma, addr);
@@ -1993,11 +1975,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
try_to_free_swap(page);
unlock_page(page);
put_page(page);
-
- if (*fs_pages_to_unuse && !--(*fs_pages_to_unuse)) {
- ret = FRONTSWAP_PAGES_UNUSED;
- goto out;
- }
try_next:
pte = pte_offset_map(pmd, addr);
} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -2010,8 +1987,7 @@ out:
static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end,
- unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+ unsigned int type)
{
pmd_t *pmd;
unsigned long next;
@@ -2023,8 +1999,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
next = pmd_addr_end(addr, end);
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
continue;
- ret = unuse_pte_range(vma, pmd, addr, next, type,
- frontswap, fs_pages_to_unuse);
+ ret = unuse_pte_range(vma, pmd, addr, next, type);
if (ret)
return ret;
} while (pmd++, addr = next, addr != end);
@@ -2033,8 +2008,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
unsigned long addr, unsigned long end,
- unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+ unsigned int type)
{
pud_t *pud;
unsigned long next;
@@ -2045,8 +2019,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
- ret = unuse_pmd_range(vma, pud, addr, next, type,
- frontswap, fs_pages_to_unuse);
+ ret = unuse_pmd_range(vma, pud, addr, next, type);
if (ret)
return ret;
} while (pud++, addr = next, addr != end);
@@ -2055,8 +2028,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end,
- unsigned int type, bool frontswap,
- unsigned long *fs_pages_to_unuse)
+ unsigned int type)
{
p4d_t *p4d;
unsigned long next;
@@ -2067,16 +2039,14 @@ static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
next = p4d_addr_end(addr, end);
if (p4d_none_or_clear_bad(p4d))
continue;
- ret = unuse_pud_range(vma, p4d, addr, next, type,
- frontswap, fs_pages_to_unuse);
+ ret = unuse_pud_range(vma, p4d, addr, next, type);
if (ret)
return ret;
} while (p4d++, addr = next, addr != end);
return 0;
}
-static int unuse_vma(struct vm_area_struct *vma, unsigned int type,
- bool frontswap, unsigned long *fs_pages_to_unuse)
+static int unuse_vma(struct vm_area_struct *vma, unsigned int type)
{
pgd_t *pgd;
unsigned long addr, end, next;
@@ -2090,16 +2060,14 @@ static int unuse_vma(struct vm_area_struct *vma, unsigned int type,
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
- ret = unuse_p4d_range(vma, pgd, addr, next, type,
- frontswap, fs_pages_to_unuse);
+ ret = unuse_p4d_range(vma, pgd, addr, next, type);
if (ret)
return ret;
} while (pgd++, addr = next, addr != end);
return 0;
}
-static int unuse_mm(struct mm_struct *mm, unsigned int type,
- bool frontswap, unsigned long *fs_pages_to_unuse)
+static int unuse_mm(struct mm_struct *mm, unsigned int type)
{
struct vm_area_struct *vma;
int ret = 0;
@@ -2107,8 +2075,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type,
mmap_read_lock(mm);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (vma->anon_vma) {
- ret = unuse_vma(vma, type, frontswap,
- fs_pages_to_unuse);
+ ret = unuse_vma(vma, type);
if (ret)
break;
}
@@ -2124,7 +2091,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type,
* if there are no inuse entries after prev till end of the map.
*/
static unsigned int find_next_to_unuse(struct swap_info_struct *si,
- unsigned int prev, bool frontswap)
+ unsigned int prev)
{
unsigned int i;
unsigned char count;
@@ -2138,8 +2105,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
for (i = prev + 1; i < si->max; i++) {
count = READ_ONCE(si->swap_map[i]);
if (count && swap_count(count) != SWAP_MAP_BAD)
- if (!frontswap || frontswap_test(si, i))
- break;
+ break;
if ((i % LATENCY_LIMIT) == 0)
cond_resched();
}
@@ -2150,12 +2116,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
return i;
}
-/*
- * If the boolean frontswap is true, only unuse pages_to_unuse pages;
- * pages_to_unuse==0 means all pages; ignored if frontswap is false
- */
-int try_to_unuse(unsigned int type, bool frontswap,
- unsigned long pages_to_unuse)
+static int try_to_unuse(unsigned int type)
{
struct mm_struct *prev_mm;
struct mm_struct *mm;
@@ -2169,13 +2130,10 @@ int try_to_unuse(unsigned int type, bool frontswap,
if (!READ_ONCE(si->inuse_pages))
return 0;
- if (!frontswap)
- pages_to_unuse = 0;
-
retry:
- retval = shmem_unuse(type, frontswap, &pages_to_unuse);
+ retval = shmem_unuse(type);
if (retval)
- goto out;
+ return retval;
prev_mm = &init_mm;
mmget(prev_mm);
@@ -2192,11 +2150,10 @@ retry:
spin_unlock(&mmlist_lock);
mmput(prev_mm);
prev_mm = mm;
- retval = unuse_mm(mm, type, frontswap, &pages_to_unuse);
-
+ retval = unuse_mm(mm, type);
if (retval) {
mmput(prev_mm);
- goto out;
+ return retval;
}
/*
@@ -2213,7 +2170,7 @@ retry:
i = 0;
while (READ_ONCE(si->inuse_pages) &&
!signal_pending(current) &&
- (i = find_next_to_unuse(si, i, frontswap)) != 0) {
+ (i = find_next_to_unuse(si, i)) != 0) {
entry = swp_entry(type, i);
page = find_get_page(swap_address_space(entry), i);
@@ -2231,14 +2188,6 @@ retry:
try_to_free_swap(page);
unlock_page(page);
put_page(page);
-
- /*
- * For frontswap, we just need to unuse pages_to_unuse, if
- * it was specified. Need not check frontswap again here as
- * we already zeroed out pages_to_unuse if not frontswap.
- */
- if (pages_to_unuse && --pages_to_unuse == 0)
- goto out;
}
/*
@@ -2256,10 +2205,10 @@ retry:
if (READ_ONCE(si->inuse_pages)) {
if (!signal_pending(current))
goto retry;
- retval = -EINTR;
+ return -EINTR;
}
-out:
- return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval;
+
+ return 0;
}
/*
@@ -2477,7 +2426,8 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
struct swap_cluster_info *cluster_info,
unsigned long *frontswap_map)
{
- frontswap_init(p->type, frontswap_map);
+ if (IS_ENABLED(CONFIG_FRONTSWAP))
+ frontswap_init(p->type, frontswap_map);
spin_lock(&swap_lock);
spin_lock(&p->lock);
setup_swap_info(p, prio, swap_map, cluster_info);
@@ -2590,7 +2540,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
disable_swap_slots_cache_lock();
set_current_oom_origin();
- err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
+ err = try_to_unuse(p->type);
clear_current_oom_origin();
if (err) {
diff --git a/mm/truncate.c b/mm/truncate.c
index 5c87cdc70e7b..9dbf0b75da5d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -22,7 +22,6 @@
#include <linux/buffer_head.h> /* grr. try_to_release_page,
do_invalidatepage */
#include <linux/shmem_fs.h>
-#include <linux/cleancache.h>
#include <linux/rmap.h>
#include "internal.h"
@@ -205,7 +204,6 @@ static void truncate_cleanup_folio(struct folio *folio)
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
- int ret;
if (page->mapping != mapping)
return 0;
@@ -213,9 +211,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
if (page_has_private(page) && !try_to_release_page(page, 0))
return 0;
- ret = remove_mapping(mapping, page);
-
- return ret;
+ return remove_mapping(mapping, page);
}
int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
@@ -267,7 +263,6 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
*/
folio_zero_range(folio, offset, length);
- cleancache_invalidate_page(folio->mapping, &folio->page);
if (folio_has_private(folio))
do_invalidatepage(&folio->page, offset, length);
if (!folio_test_large(folio))
@@ -354,7 +349,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
bool same_folio;
if (mapping_empty(mapping))
- goto out;
+ return;
/*
* 'start' and 'end' always covers the range of pages to be fully
@@ -445,9 +440,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
folio_batch_release(&fbatch);
index++;
}
-
-out:
- cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -501,10 +493,6 @@ void truncate_inode_pages_final(struct address_space *mapping)
xa_unlock_irq(&mapping->i_pages);
}
- /*
- * Cleancache needs notification even if there are no pages or shadow
- * entries.
- */
truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(truncate_inode_pages_final);
@@ -664,7 +652,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
int did_range_unmap = 0;
if (mapping_empty(mapping))
- goto out;
+ return 0;
folio_batch_init(&fbatch);
index = start;
@@ -728,8 +716,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
if (dax_mapping(mapping)) {
unmap_mapping_pages(mapping, start, end - start + 1, false);
}
-out:
- cleancache_invalidate_inode(mapping);
return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index ac6f036298cd..0780c2a57ff1 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -232,6 +232,11 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
goto out;
}
+ if (PageHWPoison(page)) {
+ ret = -EIO;
+ goto out_release;
+ }
+
ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
page, false, wp_copy);
if (ret)
diff --git a/mm/util.c b/mm/util.c
index 741ba32a43ac..7e43369064c8 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -549,13 +549,10 @@ EXPORT_SYMBOL(vm_mmap);
* Uses kmalloc to get the memory but if the allocation fails then falls back
* to the vmalloc allocator. Use kvfree for freeing the memory.
*
- * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
+ * GFP_NOWAIT and GFP_ATOMIC are not supported, neither is the __GFP_NORETRY modifier.
* __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
* preferable to the vmalloc fallback, due to visible performance drawbacks.
*
- * Please note that any use of gfp flags outside of GFP_KERNEL is careful to not
- * fall back to vmalloc.
- *
* Return: pointer to the allocated memory of %NULL in case of failure
*/
void *kvmalloc_node(size_t size, gfp_t flags, int node)
@@ -564,13 +561,6 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
void *ret;
/*
- * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
- * so the given set of flags has to be compatible.
- */
- if ((flags & GFP_KERNEL) != GFP_KERNEL)
- return kmalloc_node(size, flags, node);
-
- /*
* We want to attempt a large physically contiguous block first because
* it is less likely to fragment multiple larger blocks and therefore
* contribute to a long term fragmentation less than vmalloc fallback.
@@ -582,6 +572,9 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
kmalloc_flags |= __GFP_NORETRY;
+
+ /* nofail semantic is implemented by the vmalloc fallback */
+ kmalloc_flags &= ~__GFP_NOFAIL;
}
ret = kmalloc_node(size, kmalloc_flags, node);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d2a00ad4e1dd..4165304d3547 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,6 +31,7 @@
#include <linux/kmemleak.h>
#include <linux/atomic.h>
#include <linux/compiler.h>
+#include <linux/memcontrol.h>
#include <linux/llist.h>
#include <linux/bitops.h>
#include <linux/rbtree_augmented.h>
@@ -38,6 +39,7 @@
#include <linux/pgtable.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
@@ -2623,12 +2625,13 @@ static void __vunmap(const void *addr, int deallocate_pages)
if (deallocate_pages) {
unsigned int page_order = vm_area_page_order(area);
- int i;
+ int i, step = 1U << page_order;
- for (i = 0; i < area->nr_pages; i += 1U << page_order) {
+ for (i = 0; i < area->nr_pages; i += step) {
struct page *page = area->pages[i];
BUG_ON(!page);
+ mod_memcg_page_state(page, MEMCG_VMALLOC, -step);
__free_pages(page, page_order);
cond_resched();
}
@@ -2844,6 +2847,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
* more permissive.
*/
if (!order) {
+ gfp_t bulk_gfp = gfp & ~__GFP_NOFAIL;
+
while (nr_allocated < nr_pages) {
unsigned int nr, nr_pages_request;
@@ -2861,12 +2866,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
* but mempolcy want to alloc memory by interleaving.
*/
if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
- nr = alloc_pages_bulk_array_mempolicy(gfp,
+ nr = alloc_pages_bulk_array_mempolicy(bulk_gfp,
nr_pages_request,
pages + nr_allocated);
else
- nr = alloc_pages_bulk_array_node(gfp, nid,
+ nr = alloc_pages_bulk_array_node(bulk_gfp, nid,
nr_pages_request,
pages + nr_allocated);
@@ -2921,11 +2926,14 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
{
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
const gfp_t orig_gfp_mask = gfp_mask;
+ bool nofail = gfp_mask & __GFP_NOFAIL;
unsigned long addr = (unsigned long)area->addr;
unsigned long size = get_vm_area_size(area);
unsigned long array_size;
unsigned int nr_small_pages = size >> PAGE_SHIFT;
unsigned int page_order;
+ unsigned int flags;
+ int ret;
array_size = (unsigned long)nr_small_pages * sizeof(struct page *);
gfp_mask |= __GFP_NOWARN;
@@ -2955,6 +2963,13 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
page_order, nr_small_pages, area->pages);
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
+ if (gfp_mask & __GFP_ACCOUNT) {
+ int i, step = 1U << page_order;
+
+ for (i = 0; i < area->nr_pages; i += step)
+ mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC,
+ step);
+ }
/*
* If not enough pages were obtained to accomplish an
@@ -2967,8 +2982,28 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
goto fail;
}
- if (vmap_pages_range(addr, addr + size, prot, area->pages,
- page_shift) < 0) {
+ /*
+ * page tables allocations ignore external gfp mask, enforce it
+ * by the scope API
+ */
+ if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO)
+ flags = memalloc_nofs_save();
+ else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0)
+ flags = memalloc_noio_save();
+
+ do {
+ ret = vmap_pages_range(addr, addr + size, prot, area->pages,
+ page_shift);
+ if (nofail && (ret < 0))
+ schedule_timeout_uninterruptible(1);
+ } while (nofail && (ret < 0));
+
+ if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO)
+ memalloc_nofs_restore(flags);
+ else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0)
+ memalloc_noio_restore(flags);
+
+ if (ret < 0) {
warn_alloc(orig_gfp_mask, NULL,
"vmalloc error: size %lu, failed to map pages",
area->nr_pages * PAGE_SIZE);
@@ -2996,12 +3031,14 @@ fail:
*
* Allocate enough pages to cover @size from the page level
* allocator with @gfp_mask flags. Please note that the full set of gfp
- * flags are not supported. GFP_KERNEL would be a preferred allocation mode
- * but GFP_NOFS and GFP_NOIO are supported as well. Zone modifiers are not
- * supported. From the reclaim modifiers__GFP_DIRECT_RECLAIM is required (aka
- * GFP_NOWAIT is not supported) and only __GFP_NOFAIL is supported (aka
- * __GFP_NORETRY and __GFP_RETRY_MAYFAIL are not supported).
- * __GFP_NOWARN can be used to suppress error messages about failures.
+ * flags are not supported. GFP_KERNEL, GFP_NOFS and GFP_NOIO are all
+ * supported.
+ * Zone modifiers are not supported. From the reclaim modifiers
+ * __GFP_DIRECT_RECLAIM is required (aka GFP_NOWAIT is not supported)
+ * and only __GFP_NOFAIL is supported (i.e. __GFP_NORETRY and
+ * __GFP_RETRY_MAYFAIL are not supported).
+ *
+ * __GFP_NOWARN can be used to suppress failures messages.
*
* Map them into contiguous kernel virtual space, using a pagetable
* protection of @prot.
@@ -3056,9 +3093,14 @@ again:
VM_UNINITIALIZED | vm_flags, start, end, node,
gfp_mask, caller);
if (!area) {
+ bool nofail = gfp_mask & __GFP_NOFAIL;
warn_alloc(gfp_mask, NULL,
- "vmalloc error: size %lu, vm_struct allocation failed",
- real_size);
+ "vmalloc error: size %lu, vm_struct allocation failed%s",
+ real_size, (nofail) ? ". Retrying." : "");
+ if (nofail) {
+ schedule_timeout_uninterruptible(1);
+ goto again;
+ }
goto fail;
}
@@ -3074,7 +3116,8 @@ again:
clear_vm_uninitialized_flag(area);
size = PAGE_ALIGN(size);
- kmemleak_vmalloc(area, size, gfp_mask);
+ if (!(vm_flags & VM_DEFER_KMEMLEAK))
+ kmemleak_vmalloc(area, size, gfp_mask);
return addr;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 700434db5735..090bfb605ecf 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -951,7 +951,7 @@ out:
return freed;
}
-void drop_slab_node(int nid)
+static void drop_slab_node(int nid)
{
unsigned long freed;
int shift = 0;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d701c335628c..4057372745d0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1353,6 +1353,9 @@ const char * const vmstat_text[] = {
"thp_split_page_failed",
"thp_deferred_split_page",
"thp_split_pmd",
+ "thp_scan_exceed_none_pte",
+ "thp_scan_exceed_swap_pte",
+ "thp_scan_exceed_share_pte",
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
"thp_split_pud",
#endif
diff --git a/mm/zpool.c b/mm/zpool.c
index 6d9ed48141e5..68facc193496 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -24,16 +24,11 @@ struct zpool {
const struct zpool_ops *ops;
bool evictable;
bool can_sleep_mapped;
-
- struct list_head list;
};
static LIST_HEAD(drivers_head);
static DEFINE_SPINLOCK(drivers_lock);
-static LIST_HEAD(pools_head);
-static DEFINE_SPINLOCK(pools_lock);
-
/**
* zpool_register_driver() - register a zpool implementation.
* @driver: driver to register
@@ -195,10 +190,6 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp,
pr_debug("created pool type %s\n", type);
- spin_lock(&pools_lock);
- list_add(&zpool->list, &pools_head);
- spin_unlock(&pools_lock);
-
return zpool;
}
@@ -217,9 +208,6 @@ void zpool_destroy_pool(struct zpool *zpool)
{
pr_debug("destroying pool type %s\n", zpool->driver->type);
- spin_lock(&pools_lock);
- list_del(&zpool->list);
- spin_unlock(&pools_lock);
zpool->driver->destroy(zpool->pool);
zpool_put_driver(zpool->driver);
kfree(zpool);
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 0d3b65939016..9152fbde33b5 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -30,6 +30,14 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+/*
+ * lock ordering:
+ * page_lock
+ * pool->migrate_lock
+ * class->lock
+ * zspage->lock
+ */
+
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -57,6 +65,7 @@
#include <linux/wait.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
+#include <linux/local_lock.h>
#define ZSPAGE_MAGIC 0x58
@@ -101,15 +110,6 @@
#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
/*
- * Memory for allocating for handle keeps object position by
- * encoding <page, obj_idx> and the encoded value has a room
- * in least bit(ie, look at obj_to_location).
- * We use the bit to synchronize between object access by
- * user and migration.
- */
-#define HANDLE_PIN_BIT 0
-
-/*
* Head in allocated object should have OBJ_ALLOCATED_TAG
* to identify the object was allocated or not.
* It's okay to add the status bit in the least bit because
@@ -121,6 +121,7 @@
#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
+#define HUGE_BITS 1
#define FULLNESS_BITS 2
#define CLASS_BITS 8
#define ISOLATED_BITS 3
@@ -158,7 +159,7 @@ enum fullness_group {
NR_ZS_FULLNESS,
};
-enum zs_stat_type {
+enum class_stat_type {
CLASS_EMPTY,
CLASS_ALMOST_EMPTY,
CLASS_ALMOST_FULL,
@@ -213,22 +214,6 @@ struct size_class {
struct zs_size_stat stats;
};
-/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
-static void SetPageHugeObject(struct page *page)
-{
- SetPageOwnerPriv1(page);
-}
-
-static void ClearPageHugeObject(struct page *page)
-{
- ClearPageOwnerPriv1(page);
-}
-
-static int PageHugeObject(struct page *page)
-{
- return PageOwnerPriv1(page);
-}
-
/*
* Placed within free objects to form a singly linked list.
* For every zspage, zspage->freeobj gives head of this list.
@@ -269,15 +254,14 @@ struct zs_pool {
#ifdef CONFIG_COMPACTION
struct inode *inode;
struct work_struct free_work;
- /* A wait queue for when migration races with async_free_zspage() */
- struct wait_queue_head migration_wait;
- atomic_long_t isolated_pages;
- bool destroying;
#endif
+ /* protect page/zspage migration */
+ rwlock_t migrate_lock;
};
struct zspage {
struct {
+ unsigned int huge:HUGE_BITS;
unsigned int fullness:FULLNESS_BITS;
unsigned int class:CLASS_BITS + 1;
unsigned int isolated:ISOLATED_BITS;
@@ -293,17 +277,32 @@ struct zspage {
};
struct mapping_area {
+ local_lock_t lock;
char *vm_buf; /* copy buffer for objects that span pages */
char *vm_addr; /* address of kmap_atomic()'ed pages */
enum zs_mapmode vm_mm; /* mapping mode */
};
+/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
+static void SetZsHugePage(struct zspage *zspage)
+{
+ zspage->huge = 1;
+}
+
+static bool ZsHugePage(struct zspage *zspage)
+{
+ return zspage->huge;
+}
+
#ifdef CONFIG_COMPACTION
static int zs_register_migration(struct zs_pool *pool);
static void zs_unregister_migration(struct zs_pool *pool);
static void migrate_lock_init(struct zspage *zspage);
static void migrate_read_lock(struct zspage *zspage);
static void migrate_read_unlock(struct zspage *zspage);
+static void migrate_write_lock(struct zspage *zspage);
+static void migrate_write_lock_nested(struct zspage *zspage);
+static void migrate_write_unlock(struct zspage *zspage);
static void kick_deferred_free(struct zs_pool *pool);
static void init_deferred_free(struct zs_pool *pool);
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
@@ -315,6 +314,9 @@ static void zs_unregister_migration(struct zs_pool *pool) {}
static void migrate_lock_init(struct zspage *zspage) {}
static void migrate_read_lock(struct zspage *zspage) {}
static void migrate_read_unlock(struct zspage *zspage) {}
+static void migrate_write_lock(struct zspage *zspage) {}
+static void migrate_write_lock_nested(struct zspage *zspage) {}
+static void migrate_write_unlock(struct zspage *zspage) {}
static void kick_deferred_free(struct zs_pool *pool) {}
static void init_deferred_free(struct zs_pool *pool) {}
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
@@ -366,14 +368,10 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
kmem_cache_free(pool->zspage_cachep, zspage);
}
+/* class->lock(which owns the handle) synchronizes races */
static void record_obj(unsigned long handle, unsigned long obj)
{
- /*
- * lsb of @obj represents handle lock while other bits
- * represent object value the handle is pointing so
- * updating shouldn't do store tearing.
- */
- WRITE_ONCE(*(unsigned long *)handle, obj);
+ *(unsigned long *)handle = obj;
}
/* zpool driver */
@@ -455,12 +453,9 @@ MODULE_ALIAS("zpool-zsmalloc");
#endif /* CONFIG_ZPOOL */
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
-
-static bool is_zspage_isolated(struct zspage *zspage)
-{
- return zspage->isolated;
-}
+static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static __maybe_unused int is_first_page(struct page *page)
{
@@ -517,6 +512,12 @@ static void get_zspage_mapping(struct zspage *zspage,
*class_idx = zspage->class;
}
+static struct size_class *zspage_class(struct zs_pool *pool,
+ struct zspage *zspage)
+{
+ return pool->size_class[zspage->class];
+}
+
static void set_zspage_mapping(struct zspage *zspage,
unsigned int class_idx,
enum fullness_group fullness)
@@ -543,21 +544,21 @@ static int get_size_class_index(int size)
return min_t(int, ZS_SIZE_CLASSES - 1, idx);
}
-/* type can be of enum type zs_stat_type or fullness_group */
-static inline void zs_stat_inc(struct size_class *class,
+/* type can be of enum type class_stat_type or fullness_group */
+static inline void class_stat_inc(struct size_class *class,
int type, unsigned long cnt)
{
class->stats.objs[type] += cnt;
}
-/* type can be of enum type zs_stat_type or fullness_group */
-static inline void zs_stat_dec(struct size_class *class,
+/* type can be of enum type class_stat_type or fullness_group */
+static inline void class_stat_dec(struct size_class *class,
int type, unsigned long cnt)
{
class->stats.objs[type] -= cnt;
}
-/* type can be of enum type zs_stat_type or fullness_group */
+/* type can be of enum type class_stat_type or fullness_group */
static inline unsigned long zs_stat_get(struct size_class *class,
int type)
{
@@ -719,7 +720,7 @@ static void insert_zspage(struct size_class *class,
{
struct zspage *head;
- zs_stat_inc(class, fullness, 1);
+ class_stat_inc(class, fullness, 1);
head = list_first_entry_or_null(&class->fullness_list[fullness],
struct zspage, list);
/*
@@ -741,10 +742,9 @@ static void remove_zspage(struct size_class *class,
enum fullness_group fullness)
{
VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
- VM_BUG_ON(is_zspage_isolated(zspage));
list_del_init(&zspage->list);
- zs_stat_dec(class, fullness, 1);
+ class_stat_dec(class, fullness, 1);
}
/*
@@ -767,13 +767,9 @@ static enum fullness_group fix_fullness_group(struct size_class *class,
if (newfg == currfg)
goto out;
- if (!is_zspage_isolated(zspage)) {
- remove_zspage(class, zspage, currfg);
- insert_zspage(class, zspage, newfg);
- }
-
+ remove_zspage(class, zspage, currfg);
+ insert_zspage(class, zspage, newfg);
set_zspage_mapping(zspage, class_idx, newfg);
-
out:
return newfg;
}
@@ -824,7 +820,9 @@ static struct zspage *get_zspage(struct page *page)
static struct page *get_next_page(struct page *page)
{
- if (unlikely(PageHugeObject(page)))
+ struct zspage *zspage = get_zspage(page);
+
+ if (unlikely(ZsHugePage(zspage)))
return NULL;
return (struct page *)page->index;
@@ -844,6 +842,12 @@ static void obj_to_location(unsigned long obj, struct page **page,
*obj_idx = (obj & OBJ_INDEX_MASK);
}
+static void obj_to_page(unsigned long obj, struct page **page)
+{
+ obj >>= OBJ_TAG_BITS;
+ *page = pfn_to_page(obj >> OBJ_INDEX_BITS);
+}
+
/**
* location_to_obj - get obj value encoded from (<page>, <obj_idx>)
* @page: page object resides in zspage
@@ -865,33 +869,22 @@ static unsigned long handle_to_obj(unsigned long handle)
return *(unsigned long *)handle;
}
-static unsigned long obj_to_head(struct page *page, void *obj)
+static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
{
- if (unlikely(PageHugeObject(page))) {
+ unsigned long handle;
+ struct zspage *zspage = get_zspage(page);
+
+ if (unlikely(ZsHugePage(zspage))) {
VM_BUG_ON_PAGE(!is_first_page(page), page);
- return page->index;
+ handle = page->index;
} else
- return *(unsigned long *)obj;
-}
-
-static inline int testpin_tag(unsigned long handle)
-{
- return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
-}
-
-static inline int trypin_tag(unsigned long handle)
-{
- return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
-}
+ handle = *(unsigned long *)obj;
-static void pin_tag(unsigned long handle) __acquires(bitlock)
-{
- bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
-}
+ if (!(handle & OBJ_ALLOCATED_TAG))
+ return false;
-static void unpin_tag(unsigned long handle) __releases(bitlock)
-{
- bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
+ *phandle = handle & ~OBJ_ALLOCATED_TAG;
+ return true;
}
static void reset_page(struct page *page)
@@ -900,7 +893,6 @@ static void reset_page(struct page *page)
ClearPagePrivate(page);
set_page_private(page, 0);
page_mapcount_reset(page);
- ClearPageHugeObject(page);
page->index = 0;
}
@@ -952,7 +944,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
cache_free_zspage(pool, zspage);
- zs_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
+ class_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
atomic_long_sub(class->pages_per_zspage,
&pool->pages_allocated);
}
@@ -963,6 +955,11 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
VM_BUG_ON(get_zspage_inuse(zspage));
VM_BUG_ON(list_empty(&zspage->list));
+ /*
+ * Since zs_free couldn't be sleepable, this function cannot call
+ * lock_page. The page locks trylock_zspage got will be released
+ * by __free_zspage.
+ */
if (!trylock_zspage(zspage)) {
kick_deferred_free(pool);
return;
@@ -1042,7 +1039,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
SetPagePrivate(page);
if (unlikely(class->objs_per_zspage == 1 &&
class->pages_per_zspage == 1))
- SetPageHugeObject(page);
+ SetZsHugePage(zspage);
} else {
prev_page->index = (unsigned long)page;
}
@@ -1246,8 +1243,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
unsigned long obj, off;
unsigned int obj_idx;
- unsigned int class_idx;
- enum fullness_group fg;
struct size_class *class;
struct mapping_area *area;
struct page *pages[2];
@@ -1260,21 +1255,26 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
*/
BUG_ON(in_interrupt());
- /* From now on, migration cannot move the object */
- pin_tag(handle);
-
+ /* It guarantees it can get zspage from handle safely */
+ read_lock(&pool->migrate_lock);
obj = handle_to_obj(handle);
obj_to_location(obj, &page, &obj_idx);
zspage = get_zspage(page);
- /* migration cannot move any subpage in this zspage */
+ /*
+ * migration cannot move any zpages in this zspage. Here, class->lock
+ * is too heavy since callers would take some time until they calls
+ * zs_unmap_object API so delegate the locking from class to zspage
+ * which is smaller granularity.
+ */
migrate_read_lock(zspage);
+ read_unlock(&pool->migrate_lock);
- get_zspage_mapping(zspage, &class_idx, &fg);
- class = pool->size_class[class_idx];
+ class = zspage_class(pool, zspage);
off = (class->size * obj_idx) & ~PAGE_MASK;
- area = &get_cpu_var(zs_map_area);
+ local_lock(&zs_map_area.lock);
+ area = this_cpu_ptr(&zs_map_area);
area->vm_mm = mm;
if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */
@@ -1290,7 +1290,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
ret = __zs_map_object(area, pages, off, class->size);
out:
- if (likely(!PageHugeObject(page)))
+ if (likely(!ZsHugePage(zspage)))
ret += ZS_HANDLE_SIZE;
return ret;
@@ -1304,16 +1304,13 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
unsigned long obj, off;
unsigned int obj_idx;
- unsigned int class_idx;
- enum fullness_group fg;
struct size_class *class;
struct mapping_area *area;
obj = handle_to_obj(handle);
obj_to_location(obj, &page, &obj_idx);
zspage = get_zspage(page);
- get_zspage_mapping(zspage, &class_idx, &fg);
- class = pool->size_class[class_idx];
+ class = zspage_class(pool, zspage);
off = (class->size * obj_idx) & ~PAGE_MASK;
area = this_cpu_ptr(&zs_map_area);
@@ -1328,10 +1325,9 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
__zs_unmap_object(area, pages, off, class->size);
}
- put_cpu_var(zs_map_area);
+ local_unlock(&zs_map_area.lock);
migrate_read_unlock(zspage);
- unpin_tag(handle);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);
@@ -1354,17 +1350,19 @@ size_t zs_huge_class_size(struct zs_pool *pool)
}
EXPORT_SYMBOL_GPL(zs_huge_class_size);
-static unsigned long obj_malloc(struct size_class *class,
+static unsigned long obj_malloc(struct zs_pool *pool,
struct zspage *zspage, unsigned long handle)
{
int i, nr_page, offset;
unsigned long obj;
struct link_free *link;
+ struct size_class *class;
struct page *m_page;
unsigned long m_offset;
void *vaddr;
+ class = pool->size_class[zspage->class];
handle |= OBJ_ALLOCATED_TAG;
obj = get_freeobj(zspage);
@@ -1379,7 +1377,7 @@ static unsigned long obj_malloc(struct size_class *class,
vaddr = kmap_atomic(m_page);
link = (struct link_free *)vaddr + m_offset / sizeof(*link);
set_freeobj(zspage, link->next >> OBJ_TAG_BITS);
- if (likely(!PageHugeObject(m_page)))
+ if (likely(!ZsHugePage(zspage)))
/* record handle in the header of allocated chunk */
link->handle = handle;
else
@@ -1388,7 +1386,6 @@ static unsigned long obj_malloc(struct size_class *class,
kunmap_atomic(vaddr);
mod_zspage_inuse(zspage, 1);
- zs_stat_inc(class, OBJ_USED, 1);
obj = location_to_obj(m_page, obj);
@@ -1424,13 +1421,15 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
size += ZS_HANDLE_SIZE;
class = pool->size_class[get_size_class_index(size)];
+ /* class->lock effectively protects the zpage migration */
spin_lock(&class->lock);
zspage = find_get_zspage(class);
if (likely(zspage)) {
- obj = obj_malloc(class, zspage, handle);
+ obj = obj_malloc(pool, zspage, handle);
/* Now move the zspage to another fullness group, if required */
fix_fullness_group(class, zspage);
record_obj(handle, obj);
+ class_stat_inc(class, OBJ_USED, 1);
spin_unlock(&class->lock);
return handle;
@@ -1445,14 +1444,15 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
}
spin_lock(&class->lock);
- obj = obj_malloc(class, zspage, handle);
+ obj = obj_malloc(pool, zspage, handle);
newfg = get_fullness_group(class, zspage);
insert_zspage(class, zspage, newfg);
set_zspage_mapping(zspage, class->index, newfg);
record_obj(handle, obj);
atomic_long_add(class->pages_per_zspage,
&pool->pages_allocated);
- zs_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
+ class_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
+ class_stat_inc(class, OBJ_USED, 1);
/* We completely set up zspage so mark them as movable */
SetZsPageMovable(pool, zspage);
@@ -1462,7 +1462,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(zs_malloc);
-static void obj_free(struct size_class *class, unsigned long obj)
+static void obj_free(int class_size, unsigned long obj)
{
struct link_free *link;
struct zspage *zspage;
@@ -1472,18 +1472,20 @@ static void obj_free(struct size_class *class, unsigned long obj)
void *vaddr;
obj_to_location(obj, &f_page, &f_objidx);
- f_offset = (class->size * f_objidx) & ~PAGE_MASK;
+ f_offset = (class_size * f_objidx) & ~PAGE_MASK;
zspage = get_zspage(f_page);
vaddr = kmap_atomic(f_page);
/* Insert this object in containing zspage's freelist */
link = (struct link_free *)(vaddr + f_offset);
- link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
+ if (likely(!ZsHugePage(zspage)))
+ link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
+ else
+ f_page->index = 0;
kunmap_atomic(vaddr);
set_freeobj(zspage, f_objidx);
mod_zspage_inuse(zspage, -1);
- zs_stat_dec(class, OBJ_USED, 1);
}
void zs_free(struct zs_pool *pool, unsigned long handle)
@@ -1491,42 +1493,33 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
struct zspage *zspage;
struct page *f_page;
unsigned long obj;
- unsigned int f_objidx;
- int class_idx;
struct size_class *class;
enum fullness_group fullness;
- bool isolated;
if (unlikely(!handle))
return;
- pin_tag(handle);
+ /*
+ * The pool->migrate_lock protects the race with zpage's migration
+ * so it's safe to get the page from handle.
+ */
+ read_lock(&pool->migrate_lock);
obj = handle_to_obj(handle);
- obj_to_location(obj, &f_page, &f_objidx);
+ obj_to_page(obj, &f_page);
zspage = get_zspage(f_page);
-
- migrate_read_lock(zspage);
-
- get_zspage_mapping(zspage, &class_idx, &fullness);
- class = pool->size_class[class_idx];
-
+ class = zspage_class(pool, zspage);
spin_lock(&class->lock);
- obj_free(class, obj);
+ read_unlock(&pool->migrate_lock);
+
+ obj_free(class->size, obj);
+ class_stat_dec(class, OBJ_USED, 1);
fullness = fix_fullness_group(class, zspage);
- if (fullness != ZS_EMPTY) {
- migrate_read_unlock(zspage);
+ if (fullness != ZS_EMPTY)
goto out;
- }
- isolated = is_zspage_isolated(zspage);
- migrate_read_unlock(zspage);
- /* If zspage is isolated, zs_page_putback will free the zspage */
- if (likely(!isolated))
- free_zspage(pool, class, zspage);
+ free_zspage(pool, class, zspage);
out:
-
spin_unlock(&class->lock);
- unpin_tag(handle);
cache_free_handle(pool, handle);
}
EXPORT_SYMBOL_GPL(zs_free);
@@ -1601,7 +1594,6 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
static unsigned long find_alloced_obj(struct size_class *class,
struct page *page, int *obj_idx)
{
- unsigned long head;
int offset = 0;
int index = *obj_idx;
unsigned long handle = 0;
@@ -1611,13 +1603,8 @@ static unsigned long find_alloced_obj(struct size_class *class,
offset += class->size * index;
while (offset < PAGE_SIZE) {
- head = obj_to_head(page, addr + offset);
- if (head & OBJ_ALLOCATED_TAG) {
- handle = head & ~OBJ_ALLOCATED_TAG;
- if (trypin_tag(handle))
- break;
- handle = 0;
- }
+ if (obj_allocated(page, addr + offset, &handle))
+ break;
offset += class->size;
index++;
@@ -1663,25 +1650,16 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
/* Stop if there is no more space */
if (zspage_full(class, get_zspage(d_page))) {
- unpin_tag(handle);
ret = -ENOMEM;
break;
}
used_obj = handle_to_obj(handle);
- free_obj = obj_malloc(class, get_zspage(d_page), handle);
+ free_obj = obj_malloc(pool, get_zspage(d_page), handle);
zs_object_copy(class, free_obj, used_obj);
obj_idx++;
- /*
- * record_obj updates handle's value to free_obj and it will
- * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which
- * breaks synchronization using pin_tag(e,g, zs_free) so
- * let's keep the lock bit.
- */
- free_obj |= BIT(HANDLE_PIN_BIT);
record_obj(handle, free_obj);
- unpin_tag(handle);
- obj_free(class, used_obj);
+ obj_free(class->size, used_obj);
}
/* Remember last position in this iteration */
@@ -1706,7 +1684,6 @@ static struct zspage *isolate_zspage(struct size_class *class, bool source)
zspage = list_first_entry_or_null(&class->fullness_list[fg[i]],
struct zspage, list);
if (zspage) {
- VM_BUG_ON(is_zspage_isolated(zspage));
remove_zspage(class, zspage, fg[i]);
return zspage;
}
@@ -1727,8 +1704,6 @@ static enum fullness_group putback_zspage(struct size_class *class,
{
enum fullness_group fullness;
- VM_BUG_ON(is_zspage_isolated(zspage));
-
fullness = get_fullness_group(class, zspage);
insert_zspage(class, zspage, fullness);
set_zspage_mapping(zspage, class->index, fullness);
@@ -1797,6 +1772,11 @@ static void migrate_write_lock(struct zspage *zspage)
write_lock(&zspage->lock);
}
+static void migrate_write_lock_nested(struct zspage *zspage)
+{
+ write_lock_nested(&zspage->lock, SINGLE_DEPTH_NESTING);
+}
+
static void migrate_write_unlock(struct zspage *zspage)
{
write_unlock(&zspage->lock);
@@ -1810,35 +1790,10 @@ static void inc_zspage_isolation(struct zspage *zspage)
static void dec_zspage_isolation(struct zspage *zspage)
{
+ VM_BUG_ON(zspage->isolated == 0);
zspage->isolated--;
}
-static void putback_zspage_deferred(struct zs_pool *pool,
- struct size_class *class,
- struct zspage *zspage)
-{
- enum fullness_group fg;
-
- fg = putback_zspage(class, zspage);
- if (fg == ZS_EMPTY)
- schedule_work(&pool->free_work);
-
-}
-
-static inline void zs_pool_dec_isolated(struct zs_pool *pool)
-{
- VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
- atomic_long_dec(&pool->isolated_pages);
- /*
- * Checking pool->destroying must happen after atomic_long_dec()
- * for pool->isolated_pages above. Paired with the smp_mb() in
- * zs_unregister_migration().
- */
- smp_mb__after_atomic();
- if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
- wake_up_all(&pool->migration_wait);
-}
-
static void replace_sub_page(struct size_class *class, struct zspage *zspage,
struct page *newpage, struct page *oldpage)
{
@@ -1857,19 +1812,14 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
create_page_chain(class, zspage, pages);
set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
- if (unlikely(PageHugeObject(oldpage)))
+ if (unlikely(ZsHugePage(zspage)))
newpage->index = oldpage->index;
__SetPageMovable(newpage, page_mapping(oldpage));
}
static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
{
- struct zs_pool *pool;
- struct size_class *class;
- int class_idx;
- enum fullness_group fullness;
struct zspage *zspage;
- struct address_space *mapping;
/*
* Page is locked so zspage couldn't be destroyed. For detail, look at
@@ -1879,41 +1829,9 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
VM_BUG_ON_PAGE(PageIsolated(page), page);
zspage = get_zspage(page);
-
- /*
- * Without class lock, fullness could be stale while class_idx is okay
- * because class_idx is constant unless page is freed so we should get
- * fullness again under class lock.
- */
- get_zspage_mapping(zspage, &class_idx, &fullness);
- mapping = page_mapping(page);
- pool = mapping->private_data;
- class = pool->size_class[class_idx];
-
- spin_lock(&class->lock);
- if (get_zspage_inuse(zspage) == 0) {
- spin_unlock(&class->lock);
- return false;
- }
-
- /* zspage is isolated for object migration */
- if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
- spin_unlock(&class->lock);
- return false;
- }
-
- /*
- * If this is first time isolation for the zspage, isolate zspage from
- * size_class to prevent further object allocation from the zspage.
- */
- if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
- get_zspage_mapping(zspage, &class_idx, &fullness);
- atomic_long_inc(&pool->isolated_pages);
- remove_zspage(class, zspage, fullness);
- }
-
+ migrate_write_lock(zspage);
inc_zspage_isolation(zspage);
- spin_unlock(&class->lock);
+ migrate_write_unlock(zspage);
return true;
}
@@ -1923,16 +1841,13 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
{
struct zs_pool *pool;
struct size_class *class;
- int class_idx;
- enum fullness_group fullness;
struct zspage *zspage;
struct page *dummy;
void *s_addr, *d_addr, *addr;
- int offset, pos;
- unsigned long handle, head;
+ int offset;
+ unsigned long handle;
unsigned long old_obj, new_obj;
unsigned int obj_idx;
- int ret = -EAGAIN;
/*
* We cannot support the _NO_COPY case here, because copy needs to
@@ -1945,35 +1860,25 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
- zspage = get_zspage(page);
-
- /* Concurrent compactor cannot migrate any subpage in zspage */
- migrate_write_lock(zspage);
- get_zspage_mapping(zspage, &class_idx, &fullness);
pool = mapping->private_data;
- class = pool->size_class[class_idx];
- offset = get_first_obj_offset(page);
+ /*
+ * The pool migrate_lock protects the race between zpage migration
+ * and zs_free.
+ */
+ write_lock(&pool->migrate_lock);
+ zspage = get_zspage(page);
+ class = zspage_class(pool, zspage);
+
+ /*
+ * the class lock protects zpage alloc/free in the zspage.
+ */
spin_lock(&class->lock);
- if (!get_zspage_inuse(zspage)) {
- /*
- * Set "offset" to end of the page so that every loops
- * skips unnecessary object scanning.
- */
- offset = PAGE_SIZE;
- }
+ /* the migrate_write_lock protects zpage access via zs_map_object */
+ migrate_write_lock(zspage);
- pos = offset;
+ offset = get_first_obj_offset(page);
s_addr = kmap_atomic(page);
- while (pos < PAGE_SIZE) {
- head = obj_to_head(page, s_addr + pos);
- if (head & OBJ_ALLOCATED_TAG) {
- handle = head & ~OBJ_ALLOCATED_TAG;
- if (!trypin_tag(handle))
- goto unpin_objects;
- }
- pos += class->size;
- }
/*
* Here, any user cannot access all objects in the zspage so let's move.
@@ -1982,42 +1887,30 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
memcpy(d_addr, s_addr, PAGE_SIZE);
kunmap_atomic(d_addr);
- for (addr = s_addr + offset; addr < s_addr + pos;
+ for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE;
addr += class->size) {
- head = obj_to_head(page, addr);
- if (head & OBJ_ALLOCATED_TAG) {
- handle = head & ~OBJ_ALLOCATED_TAG;
- BUG_ON(!testpin_tag(handle));
+ if (obj_allocated(page, addr, &handle)) {
old_obj = handle_to_obj(handle);
obj_to_location(old_obj, &dummy, &obj_idx);
new_obj = (unsigned long)location_to_obj(newpage,
obj_idx);
- new_obj |= BIT(HANDLE_PIN_BIT);
record_obj(handle, new_obj);
}
}
+ kunmap_atomic(s_addr);
replace_sub_page(class, zspage, newpage, page);
- get_page(newpage);
-
- dec_zspage_isolation(zspage);
-
/*
- * Page migration is done so let's putback isolated zspage to
- * the list if @page is final isolated subpage in the zspage.
+ * Since we complete the data copy and set up new zspage structure,
+ * it's okay to release migration_lock.
*/
- if (!is_zspage_isolated(zspage)) {
- /*
- * We cannot race with zs_destroy_pool() here because we wait
- * for isolation to hit zero before we start destroying.
- * Also, we ensure that everyone can see pool->destroying before
- * we start waiting.
- */
- putback_zspage_deferred(pool, class, zspage);
- zs_pool_dec_isolated(pool);
- }
+ write_unlock(&pool->migrate_lock);
+ spin_unlock(&class->lock);
+ dec_zspage_isolation(zspage);
+ migrate_write_unlock(zspage);
+ get_page(newpage);
if (page_zone(newpage) != page_zone(page)) {
dec_zone_page_state(page, NR_ZSPAGES);
inc_zone_page_state(newpage, NR_ZSPAGES);
@@ -2025,55 +1918,21 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
reset_page(page);
put_page(page);
- page = newpage;
-
- ret = MIGRATEPAGE_SUCCESS;
-unpin_objects:
- for (addr = s_addr + offset; addr < s_addr + pos;
- addr += class->size) {
- head = obj_to_head(page, addr);
- if (head & OBJ_ALLOCATED_TAG) {
- handle = head & ~OBJ_ALLOCATED_TAG;
- BUG_ON(!testpin_tag(handle));
- unpin_tag(handle);
- }
- }
- kunmap_atomic(s_addr);
- spin_unlock(&class->lock);
- migrate_write_unlock(zspage);
- return ret;
+ return MIGRATEPAGE_SUCCESS;
}
static void zs_page_putback(struct page *page)
{
- struct zs_pool *pool;
- struct size_class *class;
- int class_idx;
- enum fullness_group fg;
- struct address_space *mapping;
struct zspage *zspage;
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
zspage = get_zspage(page);
- get_zspage_mapping(zspage, &class_idx, &fg);
- mapping = page_mapping(page);
- pool = mapping->private_data;
- class = pool->size_class[class_idx];
-
- spin_lock(&class->lock);
+ migrate_write_lock(zspage);
dec_zspage_isolation(zspage);
- if (!is_zspage_isolated(zspage)) {
- /*
- * Due to page_lock, we cannot free zspage immediately
- * so let's defer.
- */
- putback_zspage_deferred(pool, class, zspage);
- zs_pool_dec_isolated(pool);
- }
- spin_unlock(&class->lock);
+ migrate_write_unlock(zspage);
}
static const struct address_space_operations zsmalloc_aops = {
@@ -2095,36 +1954,8 @@ static int zs_register_migration(struct zs_pool *pool)
return 0;
}
-static bool pool_isolated_are_drained(struct zs_pool *pool)
-{
- return atomic_long_read(&pool->isolated_pages) == 0;
-}
-
-/* Function for resolving migration */
-static void wait_for_isolated_drain(struct zs_pool *pool)
-{
-
- /*
- * We're in the process of destroying the pool, so there are no
- * active allocations. zs_page_isolate() fails for completely free
- * zspages, so we need only wait for the zs_pool's isolated
- * count to hit zero.
- */
- wait_event(pool->migration_wait,
- pool_isolated_are_drained(pool));
-}
-
static void zs_unregister_migration(struct zs_pool *pool)
{
- pool->destroying = true;
- /*
- * We need a memory barrier here to ensure global visibility of
- * pool->destroying. Thus pool->isolated pages will either be 0 in which
- * case we don't care, or it will be > 0 and pool->destroying will
- * ensure that we wake up once isolation hits 0.
- */
- smp_mb();
- wait_for_isolated_drain(pool); /* This can block */
flush_work(&pool->free_work);
iput(pool->inode);
}
@@ -2154,7 +1985,6 @@ static void async_free_zspage(struct work_struct *work)
spin_unlock(&class->lock);
}
-
list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
list_del(&zspage->list);
lock_zspage(zspage);
@@ -2218,8 +2048,13 @@ static unsigned long __zs_compact(struct zs_pool *pool,
struct zspage *dst_zspage = NULL;
unsigned long pages_freed = 0;
+ /* protect the race between zpage migration and zs_free */
+ write_lock(&pool->migrate_lock);
+ /* protect zpage allocation/free */
spin_lock(&class->lock);
while ((src_zspage = isolate_zspage(class, true))) {
+ /* protect someone accessing the zspage(i.e., zs_map_object) */
+ migrate_write_lock(src_zspage);
if (!zs_can_compact(class))
break;
@@ -2228,6 +2063,8 @@ static unsigned long __zs_compact(struct zs_pool *pool,
cc.s_page = get_first_page(src_zspage);
while ((dst_zspage = isolate_zspage(class, false))) {
+ migrate_write_lock_nested(dst_zspage);
+
cc.d_page = get_first_page(dst_zspage);
/*
* If there is no more space in dst_page, resched
@@ -2237,6 +2074,10 @@ static unsigned long __zs_compact(struct zs_pool *pool,
break;
putback_zspage(class, dst_zspage);
+ migrate_write_unlock(dst_zspage);
+ dst_zspage = NULL;
+ if (rwlock_is_contended(&pool->migrate_lock))
+ break;
}
/* Stop if we couldn't find slot */
@@ -2244,19 +2085,28 @@ static unsigned long __zs_compact(struct zs_pool *pool,
break;
putback_zspage(class, dst_zspage);
+ migrate_write_unlock(dst_zspage);
+
if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
+ migrate_write_unlock(src_zspage);
free_zspage(pool, class, src_zspage);
pages_freed += class->pages_per_zspage;
- }
+ } else
+ migrate_write_unlock(src_zspage);
spin_unlock(&class->lock);
+ write_unlock(&pool->migrate_lock);
cond_resched();
+ write_lock(&pool->migrate_lock);
spin_lock(&class->lock);
}
- if (src_zspage)
+ if (src_zspage) {
putback_zspage(class, src_zspage);
+ migrate_write_unlock(src_zspage);
+ }
spin_unlock(&class->lock);
+ write_unlock(&pool->migrate_lock);
return pages_freed;
}
@@ -2362,15 +2212,12 @@ struct zs_pool *zs_create_pool(const char *name)
return NULL;
init_deferred_free(pool);
+ rwlock_init(&pool->migrate_lock);
pool->name = kstrdup(name, GFP_KERNEL);
if (!pool->name)
goto err;
-#ifdef CONFIG_COMPACTION
- init_waitqueue_head(&pool->migration_wait);
-#endif
-
if (create_cache(pool))
goto err;
diff --git a/mm/zswap.c b/mm/zswap.c
index 7944e3e57e78..cdf6950fcb2e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1378,7 +1378,7 @@ static void zswap_frontswap_init(unsigned type)
zswap_trees[type] = tree;
}
-static struct frontswap_ops zswap_frontswap_ops = {
+static const struct frontswap_ops zswap_frontswap_ops = {
.store = zswap_frontswap_store,
.load = zswap_frontswap_load,
.invalidate_page = zswap_frontswap_invalidate_page,
@@ -1475,11 +1475,15 @@ static int __init init_zswap(void)
if (!shrink_wq)
goto fallback_fail;
- frontswap_register_ops(&zswap_frontswap_ops);
+ ret = frontswap_register_ops(&zswap_frontswap_ops);
+ if (ret)
+ goto destroy_wq;
if (zswap_debugfs_init())
pr_warn("debugfs initialization failed\n");
return 0;
+destroy_wq:
+ destroy_workqueue(shrink_wq);
fallback_fail:
if (pool)
zswap_pool_destroy(pool);
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 64468c49791f..deabbd376cb1 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -15,6 +15,13 @@ menuconfig NET_9P
if NET_9P
+config NET_9P_FD
+ default NET_9P
+ tristate "9P FD Transport"
+ help
+ This builds support for transports over TCP, Unix sockets and
+ filedescriptors.
+
config NET_9P_VIRTIO
depends on VIRTIO
tristate "9P Virtio Transport"
diff --git a/net/9p/Makefile b/net/9p/Makefile
index aa0a5641e5d0..1df9b344c30b 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_NET_9P) := 9pnet.o
+obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o
obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
@@ -9,9 +10,11 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
client.o \
error.o \
protocol.o \
- trans_fd.o \
trans_common.o \
+9pnet_fd-objs := \
+ trans_fd.o \
+
9pnet_virtio-objs := \
trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index d062f1e5bfb0..8bba0d9cf975 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1038,8 +1038,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (err)
goto put_trans;
- if (clnt->msize > clnt->trans_mod->maxsize)
+ if (clnt->msize > clnt->trans_mod->maxsize) {
clnt->msize = clnt->trans_mod->maxsize;
+ pr_info("Limiting 'msize' to %d as this is the maximum "
+ "supported by transport %s\n",
+ clnt->msize, clnt->trans_mod->name
+ );
+ }
if (clnt->msize < 4096) {
p9_debug(P9_DEBUG_ERROR,
diff --git a/net/9p/mod.c b/net/9p/mod.c
index c37fc201a944..55576c1866fa 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -83,7 +83,7 @@ void v9fs_unregister_trans(struct p9_trans_module *m)
}
EXPORT_SYMBOL(v9fs_unregister_trans);
-static struct p9_trans_module *_p9_get_trans_by_name(char *s)
+static struct p9_trans_module *_p9_get_trans_by_name(const char *s)
{
struct p9_trans_module *t, *found = NULL;
@@ -106,7 +106,7 @@ static struct p9_trans_module *_p9_get_trans_by_name(char *s)
* @s: string identifying transport
*
*/
-struct p9_trans_module *v9fs_get_trans_by_name(char *s)
+struct p9_trans_module *v9fs_get_trans_by_name(const char *s)
{
struct p9_trans_module *found = NULL;
@@ -123,6 +123,10 @@ struct p9_trans_module *v9fs_get_trans_by_name(char *s)
}
EXPORT_SYMBOL(v9fs_get_trans_by_name);
+static const char * const v9fs_default_transports[] = {
+ "virtio", "tcp", "fd", "unix", "xen", "rdma",
+};
+
/**
* v9fs_get_default_trans - get the default transport
*
@@ -131,6 +135,7 @@ EXPORT_SYMBOL(v9fs_get_trans_by_name);
struct p9_trans_module *v9fs_get_default_trans(void)
{
struct p9_trans_module *t, *found = NULL;
+ int i;
spin_lock(&v9fs_trans_lock);
@@ -148,6 +153,10 @@ struct p9_trans_module *v9fs_get_default_trans(void)
}
spin_unlock(&v9fs_trans_lock);
+
+ for (i = 0; !found && i < ARRAY_SIZE(v9fs_default_transports); i++)
+ found = v9fs_get_trans_by_name(v9fs_default_transports[i]);
+
return found;
}
EXPORT_SYMBOL(v9fs_get_default_trans);
@@ -177,7 +186,6 @@ static int __init init_p9(void)
p9_error_init();
pr_info("Installing 9P2000 support\n");
- p9_trans_fd_init();
return ret;
}
@@ -191,7 +199,6 @@ static void __exit exit_p9(void)
{
pr_info("Unloading 9P2000 support\n");
- p9_trans_fd_exit();
p9_client_exit();
}
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 827c47620fc0..8f8f95e39b03 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1090,6 +1090,7 @@ static struct p9_trans_module p9_tcp_trans = {
.show_options = p9_fd_show_options,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_9P("tcp");
static struct p9_trans_module p9_unix_trans = {
.name = "unix",
@@ -1103,6 +1104,7 @@ static struct p9_trans_module p9_unix_trans = {
.show_options = p9_fd_show_options,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_9P("unix");
static struct p9_trans_module p9_fd_trans = {
.name = "fd",
@@ -1116,6 +1118,7 @@ static struct p9_trans_module p9_fd_trans = {
.show_options = p9_fd_show_options,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_9P("fd");
/**
* p9_poll_workfn - poll worker thread
@@ -1149,7 +1152,7 @@ static void p9_poll_workfn(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "finish\n");
}
-int p9_trans_fd_init(void)
+static int __init p9_trans_fd_init(void)
{
v9fs_register_trans(&p9_tcp_trans);
v9fs_register_trans(&p9_unix_trans);
@@ -1158,10 +1161,17 @@ int p9_trans_fd_init(void)
return 0;
}
-void p9_trans_fd_exit(void)
+static void __exit p9_trans_fd_exit(void)
{
flush_work(&p9_poll_work);
v9fs_unregister_trans(&p9_tcp_trans);
v9fs_unregister_trans(&p9_unix_trans);
v9fs_unregister_trans(&p9_fd_trans);
}
+
+module_init(p9_trans_fd_init);
+module_exit(p9_trans_fd_exit);
+
+MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
+MODULE_DESCRIPTION("Filedescriptor Transport for 9P");
+MODULE_LICENSE("GPL");
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index bd5a89c4960d..b24a4fb0f0a2 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -648,7 +648,7 @@ fail:
* @args: args passed from sys_mount() for per-transport options (unused)
*
* This sets up a transport channel for 9p communication. Right now
- * we only match the first available channel, but eventually we couldlook up
+ * we only match the first available channel, but eventually we could look up
* alternate channels by matching devname versus a virtio_config entry.
* We use a simple reference count mechanism to ensure that only a single
* mount has a channel open at a time.
@@ -721,7 +721,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
mutex_unlock(&virtio_9p_lock);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 2418fa0b58f3..eb9fb55280ef 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -538,6 +538,7 @@ static void p9_trans_xen_exit(void)
}
module_exit(p9_trans_xen_exit);
+MODULE_ALIAS("xen:9pfs");
MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>");
MODULE_DESCRIPTION("Xen Transport for 9P");
MODULE_LICENSE("GPL");
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 4369ffa3302a..9bf736290e48 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -108,7 +108,7 @@ out:
static inline void *vcc_walk(struct seq_file *seq, loff_t l)
{
struct vcc_state *state = seq->private;
- int family = (uintptr_t)(PDE_DATA(file_inode(seq->file)));
+ int family = (uintptr_t)(pde_data(file_inode(seq->file)));
return __vcc_walk(&state->sk, family, &state->bucket, l) ?
state : NULL;
@@ -324,7 +324,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
page = get_zeroed_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
- dev = PDE_DATA(file_inode(file));
+ dev = pde_data(file_inode(file));
if (!dev->ops->proc_read)
length = -EINVAL;
else {
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1661979b6a6e..ee319779781e 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -611,7 +611,7 @@ EXPORT_SYMBOL(bt_sock_wait_ready);
static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(seq->private->l->lock)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
read_lock(&l->lock);
return seq_hlist_start_head(&l->head, *pos);
@@ -619,7 +619,7 @@ static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
return seq_hlist_next(v, &l->head, pos);
}
@@ -627,14 +627,14 @@ static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void bt_seq_stop(struct seq_file *seq, void *v)
__releases(seq->private->l->lock)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
read_unlock(&l->lock);
}
static int bt_seq_show(struct seq_file *seq, void *v)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
if (v == SEQ_START_TOKEN) {
seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent");
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index c9add7753b9f..40baa6b7321a 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -535,7 +535,7 @@ static int bnep_session(void *arg)
up_write(&bnep_session_sem);
free_netdev(dev);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 83eb84e8e688..90d130588a3e 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -323,7 +323,7 @@ static int cmtp_session(void *arg)
up_write(&cmtp_session_sem);
kfree(session);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 80848dfc01db..5940744a8cd8 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg)
l2cap_unregister_user(session->conn, &session->user);
hidp_session_put(session);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a52ad81596b7..55f47cadb114 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -615,6 +615,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
err = dev_set_allmulti(dev, 1);
if (err) {
br_multicast_del_port(p);
+ dev_put_track(dev, &p->dev_tracker);
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}
@@ -724,10 +725,10 @@ err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
br_multicast_del_port(p);
+ dev_put_track(dev, &p->dev_tracker);
kobject_put(&p->kobj);
dev_set_allmulti(dev, -1);
err1:
- dev_put(dev);
return err;
}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index bc88d901a1c0..95d209b52e6a 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -193,7 +193,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
{
char ifname[IFNAMSIZ];
struct net *net = m->private;
- struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode);
+ struct sock *sk = (struct sock *)pde_data(m->file->f_inode);
struct bcm_sock *bo = bcm_sk(sk);
struct bcm_op *op;
diff --git a/net/can/proc.c b/net/can/proc.c
index b3099f0a3cb8..bbce97825f13 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -305,7 +305,7 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
static int can_rcvlist_proc_show(struct seq_file *m, void *v)
{
/* double cast to prevent GCC warning */
- int idx = (int)(long)PDE_DATA(m->file->f_inode);
+ int idx = (int)(long)pde_data(m->file->f_inode);
struct net_device *dev;
struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 5622763ad402..7e51f128045d 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -7,7 +7,7 @@
#include <linux/ceph/buffer.h>
#include <linux/ceph/decode.h>
-#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */
+#include <linux/ceph/libceph.h> /* for kvmalloc */
struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
{
@@ -17,7 +17,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
if (!b)
return NULL;
- b->vec.iov_base = ceph_kvmalloc(len, gfp);
+ b->vec.iov_base = kvmalloc(len, gfp);
if (!b->vec.iov_base) {
kfree(b);
return NULL;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 97d6ea763e32..ecc400a0b7bb 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -190,41 +190,14 @@ int ceph_compare_options(struct ceph_options *new_opt,
}
EXPORT_SYMBOL(ceph_compare_options);
-/*
- * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
- * compatible with (a superset of) GFP_KERNEL. This is because while the
- * actual pages are allocated with the specified flags, the page table pages
- * are always allocated with GFP_KERNEL.
- *
- * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
- */
-void *ceph_kvmalloc(size_t size, gfp_t flags)
-{
- void *p;
-
- if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
- p = kvmalloc(size, flags);
- } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
- unsigned int nofs_flag = memalloc_nofs_save();
- p = kvmalloc(size, GFP_KERNEL);
- memalloc_nofs_restore(nofs_flag);
- } else {
- unsigned int noio_flag = memalloc_noio_save();
- p = kvmalloc(size, GFP_KERNEL);
- memalloc_noio_restore(noio_flag);
- }
-
- return p;
-}
-
-static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid)
{
int i = 0;
char tmp[3];
int err = -EINVAL;
int d;
- dout("parse_fsid '%s'\n", str);
+ dout("%s '%s'\n", __func__, str);
tmp[2] = 0;
while (*str && i < 16) {
if (ispunct(*str)) {
@@ -244,9 +217,10 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
if (i == 16)
err = 0;
- dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
+ dout("%s ret %d got fsid %pU\n", __func__, err, fsid);
return err;
}
+EXPORT_SYMBOL(ceph_parse_fsid);
/*
* ceph options
@@ -422,14 +396,14 @@ out:
}
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
- struct fc_log *l)
+ struct fc_log *l, char delim)
{
struct p_log log = {.prefix = "libceph", .log = l};
int ret;
- /* ip1[:port1][,ip2[:port2]...] */
+ /* ip1[:port1][<delim>ip2[:port2]...] */
ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON,
- &opt->num_mon);
+ &opt->num_mon, delim);
if (ret) {
error_plog(&log, "Failed to parse monitor IPs: %d", ret);
return ret;
@@ -455,8 +429,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_ip:
err = ceph_parse_ips(param->string,
param->string + param->size,
- &opt->my_addr,
- 1, NULL);
+ &opt->my_addr, 1, NULL, ',');
if (err) {
error_plog(&log, "Failed to parse ip: %d", err);
return err;
@@ -465,7 +438,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
break;
case Opt_fsid:
- err = parse_fsid(param->string, &opt->fsid);
+ err = ceph_parse_fsid(param->string, &opt->fsid);
if (err) {
error_plog(&log, "Failed to parse fsid: %d", err);
return err;
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 92d89b331645..051d22c0e4ad 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -147,7 +147,7 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
/*
- * Should be used for buffers allocated with ceph_kvmalloc().
+ * Should be used for buffers allocated with kvmalloc().
* Currently these are encrypt out-buffer (ceph_buffer) and decrypt
* in-buffer (msg front).
*
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 57d043b382ed..45eba2dcb67a 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1267,30 +1267,31 @@ static int ceph_parse_server_name(const char *name, size_t namelen,
*/
int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
- int max_count, int *count)
+ int max_count, int *count, char delim)
{
int i, ret = -EINVAL;
const char *p = c;
dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) {
+ char cur_delim = delim;
const char *ipend;
int port;
- char delim = ',';
if (*p == '[') {
- delim = ']';
+ cur_delim = ']';
p++;
}
- ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
+ ret = ceph_parse_server_name(p, end - p, &addr[i], cur_delim,
+ &ipend);
if (ret)
goto bad;
ret = -EINVAL;
p = ipend;
- if (delim == ']') {
+ if (cur_delim == ']') {
if (*p != ']') {
dout("missing matching ']'\n");
goto bad;
@@ -1326,11 +1327,11 @@ int ceph_parse_ips(const char *c, const char *end,
addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
addr[i].nonce = 0;
- dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
+ dout("%s got %s\n", __func__, ceph_pr_addr(&addr[i]));
if (p == end)
break;
- if (*p != ',')
+ if (*p != delim)
goto bad;
p++;
}
@@ -1920,7 +1921,7 @@ struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
/* front */
if (front_len) {
- m->front.iov_base = ceph_kvmalloc(front_len, flags);
+ m->front.iov_base = kvmalloc(front_len, flags);
if (m->front.iov_base == NULL) {
dout("ceph_msg_new can't allocate %d bytes\n",
front_len);
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index cc40ce4e02fb..c4099b641b38 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -308,7 +308,7 @@ static void *alloc_conn_buf(struct ceph_connection *con, int len)
if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs)))
return NULL;
- buf = ceph_kvmalloc(len, GFP_NOIO);
+ buf = kvmalloc(len, GFP_NOIO);
if (!buf)
return NULL;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 75b738083523..2823bb3cff55 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -980,7 +980,7 @@ static struct crush_work *alloc_workspace(const struct crush_map *c)
work_size = crush_work_size(c, CEPH_PG_MAX_SIZE);
dout("%s work_size %zu bytes\n", __func__, work_size);
- work = ceph_kvmalloc(work_size, GFP_NOIO);
+ work = kvmalloc(work_size, GFP_NOIO);
if (!work)
return NULL;
@@ -1190,9 +1190,9 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
if (max == map->max_osd)
return 0;
- state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
- weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
- addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
+ state = kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
+ weight = kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
+ addr = kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
if (!state || !weight || !addr) {
kvfree(state);
kvfree(weight);
@@ -1222,7 +1222,7 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
if (map->osd_primary_affinity) {
u32 *affinity;
- affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
+ affinity = kvmalloc(array_size(max, sizeof(*affinity)),
GFP_NOFS);
if (!affinity)
return -ENOMEM;
@@ -1503,7 +1503,7 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
if (!map->osd_primary_affinity) {
int i;
- map->osd_primary_affinity = ceph_kvmalloc(
+ map->osd_primary_affinity = kvmalloc(
array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
GFP_NOFS);
if (!map->osd_primary_affinity)
diff --git a/net/core/dev.c b/net/core/dev.c
index 84a0d9542fe9..1baab07820f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8981,6 +8981,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
goto out_unlock;
}
old_prog = link->prog;
+ if (old_prog->type != new_prog->type ||
+ old_prog->expected_attach_type != new_prog->expected_attach_type) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
if (old_prog == new_prog) {
/* no-op, don't disturb drivers */
bpf_prog_put(new_prog);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 213cb7b26b7a..6c2016f7f3d1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3364,7 +3364,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
if (*pos == 0)
@@ -3381,7 +3381,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -3401,7 +3401,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9b7171c40434..a5b5bb99c644 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -164,8 +164,10 @@ static void ops_exit_list(const struct pernet_operations *ops,
{
struct net *net;
if (ops->exit) {
- list_for_each_entry(net, net_exit_list, exit_list)
+ list_for_each_entry(net, net_exit_list, exit_list) {
ops->exit(net);
+ cond_resched();
+ }
}
if (ops->exit_batch)
ops->exit_batch(net_exit_list);
diff --git a/net/core/of_net.c b/net/core/of_net.c
index 95a64c813ae5..f1a9bf7578e7 100644
--- a/net/core/of_net.c
+++ b/net/core/of_net.c
@@ -61,7 +61,7 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
{
struct platform_device *pdev = of_find_device_by_node(np);
struct nvmem_cell *cell;
- const void *buf;
+ const void *mac;
size_t len;
int ret;
@@ -78,32 +78,21 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
if (IS_ERR(cell))
return PTR_ERR(cell);
- buf = nvmem_cell_read(cell, &len);
+ mac = nvmem_cell_read(cell, &len);
nvmem_cell_put(cell);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
-
- ret = 0;
- if (len == ETH_ALEN) {
- if (is_valid_ether_addr(buf))
- memcpy(addr, buf, ETH_ALEN);
- else
- ret = -EINVAL;
- } else if (len == 3 * ETH_ALEN - 1) {
- u8 mac[ETH_ALEN];
-
- if (mac_pton(buf, mac))
- memcpy(addr, mac, ETH_ALEN);
- else
- ret = -EINVAL;
- } else {
- ret = -EINVAL;
+ if (IS_ERR(mac))
+ return PTR_ERR(mac);
+
+ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+ kfree(mac);
+ return -EINVAL;
}
- kfree(buf);
+ memcpy(addr, mac, ETH_ALEN);
+ kfree(mac);
- return ret;
+ return 0;
}
/**
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 560a5e712dc3..84b62cd7bc57 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -546,7 +546,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
static int pgctrl_open(struct inode *inode, struct file *file)
{
- return single_open(file, pgctrl_show, PDE_DATA(inode));
+ return single_open(file, pgctrl_show, pde_data(inode));
}
static const struct proc_ops pktgen_proc_ops = {
@@ -1811,7 +1811,7 @@ static ssize_t pktgen_if_write(struct file *file,
static int pktgen_if_open(struct inode *inode, struct file *file)
{
- return single_open(file, pktgen_if_show, PDE_DATA(inode));
+ return single_open(file, pktgen_if_show, pde_data(inode));
}
static const struct proc_ops pktgen_if_proc_ops = {
@@ -1948,7 +1948,7 @@ out:
static int pktgen_thread_open(struct inode *inode, struct file *file)
{
- return single_open(file, pktgen_thread_show, PDE_DATA(inode));
+ return single_open(file, pktgen_thread_show, pde_data(inode));
}
static const struct proc_ops pktgen_thread_proc_ops = {
diff --git a/net/core/sock.c b/net/core/sock.c
index e21485ab285d..4ff806d71921 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -844,6 +844,8 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
}
num = ethtool_get_phc_vclocks(dev, &vclock_index);
+ dev_put(dev);
+
for (i = 0; i < num; i++) {
if (*(vclock_index + i) == phc_index) {
match = true;
@@ -2047,6 +2049,9 @@ void sk_destruct(struct sock *sk)
{
bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
+ WARN_ON_ONCE(!llist_empty(&sk->defer_list));
+ sk_defer_free_flush(sk);
+
if (rcu_access_pointer(sk->sk_reuseport_cb)) {
reuseport_detach_sock(sk);
use_call_rcu = true;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 828de171708f..b4589861b84c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/netlink.h>
+#include <linux/hash.h>
#include <net/arp.h>
#include <net/ip.h>
@@ -51,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
+static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;
#define DEVINDEX_HASHBITS 8
@@ -249,7 +251,6 @@ void free_fib_info(struct fib_info *fi)
pr_warn("Freeing alive fib_info %p\n", fi);
return;
}
- fib_info_cnt--;
call_rcu(&fi->rcu, free_fib_info_rcu);
}
@@ -260,6 +261,10 @@ void fib_release_info(struct fib_info *fi)
spin_lock_bh(&fib_info_lock);
if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
+
+ /* Paired with READ_ONCE() in fib_create_info(). */
+ WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
+
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
if (fi->nh) {
@@ -316,11 +321,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
- unsigned int mask = DEVINDEX_HASHSIZE - 1;
+ return hash_32(val, DEVINDEX_HASHBITS);
+}
+
+static struct hlist_head *
+fib_info_devhash_bucket(const struct net_device *dev)
+{
+ u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
- return (val ^
- (val >> DEVINDEX_HASHBITS) ^
- (val >> (DEVINDEX_HASHBITS * 2))) & mask;
+ return &fib_info_devhash[fib_devindex_hashfn(val)];
}
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -430,12 +439,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
- unsigned int hash;
spin_lock(&fib_info_lock);
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(dev);
+
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev &&
nh->fib_nh_gw4 == gw &&
@@ -1240,13 +1248,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}
-static inline unsigned int fib_laddr_hashfn(__be32 val)
+static struct hlist_head *
+fib_info_laddrhash_bucket(const struct net *net, __be32 val)
{
- unsigned int mask = (fib_info_hash_size - 1);
+ u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
+ fib_info_hash_bits);
- return ((__force u32)val ^
- ((__force u32)val >> 7) ^
- ((__force u32)val >> 14)) & mask;
+ return &fib_info_laddrhash[slot];
}
static struct hlist_head *fib_info_hash_alloc(int bytes)
@@ -1282,6 +1290,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
+ fib_info_hash_bits = ilog2(new_size);
for (i = 0; i < old_size; i++) {
struct hlist_head *head = &fib_info_hash[i];
@@ -1299,21 +1308,20 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
}
fib_info_hash = new_info_hash;
+ fib_info_laddrhash = new_laddrhash;
for (i = 0; i < old_size; i++) {
- struct hlist_head *lhead = &fib_info_laddrhash[i];
+ struct hlist_head *lhead = &old_laddrhash[i];
struct hlist_node *n;
struct fib_info *fi;
hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
struct hlist_head *ldest;
- unsigned int new_hash;
- new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
- ldest = &new_laddrhash[new_hash];
+ ldest = fib_info_laddrhash_bucket(fi->fib_net,
+ fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, ldest);
}
}
- fib_info_laddrhash = new_laddrhash;
spin_unlock_bh(&fib_info_lock);
@@ -1430,7 +1438,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
#endif
err = -ENOBUFS;
- if (fib_info_cnt >= fib_info_hash_size) {
+
+ /* Paired with WRITE_ONCE() in fib_release_info() */
+ if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
unsigned int new_size = fib_info_hash_size << 1;
struct hlist_head *new_info_hash;
struct hlist_head *new_laddrhash;
@@ -1462,7 +1472,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
return ERR_PTR(err);
}
- fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
fi->fib_scope = cfg->fc_scope;
@@ -1591,12 +1600,13 @@ link_it:
refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock_bh(&fib_info_lock);
+ fib_info_cnt++;
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
if (fi->fib_prefsrc) {
struct hlist_head *head;
- head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
+ head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, head);
}
if (fi->nh) {
@@ -1604,12 +1614,10 @@ link_it:
} else {
change_nexthops(fi) {
struct hlist_head *head;
- unsigned int hash;
if (!nexthop_nh->fib_nh_dev)
continue;
- hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
hlist_add_head(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
@@ -1870,16 +1878,16 @@ nla_put_failure:
*/
int fib_sync_down_addr(struct net_device *dev, __be32 local)
{
- int ret = 0;
- unsigned int hash = fib_laddr_hashfn(local);
- struct hlist_head *head = &fib_info_laddrhash[hash];
int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
struct net *net = dev_net(dev);
+ struct hlist_head *head;
struct fib_info *fi;
+ int ret = 0;
if (!fib_info_laddrhash || local == 0)
return 0;
+ head = fib_info_laddrhash_bucket(net, local);
hlist_for_each_entry(fi, head, fib_lhash) {
if (!net_eq(fi->fib_net, net) ||
fi->fib_tb_id != tb_id)
@@ -1961,8 +1969,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
- struct hlist_head *head = &fib_info_devhash[hash];
+ struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -1981,12 +1988,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
- int ret = 0;
- int scope = RT_SCOPE_NOWHERE;
+ struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_info *prev_fi = NULL;
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
- struct hlist_head *head = &fib_info_devhash[hash];
+ int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
+ int ret = 0;
if (force)
scope = -1;
@@ -2131,7 +2137,6 @@ out:
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct fib_info *prev_fi;
- unsigned int hash;
struct hlist_head *head;
struct fib_nh *nh;
int ret;
@@ -2147,8 +2152,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}
prev_fi = NULL;
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(dev);
ret = 0;
hlist_for_each_entry(nh, head, nh_hash) {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 05cd198d7a6b..341096807100 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -235,9 +235,9 @@ void inet_frag_kill(struct inet_frag_queue *fq)
/* The RCU read lock provides a memory barrier
* guaranteeing that if fqdir->dead is false then
* the hash table destruction will not start until
- * after we unlock. Paired with inet_frags_exit_net().
+ * after we unlock. Paired with fqdir_pre_exit().
*/
- if (!fqdir->dead) {
+ if (!READ_ONCE(fqdir->dead)) {
rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
fqdir->f->rhash_params);
refcount_dec(&fq->refcnt);
@@ -352,9 +352,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
{
+ /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
+ long high_thresh = READ_ONCE(fqdir->high_thresh);
struct inet_frag_queue *fq = NULL, *prev;
- if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
+ if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
return NULL;
rcu_read_lock();
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cfeb8890f94e..fad803d2d711 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t)
rcu_read_lock();
- if (qp->q.fqdir->dead)
+ /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(qp->q.fqdir->dead))
goto out_rcu_unlock;
spin_lock(&qp->q.lock);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2ac2b95c5694..99db2e41ed10 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -604,8 +604,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
key = &info->key;
ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
- tunnel_id_to_key32(key->tun_id), key->tos, 0,
- skb->mark, skb_get_hash(skb));
+ tunnel_id_to_key32(key->tun_id),
+ key->tos & ~INET_ECN_MASK, 0, skb->mark,
+ skb_get_hash(skb));
rt = ip_route_output_key(dev_net(dev), &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index b518f20c9a24..f8e176c77d1c 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -776,7 +776,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- struct clusterip_config *c = PDE_DATA(inode);
+ struct clusterip_config *c = pde_data(inode);
sf->private = c;
@@ -788,7 +788,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
static int clusterip_proc_release(struct inode *inode, struct file *file)
{
- struct clusterip_config *c = PDE_DATA(inode);
+ struct clusterip_config *c = pde_data(inode);
int ret;
ret = seq_release(inode, file);
@@ -802,7 +802,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *ofs)
{
- struct clusterip_config *c = PDE_DATA(file_inode(file));
+ struct clusterip_config *c = pde_data(file_inode(file));
#define PROC_WRITELEN 10
char buffer[PROC_WRITELEN+1];
unsigned long nodenum;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a53f256bf9d3..9eb5fc247868 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -971,7 +971,7 @@ struct proto raw_prot = {
static struct sock *raw_get_first(struct seq_file *seq)
{
struct sock *sk;
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
@@ -987,7 +987,7 @@ found:
static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
{
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
do {
@@ -1016,7 +1016,7 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
void *raw_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(&h->lock)
{
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
read_lock(&h->lock);
return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -1039,7 +1039,7 @@ EXPORT_SYMBOL_GPL(raw_seq_next);
void raw_seq_stop(struct seq_file *seq, void *v)
__releases(&h->lock)
{
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
read_unlock(&h->lock);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b3f34e366b27..b53476e78c84 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3002,7 +3002,7 @@ static unsigned short seq_file_family(const struct seq_file *seq)
#endif
/* Iterated from proc fs */
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
return afinfo->family;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 464590ea922e..090360939401 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2960,7 +2960,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
++state->bucket) {
@@ -2993,7 +2993,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
do {
sk = sk_next(sk);
@@ -3050,7 +3050,7 @@ void udp_seq_stop(struct seq_file *seq, void *v)
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
if (state->bucket <= afinfo->udp_table->mask)
spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a618dce7e0bc..c0b138c20992 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -956,7 +956,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
}
- if (rt->rt_type != RTN_UNICAST) {
+ if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
ip_rt_put(rt);
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 86ad15abf897..750f9f9b4daf 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -285,7 +285,7 @@ static void __mctp_route_test_init(struct kunit *test,
struct mctp_test_route **rtp,
struct socket **sockp)
{
- struct sockaddr_mctp addr;
+ struct sockaddr_mctp addr = {0};
struct mctp_test_route *rt;
struct mctp_test_dev *dev;
struct socket *sock;
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 7121ce2a47c0..78814417d753 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -608,7 +608,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
bitmap = &ncf->bitmap;
spin_lock_irqsave(&nc->lock, flags);
- index = find_next_bit(bitmap, ncf->n_vids, 0);
+ index = find_first_bit(bitmap, ncf->n_vids);
if (index >= ncf->n_vids) {
spin_unlock_irqrestore(&nc->lock, flags);
return -1;
@@ -667,7 +667,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return -1;
}
- index = find_next_zero_bit(bitmap, ncf->n_vids, 0);
+ index = find_first_zero_bit(bitmap, ncf->n_vids);
if (index < 0 || index >= ncf->n_vids) {
netdev_err(ndp->ndev.dev,
"Channel %u already has all VLAN filters set\n",
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 58dcafe8bf79..7d00a1452b1d 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -206,7 +206,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_connlimit *priv_src = nft_expr_priv(src);
priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
- if (priv_dst->list)
+ if (!priv_dst->list)
return -ENOMEM;
nf_conncount_list_init(priv_dst->list);
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 5ee33d0ccd4e..4f745a409d34 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -106,7 +106,7 @@ static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
- if (priv_dst->last)
+ if (!priv_dst->last)
return -ENOMEM;
return 0;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index f04be5be73a0..c4f308460dd1 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -145,7 +145,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
priv_dst->invert = priv_src->invert;
priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
- if (priv_dst->limit)
+ if (!priv_dst->limit)
return -ENOMEM;
spin_lock_init(&priv_dst->limit->lock);
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0484aef74273..f394a0b562f6 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -237,7 +237,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_quota *priv_dst = nft_expr_priv(dst);
priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
- if (priv_dst->consumed)
+ if (!priv_dst->consumed)
return -ENOMEM;
atomic64_set(priv_dst->consumed, 0);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 25524e393349..54a489f16b17 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1517,7 +1517,7 @@ EXPORT_SYMBOL_GPL(xt_unregister_table);
#ifdef CONFIG_PROC_FS
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
{
- u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
+ u8 af = (unsigned long)pde_data(file_inode(seq->file));
struct net *net = seq_file_net(seq);
struct xt_pernet *xt_net;
@@ -1529,7 +1529,7 @@ static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
+ u8 af = (unsigned long)pde_data(file_inode(seq->file));
struct net *net = seq_file_net(seq);
struct xt_pernet *xt_net;
@@ -1540,7 +1540,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void xt_table_seq_stop(struct seq_file *seq, void *v)
{
- u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
+ u_int8_t af = (unsigned long)pde_data(file_inode(seq->file));
mutex_unlock(&xt[af].mutex);
}
@@ -1584,7 +1584,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
[MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE,
};
- uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
+ uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
if (ppos != NULL)
@@ -1633,7 +1633,7 @@ static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
{
- uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
+ uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
switch (trav->class) {
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9c5cfd74a0ee..0859b8f76764 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1052,7 +1052,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
static void *dl_seq_start(struct seq_file *s, loff_t *pos)
__acquires(htable->lock)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket;
spin_lock_bh(&htable->lock);
@@ -1069,7 +1069,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
*pos = ++(*bucket);
@@ -1083,7 +1083,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void dl_seq_stop(struct seq_file *s, void *v)
__releases(htable->lock)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
if (!IS_ERR(bucket))
@@ -1125,7 +1125,7 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1140,7 +1140,7 @@ static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1155,7 +1155,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1169,7 +1169,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_show_v2(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = (unsigned int *)v;
struct dsthash_ent *ent;
@@ -1183,7 +1183,7 @@ static int dl_seq_show_v2(struct seq_file *s, void *v)
static int dl_seq_show_v1(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
struct dsthash_ent *ent;
@@ -1197,7 +1197,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v)
static int dl_seq_show(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
struct dsthash_ent *ent;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 0446307516cd..7ddb9a78e3fc 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -551,7 +551,7 @@ static int recent_seq_open(struct inode *inode, struct file *file)
if (st == NULL)
return -ENOMEM;
- st->table = PDE_DATA(inode);
+ st->table = pde_data(inode);
return 0;
}
@@ -559,7 +559,7 @@ static ssize_t
recent_mt_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *loff)
{
- struct recent_table *t = PDE_DATA(file_inode(file));
+ struct recent_table *t = pde_data(file_inode(file));
struct recent_entry *e;
char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
const char *c = buf;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 6cfd30fc0798..0b93a17b9f11 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -789,6 +789,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
+ if (!llcp_sock->local) {
+ release_sock(sk);
+ return -ENODEV;
+ }
+
if (sk->sk_type == SOCK_DGRAM) {
DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr,
msg->msg_name);
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index fa611678af05..18196e1c8c2f 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -79,7 +79,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
int rc;
/* start channels */
- rc = mhi_prepare_for_transfer(mhi_dev);
+ rc = mhi_prepare_for_transfer_autoqueue(mhi_dev);
if (rc)
return rc;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c9c6f49f9c28..2cb496c84878 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1062,7 +1062,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
qdisc_offload_graft_root(dev, new, old, extack);
- if (new && new->ops->attach)
+ if (new && new->ops->attach && !ingress)
goto skip;
for (i = 0; i < num_q; i++) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b07bd1c7330f..f893d9a81b01 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1529,6 +1529,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
{
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
+ r->mpu = conf->mpu;
r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index aa3bcaaeabf7..961854e56736 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -634,9 +634,13 @@ static void smc_conn_abort(struct smc_sock *smc, int local_first)
{
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
+ bool lgr_valid = false;
+
+ if (smc_conn_lgr_valid(conn))
+ lgr_valid = true;
smc_conn_free(conn);
- if (local_first)
+ if (local_first && lgr_valid)
smc_lgr_cleanup_early(lgr);
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 1a4fc1c6c4ab..3d0b8e300deb 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -221,6 +221,7 @@ struct smc_connection {
*/
u64 peer_token; /* SMC-D token of peer */
u8 killed : 1; /* abnormal termination */
+ u8 freed : 1; /* normal termiation */
u8 out_of_sync : 1; /* out of sync with peer */
};
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 84c8a4374fdd..9d5a97168969 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -197,7 +197,8 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
int rc;
- if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown))
+ if (!smc_conn_lgr_valid(conn) ||
+ (conn->lgr->is_smcd && conn->lgr->peer_shutdown))
return -EPIPE;
if (conn->lgr->is_smcd) {
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 6be95a2a7b25..ce27399b38b1 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -774,7 +774,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
SMC_FIRST_CONTACT_MASK : 0;
- if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) &&
+ if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) &&
smc_ib_is_valid_local_systemid())
memcpy(dclc.id_for_peer, local_systemid,
sizeof(local_systemid));
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 8935ef4811b0..29525d03b253 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -211,14 +211,13 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
- if (!lgr)
+ if (!smc_conn_lgr_valid(conn))
return;
write_lock_bh(&lgr->conns_lock);
if (conn->alert_token_local) {
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
- conn->lgr = NULL;
}
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
@@ -749,9 +748,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
}
get_device(&lnk->smcibdev->ibdev->dev);
atomic_inc(&lnk->smcibdev->lnk_cnt);
+ refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
+ lnk->clearing = 0;
lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
+ smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
lnk->link_idx = link_idx;
smc_ibdev_cnt_inc(lnk);
smcr_copy_dev_info_to_link(lnk);
@@ -806,6 +808,7 @@ out:
lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&smcibdev->lnk_cnt))
wake_up(&smcibdev->lnks_deleted);
+ smc_lgr_put(lgr); /* lgr_hold above */
return rc;
}
@@ -844,6 +847,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->terminating = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
+ refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
mutex_init(&lgr->sndbufs_lock);
mutex_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
@@ -996,8 +1000,12 @@ void smc_switch_link_and_count(struct smc_connection *conn,
struct smc_link *to_lnk)
{
atomic_dec(&conn->lnk->conn_cnt);
+ /* link_hold in smc_conn_create() */
+ smcr_link_put(conn->lnk);
conn->lnk = to_lnk;
atomic_inc(&conn->lnk->conn_cnt);
+ /* link_put in smc_conn_free() */
+ smcr_link_hold(conn->lnk);
}
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
@@ -1130,8 +1138,19 @@ void smc_conn_free(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
- if (!lgr)
+ if (!lgr || conn->freed)
+ /* Connection has never been registered in a
+ * link group, or has already been freed.
+ */
return;
+
+ conn->freed = 1;
+ if (!smc_conn_lgr_valid(conn))
+ /* Connection has already unregistered from
+ * link group.
+ */
+ goto lgr_put;
+
if (lgr->is_smcd) {
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
@@ -1148,6 +1167,10 @@ void smc_conn_free(struct smc_connection *conn)
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
+lgr_put:
+ if (!lgr->is_smcd)
+ smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
+ smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
}
/* unregister a link from a buf_desc */
@@ -1203,13 +1226,29 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
}
}
-/* must be called under lgr->llc_conf_mutex lock */
-void smcr_link_clear(struct smc_link *lnk, bool log)
+static void __smcr_link_clear(struct smc_link *lnk)
{
+ struct smc_link_group *lgr = lnk->lgr;
struct smc_ib_device *smcibdev;
- if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
+ smc_wr_free_link_mem(lnk);
+ smc_ibdev_cnt_dec(lnk);
+ put_device(&lnk->smcibdev->ibdev->dev);
+ smcibdev = lnk->smcibdev;
+ memset(lnk, 0, sizeof(struct smc_link));
+ lnk->state = SMC_LNK_UNUSED;
+ if (!atomic_dec_return(&smcibdev->lnk_cnt))
+ wake_up(&smcibdev->lnks_deleted);
+ smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_clear(struct smc_link *lnk, bool log)
+{
+ if (!lnk->lgr || lnk->clearing ||
+ lnk->state == SMC_LNK_UNUSED)
return;
+ lnk->clearing = 1;
lnk->peer_qpn = 0;
smc_llc_link_clear(lnk, log);
smcr_buf_unmap_lgr(lnk);
@@ -1218,14 +1257,18 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
- smc_wr_free_link_mem(lnk);
- smc_ibdev_cnt_dec(lnk);
- put_device(&lnk->smcibdev->ibdev->dev);
- smcibdev = lnk->smcibdev;
- memset(lnk, 0, sizeof(struct smc_link));
- lnk->state = SMC_LNK_UNUSED;
- if (!atomic_dec_return(&smcibdev->lnk_cnt))
- wake_up(&smcibdev->lnks_deleted);
+ smcr_link_put(lnk); /* theoretically last link_put */
+}
+
+void smcr_link_hold(struct smc_link *lnk)
+{
+ refcount_inc(&lnk->refcnt);
+}
+
+void smcr_link_put(struct smc_link *lnk)
+{
+ if (refcount_dec_and_test(&lnk->refcnt))
+ __smcr_link_clear(lnk);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
@@ -1290,6 +1333,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
__smc_lgr_free_bufs(lgr, true);
}
+/* won't be freed until no one accesses to lgr anymore */
+static void __smc_lgr_free(struct smc_link_group *lgr)
+{
+ smc_lgr_free_bufs(lgr);
+ if (lgr->is_smcd) {
+ if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
+ wake_up(&lgr->smcd->lgrs_deleted);
+ } else {
+ smc_wr_free_lgr_mem(lgr);
+ if (!atomic_dec_return(&lgr_cnt))
+ wake_up(&lgrs_deleted);
+ }
+ kfree(lgr);
+}
+
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
@@ -1305,19 +1363,23 @@ static void smc_lgr_free(struct smc_link_group *lgr)
smc_llc_lgr_clear(lgr);
}
- smc_lgr_free_bufs(lgr);
destroy_workqueue(lgr->tx_wq);
if (lgr->is_smcd) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
- if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
- wake_up(&lgr->smcd->lgrs_deleted);
- } else {
- smc_wr_free_lgr_mem(lgr);
- if (!atomic_dec_return(&lgr_cnt))
- wake_up(&lgrs_deleted);
}
- kfree(lgr);
+ smc_lgr_put(lgr); /* theoretically last lgr_put */
+}
+
+void smc_lgr_hold(struct smc_link_group *lgr)
+{
+ refcount_inc(&lgr->refcnt);
+}
+
+void smc_lgr_put(struct smc_link_group *lgr)
+{
+ if (refcount_dec_and_test(&lgr->refcnt))
+ __smc_lgr_free(lgr);
}
static void smc_sk_wake_ups(struct smc_sock *smc)
@@ -1469,16 +1531,11 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd)
/* Called when an SMCR device is removed or the smc module is unloaded.
* If smcibdev is given, all SMCR link groups using this device are terminated.
* If smcibdev is NULL, all SMCR link groups are terminated.
- *
- * We must wait here for QPs been destroyed before we destroy the CQs,
- * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus
- * smc_sock cannot be released.
*/
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
- LIST_HEAD(lgr_linkdown_list);
int i;
spin_lock_bh(&smc_lgr_list.lock);
@@ -1490,7 +1547,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].smcibdev == smcibdev)
- list_move_tail(&lgr->list, &lgr_linkdown_list);
+ smcr_link_down_cond_sched(&lgr->lnk[i]);
}
}
}
@@ -1502,16 +1559,6 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
__smc_lgr_terminate(lgr, false);
}
- list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
- for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (lgr->lnk[i].smcibdev == smcibdev) {
- mutex_lock(&lgr->llc_conf_mutex);
- smcr_link_down_cond(&lgr->lnk[i]);
- mutex_unlock(&lgr->llc_conf_mutex);
- }
- }
- }
-
if (smcibdev) {
if (atomic_read(&smcibdev->lnk_cnt))
wait_event(smcibdev->lnks_deleted,
@@ -1856,6 +1903,10 @@ create:
goto out;
}
}
+ smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
+ if (!conn->lgr->is_smcd)
+ smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
+ conn->freed = 0;
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
@@ -2240,14 +2291,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
- if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
+ !smc_link_active(conn->lnk))
return;
smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
- if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
+ !smc_link_active(conn->lnk))
return;
smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
@@ -2256,7 +2309,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
int i;
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
return;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&conn->lgr->lnk[i]))
@@ -2270,7 +2323,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
int i;
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
return;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&conn->lgr->lnk[i]))
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 521c64a3d8d3..4cb03e942364 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -137,6 +137,8 @@ struct smc_link {
u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
u8 link_idx; /* index in lgr link array */
u8 link_is_asym; /* is link asymmetric? */
+ u8 clearing : 1; /* link is being cleared */
+ refcount_t refcnt; /* link reference count */
struct smc_link_group *lgr; /* parent link group */
struct work_struct link_down_wrk; /* wrk to bring link down */
char ibname[IB_DEVICE_NAME_MAX]; /* ib device name */
@@ -249,6 +251,7 @@ struct smc_link_group {
u8 terminating : 1;/* lgr is terminating */
u8 freeing : 1; /* lgr is being freed */
+ refcount_t refcnt; /* lgr reference count */
bool is_smcd; /* SMC-R or SMC-D */
u8 smc_version;
u8 negotiated_eid[SMC_MAX_EID_LEN];
@@ -409,6 +412,11 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
+static inline bool smc_conn_lgr_valid(struct smc_connection *conn)
+{
+ return conn->lgr && conn->alert_token_local;
+}
+
/*
* Returns true if the specified link is usable.
*
@@ -487,6 +495,8 @@ struct smc_clc_msg_accept_confirm;
void smc_lgr_cleanup_early(struct smc_link_group *lgr);
void smc_lgr_terminate_sched(struct smc_link_group *lgr);
+void smc_lgr_hold(struct smc_link_group *lgr);
+void smc_lgr_put(struct smc_link_group *lgr);
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
@@ -518,6 +528,8 @@ void smc_core_exit(void);
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini);
void smcr_link_clear(struct smc_link *lnk, bool log);
+void smcr_link_hold(struct smc_link *lnk);
+void smcr_link_put(struct smc_link *lnk);
void smc_switch_link_and_count(struct smc_connection *conn,
struct smc_link *to_lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 7c8dad28c18d..b8898c787d23 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -89,7 +89,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
r->diag_state = sk->sk_state;
if (smc->use_fallback)
r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP;
- else if (smc->conn.lgr && smc->conn.lgr->is_smcd)
+ else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd)
r->diag_mode = SMC_DIAG_MODE_SMCD;
else
r->diag_mode = SMC_DIAG_MODE_SMCR;
@@ -142,7 +142,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
- if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
+ if (smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_link *link = smc->conn.lnk;
@@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
goto errout;
}
- if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
+ if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_connection *conn = &smc->conn;
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index db9825c01e0a..291f1484a1b7 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -369,7 +369,8 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
new_pe->ndev = ndev;
- netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
+ if (ndev)
+ netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
rc = -EEXIST;
new_netdev = true;
write_lock(&pnettable->lock);
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 47512ccce5ef..a54e90a1110f 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -125,10 +125,6 @@ int smc_wr_tx_v2_send(struct smc_link *link,
int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
unsigned long timeout);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
-void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter,
- smc_wr_tx_dismisser dismisser,
- unsigned long data);
void smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index b87565b64928..c2ba9d4cd2c7 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1433,7 +1433,7 @@ static bool use_gss_proxy(struct net *net)
static ssize_t write_gssp(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct net *net = PDE_DATA(file_inode(file));
+ struct net *net = pde_data(file_inode(file));
char tbuf[20];
unsigned long i;
int res;
@@ -1461,7 +1461,7 @@ static ssize_t write_gssp(struct file *file, const char __user *buf,
static ssize_t read_gssp(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct net *net = PDE_DATA(file_inode(file));
+ struct net *net = pde_data(file_inode(file));
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
unsigned long p = *ppos;
char tbuf[10];
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 59641803472c..bb1177395b99 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1536,7 +1536,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_read(filp, buf, count, ppos, cd);
}
@@ -1544,14 +1544,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_write(filp, buf, count, ppos, cd);
}
static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_poll(filp, wait, cd);
}
@@ -1560,21 +1560,21 @@ static long cache_ioctl_procfs(struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_ioctl(inode, filp, cmd, arg, cd);
}
static int cache_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_open(inode, filp, cd);
}
static int cache_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_release(inode, filp, cd);
}
@@ -1591,14 +1591,14 @@ static const struct proc_ops cache_channel_proc_ops = {
static int content_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return content_open(inode, filp, cd);
}
static int content_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return content_release(inode, filp, cd);
}
@@ -1612,14 +1612,14 @@ static const struct proc_ops content_proc_ops = {
static int open_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return open_flush(inode, filp, cd);
}
static int release_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return release_flush(inode, filp, cd);
}
@@ -1627,7 +1627,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp)
static ssize_t read_flush_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return read_flush(filp, buf, count, ppos, cd);
}
@@ -1636,7 +1636,7 @@ static ssize_t write_flush_procfs(struct file *filp,
const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return write_flush(filp, buf, count, ppos, cd);
}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index c964b48eaaba..52908f9e6eab 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -66,7 +66,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
static int rpc_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, rpc_proc_show, PDE_DATA(inode));
+ return single_open(file, rpc_proc_show, pde_data(inode));
}
static const struct proc_ops rpc_proc_ops = {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4292278a9552..2aabec2b4bec 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -37,18 +37,37 @@
static void svc_unregister(const struct svc_serv *serv, struct net *net);
-#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function)
-
#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
/*
+ * Mode for mapping cpus to pools.
+ */
+enum {
+ SVC_POOL_AUTO = -1, /* choose one of the others */
+ SVC_POOL_GLOBAL, /* no mapping, just a single global pool
+ * (legacy & UP mode) */
+ SVC_POOL_PERCPU, /* one pool per cpu */
+ SVC_POOL_PERNODE /* one pool per numa node */
+};
+
+/*
* Structure for mapping cpus to pools and vice versa.
* Setup once during sunrpc initialisation.
*/
-struct svc_pool_map svc_pool_map = {
+
+struct svc_pool_map {
+ int count; /* How many svc_servs use us */
+ int mode; /* Note: int not enum to avoid
+ * warnings about "enumeration value
+ * not handled in switch" */
+ unsigned int npools;
+ unsigned int *pool_to; /* maps pool id to cpu or node */
+ unsigned int *to_pool; /* maps cpu or node to pool id */
+};
+
+static struct svc_pool_map svc_pool_map = {
.mode = SVC_POOL_DEFAULT
};
-EXPORT_SYMBOL_GPL(svc_pool_map);
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
@@ -219,10 +238,12 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
/*
* Add a reference to the global map of cpus to pools (and
- * vice versa). Initialise the map if we're the first user.
- * Returns the number of pools.
+ * vice versa) if pools are in use.
+ * Initialise the map if we're the first user.
+ * Returns the number of pools. If this is '1', no reference
+ * was taken.
*/
-unsigned int
+static unsigned int
svc_pool_map_get(void)
{
struct svc_pool_map *m = &svc_pool_map;
@@ -232,6 +253,7 @@ svc_pool_map_get(void)
if (m->count++) {
mutex_unlock(&svc_pool_map_mutex);
+ WARN_ON_ONCE(m->npools <= 1);
return m->npools;
}
@@ -247,30 +269,36 @@ svc_pool_map_get(void)
break;
}
- if (npools < 0) {
+ if (npools <= 0) {
/* default, or memory allocation failure */
npools = 1;
m->mode = SVC_POOL_GLOBAL;
}
m->npools = npools;
+ if (npools == 1)
+ /* service is unpooled, so doesn't hold a reference */
+ m->count--;
+
mutex_unlock(&svc_pool_map_mutex);
- return m->npools;
+ return npools;
}
-EXPORT_SYMBOL_GPL(svc_pool_map_get);
/*
- * Drop a reference to the global map of cpus to pools.
+ * Drop a reference to the global map of cpus to pools, if
+ * pools were in use, i.e. if npools > 1.
* When the last reference is dropped, the map data is
* freed; this allows the sysadmin to change the pool
* mode using the pool_mode module option without
* rebooting or re-loading sunrpc.ko.
*/
-void
-svc_pool_map_put(void)
+static void
+svc_pool_map_put(int npools)
{
struct svc_pool_map *m = &svc_pool_map;
+ if (npools <= 1)
+ return;
mutex_lock(&svc_pool_map_mutex);
if (!--m->count) {
@@ -283,7 +311,6 @@ svc_pool_map_put(void)
mutex_unlock(&svc_pool_map_mutex);
}
-EXPORT_SYMBOL_GPL(svc_pool_map_put);
static int svc_pool_map_get_node(unsigned int pidx)
{
@@ -340,21 +367,18 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
struct svc_pool_map *m = &svc_pool_map;
unsigned int pidx = 0;
- /*
- * An uninitialised map happens in a pure client when
- * lockd is brought up, so silently treat it the
- * same as SVC_POOL_GLOBAL.
- */
- if (svc_serv_is_pooled(serv)) {
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- pidx = m->to_pool[cpu];
- break;
- case SVC_POOL_PERNODE:
- pidx = m->to_pool[cpu_to_node(cpu)];
- break;
- }
+ if (serv->sv_nrpools <= 1)
+ return serv->sv_pools;
+
+ switch (m->mode) {
+ case SVC_POOL_PERCPU:
+ pidx = m->to_pool[cpu];
+ break;
+ case SVC_POOL_PERNODE:
+ pidx = m->to_pool[cpu_to_node(cpu)];
+ break;
}
+
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
@@ -435,7 +459,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
- serv->sv_nrthreads = 1;
+ kref_init(&serv->sv_refcnt);
serv->sv_stats = prog->pg_stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
@@ -507,7 +531,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
goto out_err;
return serv;
out_err:
- svc_pool_map_put();
+ svc_pool_map_put(npools);
return NULL;
}
EXPORT_SYMBOL_GPL(svc_create_pooled);
@@ -523,23 +547,14 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net);
/*
* Destroy an RPC service. Should be called with appropriate locking to
- * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
+ * protect sv_permsocks and sv_tempsocks.
*/
void
-svc_destroy(struct svc_serv *serv)
+svc_destroy(struct kref *ref)
{
- dprintk("svc: svc_destroy(%s, %d)\n",
- serv->sv_program->pg_name,
- serv->sv_nrthreads);
-
- if (serv->sv_nrthreads) {
- if (--(serv->sv_nrthreads) != 0) {
- svc_sock_update_bufs(serv);
- return;
- }
- } else
- printk("svc_destroy: no threads for serv=%p!\n", serv);
+ struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
+ dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
del_timer_sync(&serv->sv_temptimer);
/*
@@ -551,8 +566,7 @@ svc_destroy(struct svc_serv *serv)
cache_clean_deferred(serv);
- if (svc_serv_is_pooled(serv))
- svc_pool_map_put();
+ svc_pool_map_put(serv->sv_nrpools);
kfree(serv->sv_pools);
kfree(serv);
@@ -638,7 +652,7 @@ out_enomem:
}
EXPORT_SYMBOL_GPL(svc_rqst_alloc);
-struct svc_rqst *
+static struct svc_rqst *
svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
{
struct svc_rqst *rqstp;
@@ -647,14 +661,17 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
return ERR_PTR(-ENOMEM);
- serv->sv_nrthreads++;
+ svc_get(serv);
+ spin_lock_bh(&serv->sv_lock);
+ serv->sv_nrthreads += 1;
+ spin_unlock_bh(&serv->sv_lock);
+
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
spin_unlock_bh(&pool->sp_lock);
return rqstp;
}
-EXPORT_SYMBOL_GPL(svc_prepare_thread);
/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
@@ -748,59 +765,13 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return 0;
}
-
-/* destroy old threads */
-static int
-svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
-{
- struct task_struct *task;
- unsigned int state = serv->sv_nrthreads-1;
-
- /* destroy old threads */
- do {
- task = choose_victim(serv, pool, &state);
- if (task == NULL)
- break;
- send_sig(SIGINT, task, 1);
- nrservs++;
- } while (nrservs < 0);
-
- return 0;
-}
-
/*
* Create or destroy enough new threads to make the number
* of threads the given number. If `pool' is non-NULL, applies
* only to threads in that pool, otherwise round-robins between
* all pools. Caller must ensure that mutual exclusion between this and
* server startup or shutdown.
- *
- * Destroying threads relies on the service threads filling in
- * rqstp->rq_task, which only the nfs ones do. Assumes the serv
- * has been created using svc_create_pooled().
- *
- * Based on code that used to be in nfsd_svc() but tweaked
- * to be pool-aware.
*/
-int
-svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
-{
- if (pool == NULL) {
- /* The -1 assumes caller has done a svc_get() */
- nrservs -= (serv->sv_nrthreads-1);
- } else {
- spin_lock_bh(&pool->sp_lock);
- nrservs -= pool->sp_nrthreads;
- spin_unlock_bh(&pool->sp_lock);
- }
-
- if (nrservs > 0)
- return svc_start_kthreads(serv, pool, nrservs);
- if (nrservs < 0)
- return svc_signal_kthreads(serv, pool, nrservs);
- return 0;
-}
-EXPORT_SYMBOL_GPL(svc_set_num_threads);
/* destroy old threads */
static int
@@ -821,11 +792,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
}
int
-svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
if (pool == NULL) {
- /* The -1 assumes caller has done a svc_get() */
- nrservs -= (serv->sv_nrthreads-1);
+ nrservs -= serv->sv_nrthreads;
} else {
spin_lock_bh(&pool->sp_lock);
nrservs -= pool->sp_nrthreads;
@@ -838,7 +808,7 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser
return svc_stop_kthreads(serv, pool, nrservs);
return 0;
}
-EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+EXPORT_SYMBOL_GPL(svc_set_num_threads);
/**
* svc_rqst_replace_page - Replace one page in rq_pages[]
@@ -890,11 +860,14 @@ svc_exit_thread(struct svc_rqst *rqstp)
list_del_rcu(&rqstp->rq_all);
spin_unlock_bh(&pool->sp_lock);
+ spin_lock_bh(&serv->sv_lock);
+ serv->sv_nrthreads -= 1;
+ spin_unlock_bh(&serv->sv_lock);
+ svc_sock_update_bufs(serv);
+
svc_rqst_free(rqstp);
- /* Release the server */
- if (serv)
- svc_destroy(serv);
+ svc_put(serv);
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 1e99ba1b9d72..b21ad7994147 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -6,6 +6,7 @@
*/
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/errno.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
@@ -243,7 +244,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
if (IS_ERR(xprt))
trace_svc_xprt_create_err(serv->sv_program->pg_name,
- xcl->xcl_name, sap, xprt);
+ xcl->xcl_name, sap, len, xprt);
return xprt;
}
@@ -264,8 +265,6 @@ void svc_xprt_received(struct svc_xprt *xprt)
return;
}
- trace_svc_xprt_received(xprt);
-
/* As soon as we clear busy, the xprt could be closed and
* 'put', so we need a reference to call svc_enqueue_xprt with:
*/
@@ -466,7 +465,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
out_unlock:
rcu_read_unlock();
put_cpu();
- trace_svc_xprt_do_enqueue(xprt, rqstp);
+ trace_svc_xprt_enqueue(xprt, rqstp);
}
EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
@@ -688,7 +687,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
return -EINTR;
}
trace_svc_alloc_arg_err(pages);
- schedule_timeout(msecs_to_jiffies(500));
+ memalloc_retry_wait(GFP_KERNEL);
}
rqstp->rq_page_end = &rqstp->rq_pages[pages];
rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */
@@ -842,8 +841,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
} else
svc_xprt_received(xprt);
+
out:
- trace_svc_handle_xprt(xprt, len);
return len;
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 95e774f1b91f..efc84845bb6b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2059,6 +2059,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
splice_read_end:
release_sock(sk);
+ sk_defer_free_flush(sk);
return copied ? : err;
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 12e2ddaf887f..d45d5366115a 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -192,8 +192,11 @@ void wait_for_unix_gc(void)
{
/* If number of inflight sockets is insane,
* force a garbage collect right now.
+ * Paired with the WRITE_ONCE() in unix_inflight(),
+ * unix_notinflight() and gc_in_progress().
*/
- if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
unix_gc();
wait_event(unix_gc_wait, gc_in_progress == false);
}
@@ -213,7 +216,9 @@ void unix_gc(void)
if (gc_in_progress)
goto out;
- gc_in_progress = true;
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, true);
+
/* First, select candidates for garbage collection. Only
* in-flight sockets are considered, and from those only ones
* which don't have any external reference.
@@ -299,7 +304,10 @@ void unix_gc(void)
/* All candidates should have been detached by now. */
BUG_ON(!list_empty(&gc_candidates));
- gc_in_progress = false;
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, false);
+
wake_up(&unix_gc_wait);
out:
diff --git a/net/unix/scm.c b/net/unix/scm.c
index 052ae709ce28..aa27a02478dc 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp)
} else {
BUG_ON(list_empty(&u->link));
}
- unix_tot_inflight++;
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
user->unix_inflight++;
spin_unlock(&unix_gc_lock);
@@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
if (atomic_long_dec_and_test(&u->inflight))
list_del_init(&u->link);
- unix_tot_inflight--;
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
}
user->unix_inflight--;
spin_unlock(&unix_gc_lock);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 4f7c99dfd16c..fb3302fff627 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -665,7 +665,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
vsock_for_each_connected_socket(virtio_vsock_reset_sock);
/* Stop all work handlers to make sure no one is accessing the device,
- * so we can safely call vdev->config->reset().
+ * so we can safely call virtio_reset_device().
*/
mutex_lock(&vsock->rx_lock);
vsock->rx_run = false;
@@ -682,7 +682,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
/* Flush all device writes and interrupts, device will not use any
* more buffers.
*/
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
mutex_lock(&vsock->rx_lock);
while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 756e7de7e33f..1e9be50469ce 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -33,8 +33,8 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
) > $@
-$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
- $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
+$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDI) \
+ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509)
@$(kecho) " GEN $@"
$(Q)(set -e; \
allf=""; \
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index dccb8f3318ef..04d1ce9b510f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -31,6 +31,7 @@
#include <linux/if_tunnel.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/gre.h>
@@ -3295,7 +3296,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
fl4->flowi4_proto = iph->protocol;
fl4->daddr = reverse ? iph->saddr : iph->daddr;
fl4->saddr = reverse ? iph->daddr : iph->saddr;
- fl4->flowi4_tos = iph->tos;
+ fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
diff --git a/samples/Kconfig b/samples/Kconfig
index 43d2e9aa557f..22cc921ae291 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -241,6 +241,15 @@ config SAMPLE_WATCH_QUEUE
Build example userspace program to use the new mount_notify(),
sb_notify() syscalls and the KEYCTL_WATCH_KEY keyctl() function.
+config SAMPLE_CORESIGHT_SYSCFG
+ tristate "Build example loadable module for CoreSight config"
+ depends on CORESIGHT && m
+ help
+ Build an example loadable module that adds new CoreSight features
+ and configuration using the CoreSight system configuration API.
+ This demonstrates how a user may create their own CoreSight
+ configurations and easily load them into the system at runtime.
+
endif # SAMPLES
config HAVE_SAMPLE_FTRACE_DIRECT
diff --git a/samples/Makefile b/samples/Makefile
index 4bcd6b93bffa..1ae4de99c983 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -32,3 +32,4 @@ obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/
subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog
subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/
+obj-$(CONFIG_SAMPLE_CORESIGHT_SYSCFG) += coresight/
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index 4866afd054da..eb4d94742e6b 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -113,11 +113,11 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta)
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c
index f6d593e47037..8fdd2c9c56b2 100644
--- a/samples/bpf/test_overhead_kprobe_kern.c
+++ b/samples/bpf/test_overhead_kprobe_kern.c
@@ -6,6 +6,7 @@
*/
#include <linux/version.h>
#include <linux/ptrace.h>
+#include <linux/sched.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -22,17 +23,17 @@ int prog(struct pt_regs *ctx)
{
struct signal_struct *signal;
struct task_struct *tsk;
- char oldcomm[16] = {};
- char newcomm[16] = {};
+ char oldcomm[TASK_COMM_LEN] = {};
+ char newcomm[TASK_COMM_LEN] = {};
u16 oom_score_adj;
u32 pid;
tsk = (void *)PT_REGS_PARM1(ctx);
pid = _(tsk->pid);
- bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm);
- bpf_probe_read_kernel(newcomm, sizeof(newcomm),
- (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm);
+ bpf_probe_read_kernel_str(newcomm, sizeof(newcomm),
+ (void *)PT_REGS_PARM2(ctx));
signal = _(tsk->signal);
oom_score_adj = _(signal->oom_score_adj);
return 0;
diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c
index eaa32693f8fc..80edadacb692 100644
--- a/samples/bpf/test_overhead_tp_kern.c
+++ b/samples/bpf/test_overhead_tp_kern.c
@@ -4,6 +4,7 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
+#include <linux/sched.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
@@ -11,8 +12,8 @@
struct task_rename {
__u64 pad;
__u32 pid;
- char oldcomm[16];
- char newcomm[16];
+ char oldcomm[TASK_COMM_LEN];
+ char newcomm[TASK_COMM_LEN];
__u16 oom_score_adj;
};
SEC("tracepoint/task/task_rename")
diff --git a/samples/coresight/Makefile b/samples/coresight/Makefile
new file mode 100644
index 000000000000..b3fce4af2347
--- /dev/null
+++ b/samples/coresight/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_SAMPLE_CORESIGHT_SYSCFG) += coresight-cfg-sample.o
+ccflags-y += -I$(srctree)/drivers/hwtracing/coresight
diff --git a/samples/coresight/coresight-cfg-sample.c b/samples/coresight/coresight-cfg-sample.c
new file mode 100644
index 000000000000..25485c80b5e3
--- /dev/null
+++ b/samples/coresight/coresight-cfg-sample.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include "coresight-config.h"
+#include "coresight-syscfg.h"
+
+/* create an alternate autofdo configuration */
+
+/* we will provide 4 sets of preset parameter values */
+#define AFDO2_NR_PRESETS 4
+/* the total number of parameters in used features - strobing has 2 */
+#define AFDO2_NR_PARAM_SUM 2
+
+static const char *afdo2_ref_names[] = {
+ "strobing",
+};
+
+/*
+ * set of presets leaves strobing window constant while varying period to allow
+ * experimentation with mark / space ratios for various workloads
+ */
+static u64 afdo2_presets[AFDO2_NR_PRESETS][AFDO2_NR_PARAM_SUM] = {
+ { 1000, 100 },
+ { 1000, 1000 },
+ { 1000, 5000 },
+ { 1000, 10000 },
+};
+
+struct cscfg_config_desc afdo2 = {
+ .name = "autofdo2",
+ .description = "Setup ETMs with strobing for autofdo\n"
+ "Supplied presets allow experimentation with mark-space ratio for various loads\n",
+ .nr_feat_refs = ARRAY_SIZE(afdo2_ref_names),
+ .feat_ref_names = afdo2_ref_names,
+ .nr_presets = AFDO2_NR_PRESETS,
+ .nr_total_params = AFDO2_NR_PARAM_SUM,
+ .presets = &afdo2_presets[0][0],
+};
+
+static struct cscfg_feature_desc *sample_feats[] = {
+ NULL
+};
+
+static struct cscfg_config_desc *sample_cfgs[] = {
+ &afdo2,
+ NULL
+};
+
+static struct cscfg_load_owner_info mod_owner = {
+ .type = CSCFG_OWNER_MODULE,
+ .owner_handle = THIS_MODULE,
+};
+
+/* module init and exit - just load and unload configs */
+static int __init cscfg_sample_init(void)
+{
+ return cscfg_load_config_sets(sample_cfgs, sample_feats, &mod_owner);
+}
+
+static void __exit cscfg_sample_exit(void)
+{
+ cscfg_unload_config_sets(&mod_owner);
+}
+
+module_init(cscfg_sample_init);
+module_exit(cscfg_sample_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mike Leach <mike.leach@linaro.org>");
+MODULE_DESCRIPTION("CoreSight Syscfg Example");
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index 1a72b7d95cdc..4d34dc0b0fee 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -21,6 +21,7 @@ static const char *random_strings[] = {
static void simple_thread_func(int cnt)
{
+ unsigned long bitmask[1] = {0xdeadbeefUL};
int array[6];
int len = cnt % 5;
int i;
@@ -43,6 +44,8 @@ static void simple_thread_func(int cnt)
trace_foo_with_template_cond("prints other times", cnt);
trace_foo_with_template_print("I have to be different", cnt);
+
+ trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask);
}
static int simple_thread(void *arg)
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index e61471ab7d14..cbbbb83beced 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -155,7 +155,7 @@
*
* To assign this string, use the helper macro __assign_str_len().
*
- * __assign_str(foo, bar, len);
+ * __assign_str_len(foo, bar, len);
*
* Then len + 1 is allocated to the ring buffer, and a nul terminating
* byte is added. This is similar to:
@@ -506,6 +506,39 @@ DEFINE_EVENT_PRINT(foo_template, foo_with_template_print,
TP_ARGS(foo, bar),
TP_printk("bar %s %d", __get_str(foo), __entry->bar));
+/*
+ * There are yet another __rel_loc dynamic data attribute. If you
+ * use __rel_dynamic_array() and __rel_string() etc. macros, you
+ * can use this attribute. There is no difference from the viewpoint
+ * of functionality with/without 'rel' but the encoding is a bit
+ * different. This is expected to be used with user-space event,
+ * there is no reason that the kernel event use this, but only for
+ * testing.
+ */
+
+TRACE_EVENT(foo_rel_loc,
+
+ TP_PROTO(const char *foo, int bar, unsigned long *mask),
+
+ TP_ARGS(foo, bar, mask),
+
+ TP_STRUCT__entry(
+ __rel_string( foo, foo )
+ __field( int, bar )
+ __rel_bitmask( bitmask,
+ BITS_PER_BYTE * sizeof(unsigned long) )
+ ),
+
+ TP_fast_assign(
+ __assign_rel_str(foo, foo);
+ __entry->bar = bar;
+ __assign_rel_bitmask(bitmask, mask,
+ BITS_PER_BYTE * sizeof(unsigned long));
+ ),
+
+ TP_printk("foo_rel_loc %s, %d, %s", __get_rel_str(foo), __entry->bar,
+ __get_rel_bitmask(bitmask))
+);
#endif
/***** NOTICE! The #if protection ends here. *****/
diff --git a/scripts/.gitignore b/scripts/.gitignore
index e83c620ef52c..eed308bef604 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
/asn1_compiler
/bin2c
-/extract-cert
/insert-sys-cert
/kallsyms
/module.lds
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index cdec22088423..3514c2149e9d 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -196,53 +196,6 @@ echo-why = $(call escsq, $(strip $(why)))
endif
###############################################################################
-#
-# When a Kconfig string contains a filename, it is suitable for
-# passing to shell commands. It is surrounded by double-quotes, and
-# any double-quotes or backslashes within it are escaped by
-# backslashes.
-#
-# This is no use for dependencies or $(wildcard). We need to strip the
-# surrounding quotes and the escaping from quotes and backslashes, and
-# we *do* need to escape any spaces in the string. So, for example:
-#
-# Usage: $(eval $(call config_filename,FOO))
-#
-# Defines FOO_FILENAME based on the contents of the CONFIG_FOO option,
-# transformed as described above to be suitable for use within the
-# makefile.
-#
-# Also, if the filename is a relative filename and exists in the source
-# tree but not the build tree, define FOO_SRCPREFIX as $(srctree)/ to
-# be prefixed to *both* command invocation and dependencies.
-#
-# Note: We also print the filenames in the quiet_cmd_foo text, and
-# perhaps ought to have a version specially escaped for that purpose.
-# But it's only cosmetic, and $(patsubst "%",%,$(CONFIG_FOO)) is good
-# enough. It'll strip the quotes in the common case where there's no
-# space and it's a simple filename, and it'll retain the quotes when
-# there's a space. There are some esoteric cases in which it'll print
-# the wrong thing, but we don't really care. The actual dependencies
-# and commands *do* get it right, with various combinations of single
-# and double quotes, backslashes and spaces in the filenames.
-#
-###############################################################################
-#
-define config_filename
-ifneq ($$(CONFIG_$(1)),"")
-$(1)_FILENAME := $$(subst \\,\,$$(subst \$$(quote),$$(quote),$$(subst $$(space_escape),\$$(space),$$(patsubst "%",%,$$(subst $$(space),$$(space_escape),$$(CONFIG_$(1)))))))
-ifneq ($$(patsubst /%,%,$$(firstword $$($(1)_FILENAME))),$$(firstword $$($(1)_FILENAME)))
-else
-ifeq ($$(wildcard $$($(1)_FILENAME)),)
-ifneq ($$(wildcard $$(srctree)/$$($(1)_FILENAME)),)
-$(1)_SRCPREFIX := $(srctree)/
-endif
-endif
-endif
-endif
-endef
-#
-###############################################################################
# delete partially updated (i.e. corrupted) files on error
.DELETE_ON_ERROR:
diff --git a/scripts/Makefile b/scripts/Makefile
index 9adb6d247818..ecd3acacd0ec 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -3,25 +3,19 @@
# scripts contains sources for various helper programs used throughout
# the kernel for the build process.
-CRYPTO_LIBS = $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto)
-CRYPTO_CFLAGS = $(shell pkg-config --cflags libcrypto 2> /dev/null)
-
hostprogs-always-$(CONFIG_BUILD_BIN2C) += bin2c
hostprogs-always-$(CONFIG_KALLSYMS) += kallsyms
hostprogs-always-$(BUILD_C_RECORDMCOUNT) += recordmcount
hostprogs-always-$(CONFIG_BUILDTIME_TABLE_SORT) += sorttable
hostprogs-always-$(CONFIG_ASN1) += asn1_compiler
hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file
-hostprogs-always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert
hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert
-hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert
HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include
+HOSTLDLIBS_sorttable = -lpthread
HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include
-HOSTCFLAGS_sign-file.o = $(CRYPTO_CFLAGS)
-HOSTLDLIBS_sign-file = $(CRYPTO_LIBS)
-HOSTCFLAGS_extract-cert.o = $(CRYPTO_CFLAGS)
-HOSTLDLIBS_extract-cert = $(CRYPTO_LIBS)
+HOSTCFLAGS_sign-file.o = $(shell pkg-config --cflags libcrypto 2> /dev/null)
+HOSTLDLIBS_sign-file = $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto)
ifdef CONFIG_UNWINDER_ORC
ifeq ($(ARCH),x86_64)
@@ -29,7 +23,10 @@ ARCH := x86
endif
HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include
HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED
-HOSTLDLIBS_sorttable = -lpthread
+endif
+
+ifdef CONFIG_DYNAMIC_FTRACE
+HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED
endif
# The following programs are only built on demand
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index ab17f7b2e33c..79be57fdd32a 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -399,20 +399,35 @@ printf "%08x\n" $$dec_size | \
} \
)
+quiet_cmd_file_size = GEN $@
+ cmd_file_size = $(size_append) > $@
+
quiet_cmd_bzip2 = BZIP2 $@
- cmd_bzip2 = { cat $(real-prereqs) | $(KBZIP2) -9; $(size_append); } > $@
+ cmd_bzip2 = cat $(real-prereqs) | $(KBZIP2) -9 > $@
+
+quiet_cmd_bzip2_with_size = BZIP2 $@
+ cmd_bzip2_with_size = { cat $(real-prereqs) | $(KBZIP2) -9; $(size_append); } > $@
# Lzma
# ---------------------------------------------------------------------------
quiet_cmd_lzma = LZMA $@
- cmd_lzma = { cat $(real-prereqs) | $(LZMA) -9; $(size_append); } > $@
+ cmd_lzma = cat $(real-prereqs) | $(LZMA) -9 > $@
+
+quiet_cmd_lzma_with_size = LZMA $@
+ cmd_lzma_with_size = { cat $(real-prereqs) | $(LZMA) -9; $(size_append); } > $@
quiet_cmd_lzo = LZO $@
- cmd_lzo = { cat $(real-prereqs) | $(KLZOP) -9; $(size_append); } > $@
+ cmd_lzo = cat $(real-prereqs) | $(KLZOP) -9 > $@
+
+quiet_cmd_lzo_with_size = LZO $@
+ cmd_lzo_with_size = { cat $(real-prereqs) | $(KLZOP) -9; $(size_append); } > $@
quiet_cmd_lz4 = LZ4 $@
- cmd_lz4 = { cat $(real-prereqs) | $(LZ4) -l -c1 stdin stdout; \
+ cmd_lz4 = cat $(real-prereqs) | $(LZ4) -l -c1 stdin stdout > $@
+
+quiet_cmd_lz4_with_size = LZ4 $@
+ cmd_lz4_with_size = { cat $(real-prereqs) | $(LZ4) -l -c1 stdin stdout; \
$(size_append); } > $@
# U-Boot mkimage
@@ -455,7 +470,10 @@ quiet_cmd_uimage = UIMAGE $@
# big dictionary would increase the memory usage too much in the multi-call
# decompression mode. A BCJ filter isn't used either.
quiet_cmd_xzkern = XZKERN $@
- cmd_xzkern = { cat $(real-prereqs) | sh $(srctree)/scripts/xz_wrap.sh; \
+ cmd_xzkern = cat $(real-prereqs) | sh $(srctree)/scripts/xz_wrap.sh > $@
+
+quiet_cmd_xzkern_with_size = XZKERN $@
+ cmd_xzkern_with_size = { cat $(real-prereqs) | sh $(srctree)/scripts/xz_wrap.sh; \
$(size_append); } > $@
quiet_cmd_xzmisc = XZMISC $@
@@ -478,10 +496,13 @@ quiet_cmd_xzmisc = XZMISC $@
# be used because it would require zstd to allocate a 128 MB buffer.
quiet_cmd_zstd = ZSTD $@
- cmd_zstd = { cat $(real-prereqs) | $(ZSTD) -19; $(size_append); } > $@
+ cmd_zstd = cat $(real-prereqs) | $(ZSTD) -19 > $@
quiet_cmd_zstd22 = ZSTD22 $@
- cmd_zstd22 = { cat $(real-prereqs) | $(ZSTD) -22 --ultra; $(size_append); } > $@
+ cmd_zstd22 = cat $(real-prereqs) | $(ZSTD) -22 --ultra > $@
+
+quiet_cmd_zstd22_with_size = ZSTD22 $@
+ cmd_zstd22_with_size = { cat $(real-prereqs) | $(ZSTD) -22 --ultra; $(size_append); } > $@
# ASM offsets
# ---------------------------------------------------------------------------
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index ff9b09e4cfca..c2c43a0ecfe0 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -66,9 +66,9 @@ endif
# Don't stop modules_install even if we can't sign external modules.
#
ifeq ($(CONFIG_MODULE_SIG_ALL),y)
+sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(srctree)/)$(CONFIG_MODULE_SIG_KEY)
quiet_cmd_sign = SIGN $@
-$(eval $(call config_filename,MODULE_SIG_KEY))
- cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) $(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY) certs/signing_key.x509 $@ \
+ cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) $(sig-key) certs/signing_key.x509 $@ \
$(if $(KBUILD_EXTMOD),|| true)
else
quiet_cmd_sign :=
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 9e2092fd5206..7099c603ff0a 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -8,7 +8,6 @@ ubsan-cflags-$(CONFIG_UBSAN_LOCAL_BOUNDS) += -fsanitize=local-bounds
ubsan-cflags-$(CONFIG_UBSAN_SHIFT) += -fsanitize=shift
ubsan-cflags-$(CONFIG_UBSAN_DIV_ZERO) += -fsanitize=integer-divide-by-zero
ubsan-cflags-$(CONFIG_UBSAN_UNREACHABLE) += -fsanitize=unreachable
-ubsan-cflags-$(CONFIG_UBSAN_OBJECT_SIZE) += -fsanitize=object-size
ubsan-cflags-$(CONFIG_UBSAN_BOOL) += -fsanitize=bool
ubsan-cflags-$(CONFIG_UBSAN_ENUM) += -fsanitize=enum
ubsan-cflags-$(CONFIG_UBSAN_TRAP) += -fsanitize-undefined-trap-on-error
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 1784921c645d..b01c36a15d9d 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3172,7 +3172,7 @@ sub process {
length($line) > 75 &&
!($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ ||
# file delta changes
- $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ ||
+ $line =~ /^\s*(?:[\w\.\-\+]*\/)++[\w\.\-\+]+:/ ||
# filename then :
$line =~ /^\s*(?:Fixes:|Link:|$signature_tags)/i ||
# A Fixes: or Link: line or signature tag line
@@ -3479,47 +3479,47 @@ sub process {
# Kconfig supports named choices), so use a word boundary
# (\b) rather than a whitespace character (\s)
$line =~ /^\+\s*(?:config|menuconfig|choice)\b/) {
- my $length = 0;
- my $cnt = $realcnt;
- my $ln = $linenr + 1;
- my $f;
- my $is_start = 0;
- my $is_end = 0;
- for (; $cnt > 0 && defined $lines[$ln - 1]; $ln++) {
- $f = $lines[$ln - 1];
- $cnt-- if ($lines[$ln - 1] !~ /^-/);
- $is_end = $lines[$ln - 1] =~ /^\+/;
+ my $ln = $linenr;
+ my $needs_help = 0;
+ my $has_help = 0;
+ my $help_length = 0;
+ while (defined $lines[$ln]) {
+ my $f = $lines[$ln++];
next if ($f =~ /^-/);
- last if (!$file && $f =~ /^\@\@/);
+ last if ($f !~ /^[\+ ]/); # !patch context
- if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) {
- $is_start = 1;
- } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
- $length = -1;
+ if ($f =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) {
+ $needs_help = 1;
+ next;
+ }
+ if ($f =~ /^\+\s*help\s*$/) {
+ $has_help = 1;
+ next;
}
- $f =~ s/^.//;
- $f =~ s/#.*//;
- $f =~ s/^\s+//;
- next if ($f =~ /^$/);
+ $f =~ s/^.//; # strip patch context [+ ]
+ $f =~ s/#.*//; # strip # directives
+ $f =~ s/^\s+//; # strip leading blanks
+ next if ($f =~ /^$/); # skip blank lines
+ # At the end of this Kconfig block:
# This only checks context lines in the patch
# and so hopefully shouldn't trigger false
# positives, even though some of these are
# common words in help texts
- if ($f =~ /^\s*(?:config|menuconfig|choice|endchoice|
- if|endif|menu|endmenu|source)\b/x) {
- $is_end = 1;
+ if ($f =~ /^(?:config|menuconfig|choice|endchoice|
+ if|endif|menu|endmenu|source)\b/x) {
last;
}
- $length++;
+ $help_length++ if ($has_help);
}
- if ($is_start && $is_end && $length < $min_conf_desc_length) {
+ if ($needs_help &&
+ $help_length < $min_conf_desc_length) {
+ my $stat_real = get_stat_real($linenr, $ln - 1);
WARN("CONFIG_DESCRIPTION",
- "please write a paragraph that describes the config symbol fully\n" . $herecurr);
+ "please write a help paragraph that fully describes the config symbol\n" . "$here\n$stat_real\n");
}
- #print "is_start<$is_start> is_end<$is_end> length<$length>\n";
}
# check MAINTAINERS entries
diff --git a/scripts/coccinelle/iterators/fen.cocci b/scripts/coccinelle/iterators/fen.cocci
deleted file mode 100644
index b69f9665f4fb..000000000000
--- a/scripts/coccinelle/iterators/fen.cocci
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/// These iterators only exit normally when the loop cursor is NULL, so there
-/// is no point to call of_node_put on the final value.
-///
-// Confidence: High
-// Copyright: (C) 2010-2012 Nicolas Palix.
-// Copyright: (C) 2010-2012 Julia Lawall, INRIA/LIP6.
-// Copyright: (C) 2010-2012 Gilles Muller, INRIA/LiP6.
-// URL: http://coccinelle.lip6.fr/
-// Comments:
-// Options: --no-includes --include-headers
-
-virtual patch
-virtual context
-virtual org
-virtual report
-
-@depends on patch@
-iterator name for_each_node_by_name;
-expression np,E;
-identifier l;
-@@
-
-for_each_node_by_name(np,...) {
- ... when != break;
- when != goto l;
-}
-... when != np = E
-- of_node_put(np);
-
-@depends on patch@
-iterator name for_each_node_by_type;
-expression np,E;
-identifier l;
-@@
-
-for_each_node_by_type(np,...) {
- ... when != break;
- when != goto l;
-}
-... when != np = E
-- of_node_put(np);
-
-@depends on patch@
-iterator name for_each_compatible_node;
-expression np,E;
-identifier l;
-@@
-
-for_each_compatible_node(np,...) {
- ... when != break;
- when != goto l;
-}
-... when != np = E
-- of_node_put(np);
-
-@depends on patch@
-iterator name for_each_matching_node;
-expression np,E;
-identifier l;
-@@
-
-for_each_matching_node(np,...) {
- ... when != break;
- when != goto l;
-}
-... when != np = E
-- of_node_put(np);
-
-// ----------------------------------------------------------------------
-
-@r depends on !patch forall@
-//iterator name for_each_node_by_name;
-//iterator name for_each_node_by_type;
-//iterator name for_each_compatible_node;
-//iterator name for_each_matching_node;
-expression np,E;
-identifier l;
-position p1,p2;
-@@
-
-(
-*for_each_node_by_name@p1(np,...)
-{
- ... when != break;
- when != goto l;
-}
-|
-*for_each_node_by_type@p1(np,...)
-{
- ... when != break;
- when != goto l;
-}
-|
-*for_each_compatible_node@p1(np,...)
-{
- ... when != break;
- when != goto l;
-}
-|
-*for_each_matching_node@p1(np,...)
-{
- ... when != break;
- when != goto l;
-}
-)
-... when != np = E
-* of_node_put@p2(np);
-
-@script:python depends on org@
-p1 << r.p1;
-p2 << r.p2;
-@@
-
-cocci.print_main("unneeded of_node_put",p2)
-cocci.print_secs("iterator",p1)
-
-@script:python depends on report@
-p1 << r.p1;
-p2 << r.p2;
-@@
-
-msg = "ERROR: of_node_put not needed after iterator on line %s" % (p1[0].line)
-coccilib.report.print_report(p2[0], msg)
diff --git a/scripts/coccinelle/misc/bugon.cocci b/scripts/coccinelle/misc/bugon.cocci
deleted file mode 100644
index 8d595c358408..000000000000
--- a/scripts/coccinelle/misc/bugon.cocci
+++ /dev/null
@@ -1,63 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/// Use BUG_ON instead of a if condition followed by BUG.
-///
-//# This makes an effort to find cases where BUG() follows an if
-//# condition on an expression and replaces the if condition and BUG()
-//# with a BUG_ON having the conditional expression of the if statement
-//# as argument.
-//
-// Confidence: High
-// Copyright: (C) 2014 Himangi Saraogi.
-// Comments:
-// Options: --no-includes --include-headers
-
-virtual patch
-virtual context
-virtual org
-virtual report
-
-//----------------------------------------------------------
-// For context mode
-//----------------------------------------------------------
-
-@depends on context@
-expression e;
-@@
-
-*if (e) BUG();
-
-//----------------------------------------------------------
-// For patch mode
-//----------------------------------------------------------
-
-@depends on patch@
-expression e;
-@@
-
--if (e) BUG();
-+BUG_ON(e);
-
-//----------------------------------------------------------
-// For org and report mode
-//----------------------------------------------------------
-
-@r depends on (org || report)@
-expression e;
-position p;
-@@
-
- if (e) BUG@p ();
-
-@script:python depends on org@
-p << r.p;
-@@
-
-coccilib.org.print_todo(p[0], "WARNING use BUG_ON")
-
-@script:python depends on report@
-p << r.p;
-@@
-
-msg="WARNING: Use BUG_ON instead of if condition followed by BUG.\nPlease make sure the condition has no side effects (see conditional BUG_ON definition in include/asm-generic/bug.h)"
-coccilib.report.print_report(p[0], msg)
-
diff --git a/scripts/const_structs.checkpatch b/scripts/const_structs.checkpatch
index 3980985205a0..1eeb7b42c5b9 100644
--- a/scripts/const_structs.checkpatch
+++ b/scripts/const_structs.checkpatch
@@ -12,19 +12,27 @@ driver_info
drm_connector_funcs
drm_encoder_funcs
drm_encoder_helper_funcs
+dvb_frontend_ops
+dvb_tuner_ops
ethtool_ops
extent_io_ops
+fb_ops
file_lock_operations
file_operations
hv_ops
+hwmon_ops
+ib_device_ops
ide_dma_ops
ide_port_ops
+ieee80211_ops
+iio_buffer_setup_ops
inode_operations
intel_dvo_dev_ops
irq_domain_ops
item_operations
iwl_cfg
iwl_ops
+kernel_param_ops
kgdb_arch
kgdb_io
kset_uevent_ops
@@ -32,25 +40,33 @@ lock_manager_operations
machine_desc
microcode_ops
mlxsw_reg_info
+mtd_ooblayout_ops
mtrr_ops
+nand_controller_ops
neigh_ops
net_device_ops
+nft_expr_ops
nlmsvc_binding
nvkm_device_chip
of_device_id
pci_raw_ops
phy_ops
+pinconf_ops
pinctrl_ops
pinmux_ops
pipe_buf_operations
platform_hibernation_ops
platform_suspend_ops
+proc_ops
proto_ops
+pwm_ops
regmap_access_table
regulator_ops
+reset_control_ops
rpc_pipe_ops
rtc_class_ops
sd_desc
+sdhci_ops
seq_operations
sirfsoc_padmux
snd_ac97_build_ops
@@ -67,6 +83,13 @@ uart_ops
usb_mon_operations
v4l2_ctrl_ops
v4l2_ioctl_ops
+v4l2_subdev_core_ops
+v4l2_subdev_internal_ops
+v4l2_subdev_ops
+v4l2_subdev_pad_ops
+v4l2_subdev_video_ops
+vb2_ops
vm_operations_struct
wacom_features
+watchdog_ops
wd_ops
diff --git a/scripts/dtc/dtx_diff b/scripts/dtc/dtx_diff
index d3422ee15e30..f2bbde4bba86 100755
--- a/scripts/dtc/dtx_diff
+++ b/scripts/dtc/dtx_diff
@@ -59,12 +59,8 @@ Otherwise DTx is treated as a dts source file (aka .dts).
or '/include/' to be processed.
If DTx_1 and DTx_2 are in different architectures, then this script
- may not work since \${ARCH} is part of the include path. Two possible
- workarounds:
-
- `basename $0` \\
- <(ARCH=arch_of_dtx_1 `basename $0` DTx_1) \\
- <(ARCH=arch_of_dtx_2 `basename $0` DTx_2)
+ may not work since \${ARCH} is part of the include path. The following
+ workaround can be used:
`basename $0` ARCH=arch_of_dtx_1 DTx_1 >tmp_dtx_1.dts
`basename $0` ARCH=arch_of_dtx_2 DTx_2 >tmp_dtx_2.dts
diff --git a/scripts/gen_autoksyms.sh b/scripts/gen_autoksyms.sh
index 6ed0d225c8b1..120225c541c5 100755
--- a/scripts/gen_autoksyms.sh
+++ b/scripts/gen_autoksyms.sh
@@ -16,20 +16,15 @@ case "$KBUILD_VERBOSE" in
;;
esac
-# We need access to CONFIG_ symbols
-. include/config/auto.conf
-
needed_symbols=
# Special case for modversions (see modpost.c)
-if [ -n "$CONFIG_MODVERSIONS" ]; then
+if grep -q "^CONFIG_MODVERSIONS=y$" include/config/auto.conf; then
needed_symbols="$needed_symbols module_layout"
fi
-ksym_wl=
-if [ -n "$CONFIG_UNUSED_KSYMS_WHITELIST" ]; then
- # Use 'eval' to expand the whitelist path and check if it is relative
- eval ksym_wl="$CONFIG_UNUSED_KSYMS_WHITELIST"
+ksym_wl=$(sed -n 's/^CONFIG_UNUSED_KSYMS_WHITELIST=\(.*\)$/\1/p' include/config/auto.conf)
+if [ -n "$ksym_wl" ]; then
[ "${ksym_wl}" != "${ksym_wl#/}" ] || ksym_wl="$abs_srctree/$ksym_wl"
if [ ! -f "$ksym_wl" ] || [ ! -r "$ksym_wl" ]; then
echo "ERROR: '$ksym_wl' whitelist file not found" >&2
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 2075db0c08b8..6bd5221d37b8 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -1718,7 +1718,7 @@ sub vcs_exists {
%VCS_cmds = %VCS_cmds_hg;
return 2 if eval $VCS_cmds{"available"};
%VCS_cmds = ();
- if (!$printed_novcs) {
+ if (!$printed_novcs && $email_git) {
warn("$P: No supported VCS found. Add --nogit to options?\n");
warn("Using a git repository produces better results.\n");
warn("Try Linus Torvalds' latest git repository using:\n");
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 5a215880b268..b8ef0fb4bbef 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -69,7 +69,7 @@ localyesconfig localmodconfig: $(obj)/conf
# deprecated for external use
simple-targets := oldconfig allnoconfig allyesconfig allmodconfig \
alldefconfig randconfig listnewconfig olddefconfig syncconfig \
- helpnewconfig yes2modconfig mod2yesconfig
+ helpnewconfig yes2modconfig mod2yesconfig mod2noconfig
PHONY += $(simple-targets)
@@ -134,6 +134,7 @@ help:
@echo ' randconfig - New config with random answer to all options'
@echo ' yes2modconfig - Change answers from yes to mod if possible'
@echo ' mod2yesconfig - Change answers from mod to yes if possible'
+ @echo ' mod2noconfig - Change answers from mod to no if possible'
@echo ' listnewconfig - List new options'
@echo ' helpnewconfig - List new options and help text'
@echo ' olddefconfig - Same as oldconfig but sets new symbols to their'
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index 971da3598fe4..4178065ca27f 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -35,6 +35,7 @@ enum input_mode {
olddefconfig,
yes2modconfig,
mod2yesconfig,
+ mod2noconfig,
};
static enum input_mode input_mode = oldaskconfig;
static int input_mode_opt;
@@ -163,8 +164,6 @@ enum conf_def_mode {
def_default,
def_yes,
def_mod,
- def_y2m,
- def_m2y,
def_no,
def_random
};
@@ -302,12 +301,10 @@ static bool conf_set_all_new_symbols(enum conf_def_mode mode)
return has_changed;
}
-static void conf_rewrite_mod_or_yes(enum conf_def_mode mode)
+static void conf_rewrite_tristates(tristate old_val, tristate new_val)
{
struct symbol *sym;
int i;
- tristate old_val = (mode == def_y2m) ? yes : mod;
- tristate new_val = (mode == def_y2m) ? mod : yes;
for_all_symbols(i, sym) {
if (sym_get_type(sym) == S_TRISTATE &&
@@ -685,6 +682,7 @@ static const struct option long_opts[] = {
{"olddefconfig", no_argument, &input_mode_opt, olddefconfig},
{"yes2modconfig", no_argument, &input_mode_opt, yes2modconfig},
{"mod2yesconfig", no_argument, &input_mode_opt, mod2yesconfig},
+ {"mod2noconfig", no_argument, &input_mode_opt, mod2noconfig},
{NULL, 0, NULL, 0}
};
@@ -713,6 +711,7 @@ static void conf_usage(const char *progname)
printf(" --randconfig New config with random answer to all options\n");
printf(" --yes2modconfig Change answers from yes to mod if possible\n");
printf(" --mod2yesconfig Change answers from mod to yes if possible\n");
+ printf(" --mod2noconfig Change answers from mod to no if possible\n");
printf(" (If none of the above is given, --oldaskconfig is the default)\n");
}
@@ -788,6 +787,7 @@ int main(int ac, char **av)
case olddefconfig:
case yes2modconfig:
case mod2yesconfig:
+ case mod2noconfig:
conf_read(NULL);
break;
case allnoconfig:
@@ -862,10 +862,13 @@ int main(int ac, char **av)
case savedefconfig:
break;
case yes2modconfig:
- conf_rewrite_mod_or_yes(def_y2m);
+ conf_rewrite_tristates(yes, mod);
break;
case mod2yesconfig:
- conf_rewrite_mod_or_yes(def_m2y);
+ conf_rewrite_tristates(mod, yes);
+ break;
+ case mod2noconfig:
+ conf_rewrite_tristates(mod, no);
break;
case oldaskconfig:
rootEntry = &rootmenu;
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 42bc56ee238c..59717be31210 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -244,19 +244,21 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p)
p, sym->name);
return 1;
case S_STRING:
- if (*p++ != '"')
- break;
- for (p2 = p; (p2 = strpbrk(p2, "\"\\")); p2++) {
- if (*p2 == '"') {
- *p2 = 0;
+ /* No escaping for S_DEF_AUTO (include/config/auto.conf) */
+ if (def != S_DEF_AUTO) {
+ if (*p++ != '"')
break;
+ for (p2 = p; (p2 = strpbrk(p2, "\"\\")); p2++) {
+ if (*p2 == '"') {
+ *p2 = 0;
+ break;
+ }
+ memmove(p2, p2 + 1, strlen(p2));
}
- memmove(p2, p2 + 1, strlen(p2));
- }
- if (!p2) {
- if (def != S_DEF_AUTO)
+ if (!p2) {
conf_warning("invalid string found");
- return 1;
+ return 1;
+ }
}
/* fall through */
case S_INT:
@@ -700,7 +702,7 @@ static void print_symbol_for_dotconfig(FILE *fp, struct symbol *sym)
static void print_symbol_for_autoconf(FILE *fp, struct symbol *sym)
{
- __print_symbol(fp, sym, OUTPUT_N_NONE, true);
+ __print_symbol(fp, sym, OUTPUT_N_NONE, false);
}
void print_symbol_for_listconfig(struct symbol *sym)
diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
index 1a5fea0519eb..3387ad7508f7 100755
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl
@@ -170,7 +170,7 @@ sub read_kconfig {
$source =~ s/\$\($env\)/$ENV{$env}/;
}
- open(my $kinfile, '<', $source) || die "Can't open $kconfig";
+ open(my $kinfile, '<', $source) || die "Can't open $source";
while (<$kinfile>) {
chomp;
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 9716f285e404..666f7bbc13eb 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -34,6 +34,10 @@ LD="$1"
KBUILD_LDFLAGS="$2"
LDFLAGS_vmlinux="$3"
+is_enabled() {
+ grep -q "^$1=y" include/config/auto.conf
+}
+
# Nice output in kbuild format
# Will be supressed by "make -s"
info()
@@ -80,11 +84,11 @@ modpost_link()
${KBUILD_VMLINUX_LIBS} \
--end-group"
- if [ -n "${CONFIG_LTO_CLANG}" ]; then
+ if is_enabled CONFIG_LTO_CLANG; then
gen_initcalls
lds="-T .tmp_initcalls.lds"
- if [ -n "${CONFIG_MODVERSIONS}" ]; then
+ if is_enabled CONFIG_MODVERSIONS; then
gen_symversions
lds="${lds} -T .tmp_symversions.lds"
fi
@@ -104,21 +108,21 @@ objtool_link()
local objtoolcmd;
local objtoolopt;
- if [ "${CONFIG_LTO_CLANG} ${CONFIG_STACK_VALIDATION}" = "y y" ]; then
+ if is_enabled CONFIG_LTO_CLANG && is_enabled CONFIG_STACK_VALIDATION; then
# Don't perform vmlinux validation unless explicitly requested,
# but run objtool on vmlinux.o now that we have an object file.
- if [ -n "${CONFIG_UNWINDER_ORC}" ]; then
+ if is_enabled CONFIG_UNWINDER_ORC; then
objtoolcmd="orc generate"
fi
objtoolopt="${objtoolopt} --duplicate"
- if [ -n "${CONFIG_FTRACE_MCOUNT_USE_OBJTOOL}" ]; then
+ if is_enabled CONFIG_FTRACE_MCOUNT_USE_OBJTOOL; then
objtoolopt="${objtoolopt} --mcount"
fi
fi
- if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then
+ if is_enabled CONFIG_VMLINUX_VALIDATION; then
objtoolopt="${objtoolopt} --noinstr"
fi
@@ -127,19 +131,19 @@ objtool_link()
objtoolcmd="check"
fi
objtoolopt="${objtoolopt} --vmlinux"
- if [ -z "${CONFIG_FRAME_POINTER}" ]; then
+ if ! is_enabled CONFIG_FRAME_POINTER; then
objtoolopt="${objtoolopt} --no-fp"
fi
- if [ -n "${CONFIG_GCOV_KERNEL}" ] || [ -n "${CONFIG_LTO_CLANG}" ]; then
+ if is_enabled CONFIG_GCOV_KERNEL || is_enabled CONFIG_LTO_CLANG; then
objtoolopt="${objtoolopt} --no-unreachable"
fi
- if [ -n "${CONFIG_RETPOLINE}" ]; then
+ if is_enabled CONFIG_RETPOLINE; then
objtoolopt="${objtoolopt} --retpoline"
fi
- if [ -n "${CONFIG_X86_SMAP}" ]; then
+ if is_enabled CONFIG_X86_SMAP; then
objtoolopt="${objtoolopt} --uaccess"
fi
- if [ -n "${CONFIG_SLS}" ]; then
+ if is_enabled CONFIG_SLS; then
objtoolopt="${objtoolopt} --sls"
fi
info OBJTOOL ${1}
@@ -164,7 +168,7 @@ vmlinux_link()
# skip output file argument
shift
- if [ -n "${CONFIG_LTO_CLANG}" ]; then
+ if is_enabled CONFIG_LTO_CLANG; then
# Use vmlinux.o instead of performing the slow LTO link again.
objs=vmlinux.o
libs=
@@ -192,7 +196,7 @@ vmlinux_link()
ldflags="${ldflags} ${wl}--strip-debug"
fi
- if [ -n "${CONFIG_VMLINUX_MAP}" ]; then
+ if is_enabled CONFIG_VMLINUX_MAP; then
ldflags="${ldflags} ${wl}-Map=${output}.map"
fi
@@ -242,15 +246,15 @@ kallsyms()
{
local kallsymopt;
- if [ -n "${CONFIG_KALLSYMS_ALL}" ]; then
+ if is_enabled CONFIG_KALLSYMS_ALL; then
kallsymopt="${kallsymopt} --all-symbols"
fi
- if [ -n "${CONFIG_KALLSYMS_ABSOLUTE_PERCPU}" ]; then
+ if is_enabled CONFIG_KALLSYMS_ABSOLUTE_PERCPU; then
kallsymopt="${kallsymopt} --absolute-percpu"
fi
- if [ -n "${CONFIG_KALLSYMS_BASE_RELATIVE}" ]; then
+ if is_enabled CONFIG_KALLSYMS_BASE_RELATIVE; then
kallsymopt="${kallsymopt} --base-relative"
fi
@@ -315,9 +319,6 @@ if [ "$1" = "clean" ]; then
exit 0
fi
-# We need access to CONFIG_ symbols
-. include/config/auto.conf
-
# Update version
info GEN .version
if [ -r .version ]; then
@@ -346,7 +347,7 @@ tr '\0' '\n' < modules.builtin.modinfo | sed -n 's/^[[:alnum:]:_]*\.file=//p' |
tr ' ' '\n' | uniq | sed -e 's:^:kernel/:' -e 's/$/.ko/' > modules.builtin
btf_vmlinux_bin_o=""
-if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
+if is_enabled CONFIG_DEBUG_INFO_BTF; then
btf_vmlinux_bin_o=.btf.vmlinux.bin.o
if ! gen_btf .tmp_vmlinux.btf $btf_vmlinux_bin_o ; then
echo >&2 "Failed to generate BTF for vmlinux"
@@ -358,7 +359,7 @@ fi
kallsymso=""
kallsymso_prev=""
kallsyms_vmlinux=""
-if [ -n "${CONFIG_KALLSYMS}" ]; then
+if is_enabled CONFIG_KALLSYMS; then
# kallsyms support
# Generate section listing all symbols and add it into vmlinux
@@ -398,12 +399,15 @@ fi
vmlinux_link vmlinux "${kallsymso}" ${btf_vmlinux_bin_o}
# fill in BTF IDs
-if [ -n "${CONFIG_DEBUG_INFO_BTF}" -a -n "${CONFIG_BPF}" ]; then
+if is_enabled CONFIG_DEBUG_INFO_BTF && is_enabled CONFIG_BPF; then
info BTFIDS vmlinux
${RESOLVE_BTFIDS} vmlinux
fi
-if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then
+info SYSMAP System.map
+mksysmap vmlinux System.map
+
+if is_enabled CONFIG_BUILDTIME_TABLE_SORT; then
info SORTTAB vmlinux
if ! sorttable vmlinux; then
echo >&2 Failed to sort kernel tables
@@ -411,11 +415,8 @@ if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then
fi
fi
-info SYSMAP System.map
-mksysmap vmlinux System.map
-
# step a (see comment above)
-if [ -n "${CONFIG_KALLSYMS}" ]; then
+if is_enabled CONFIG_KALLSYMS; then
mksysmap ${kallsyms_vmlinux} .tmp_System.map
if ! cmp -s System.map .tmp_System.map; then
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index 4edc708baa63..7c20252a90c6 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -28,7 +28,7 @@ llvm)
if [ "$SRCARCH" = s390 ]; then
echo 13.0.0
else
- echo 10.0.1
+ echo 11.0.0
fi
;;
*)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index cb8ab7d91d30..6bfa33217914 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1830,6 +1830,14 @@ static int addend_mips_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r)
return 0;
}
+#ifndef EM_RISCV
+#define EM_RISCV 243
+#endif
+
+#ifndef R_RISCV_SUB32
+#define R_RISCV_SUB32 39
+#endif
+
static void section_rela(const char *modname, struct elf_info *elf,
Elf_Shdr *sechdr)
{
@@ -1866,6 +1874,13 @@ static void section_rela(const char *modname, struct elf_info *elf,
r_sym = ELF_R_SYM(r.r_info);
#endif
r.r_addend = TO_NATIVE(rela->r_addend);
+ switch (elf->hdr->e_machine) {
+ case EM_RISCV:
+ if (!strcmp("__ex_table", fromsec) &&
+ ELF_R_TYPE(r.r_info) == R_RISCV_SUB32)
+ continue;
+ break;
+ }
sym = elf->symtab_start + r_sym;
/* Skip special sections */
if (is_shndx_special(sym->st_shndx))
diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files
index 80430b8fb617..7adab4618035 100755
--- a/scripts/remove-stale-files
+++ b/scripts/remove-stale-files
@@ -39,3 +39,5 @@ if [ -n "${building_out_of_srctree}" ]; then
rm -f arch/parisc/boot/compressed/${f}
done
fi
+
+rm -f scripts/extract-cert
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 6b54e46a0f12..af4754a35e66 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -111,9 +111,7 @@ if $scm_only; then
exit
fi
-if test -e include/config/auto.conf; then
- . include/config/auto.conf
-else
+if ! test -e include/config/auto.conf; then
echo "Error: kernelrelease not valid - run 'make prepare' to update it" >&2
exit 1
fi
@@ -125,10 +123,11 @@ if test ! "$srctree" -ef .; then
fi
# CONFIG_LOCALVERSION and LOCALVERSION (if set)
-res="${res}${CONFIG_LOCALVERSION}${LOCALVERSION}"
+config_localversion=$(sed -n 's/^CONFIG_LOCALVERSION=\(.*\)$/\1/p' include/config/auto.conf)
+res="${res}${config_localversion}${LOCALVERSION}"
# scm version string if not at a tagged commit
-if test "$CONFIG_LOCALVERSION_AUTO" = "y"; then
+if grep -q "^CONFIG_LOCALVERSION_AUTO=y$" include/config/auto.conf; then
# full scm version string
res="$res$(scm_version)"
elif [ "${LOCALVERSION+set}" != "set" ]; then
diff --git a/scripts/sorttable.c b/scripts/sorttable.c
index b7c2ad71f9cf..3a8ea5ed553d 100644
--- a/scripts/sorttable.c
+++ b/scripts/sorttable.c
@@ -30,6 +30,8 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <errno.h>
+#include <pthread.h>
#include <tools/be_byteshift.h>
#include <tools/le_byteshift.h>
@@ -231,7 +233,7 @@ static void sort_relative_table(char *extab_image, int image_size)
}
}
-static void arm64_sort_relative_table(char *extab_image, int image_size)
+static void sort_relative_table_with_data(char *extab_image, int image_size)
{
int i = 0;
@@ -259,34 +261,6 @@ static void arm64_sort_relative_table(char *extab_image, int image_size)
}
}
-static void x86_sort_relative_table(char *extab_image, int image_size)
-{
- int i = 0;
-
- while (i < image_size) {
- uint32_t *loc = (uint32_t *)(extab_image + i);
-
- w(r(loc) + i, loc);
- w(r(loc + 1) + i + 4, loc + 1);
- /* Don't touch the fixup type */
-
- i += sizeof(uint32_t) * 3;
- }
-
- qsort(extab_image, image_size / 12, 12, compare_relative_table);
-
- i = 0;
- while (i < image_size) {
- uint32_t *loc = (uint32_t *)(extab_image + i);
-
- w(r(loc) - i, loc);
- w(r(loc + 1) - (i + 4), loc + 1);
- /* Don't touch the fixup type */
-
- i += sizeof(uint32_t) * 3;
- }
-}
-
static void s390_sort_relative_table(char *extab_image, int image_size)
{
int i;
@@ -364,15 +338,14 @@ static int do_file(char const *const fname, void *addr)
switch (r2(&ehdr->e_machine)) {
case EM_386:
+ case EM_AARCH64:
+ case EM_RISCV:
case EM_X86_64:
- custom_sort = x86_sort_relative_table;
+ custom_sort = sort_relative_table_with_data;
break;
case EM_S390:
custom_sort = s390_sort_relative_table;
break;
- case EM_AARCH64:
- custom_sort = arm64_sort_relative_table;
- break;
case EM_PARISC:
case EM_PPC:
case EM_PPC64:
@@ -383,7 +356,6 @@ static int do_file(char const *const fname, void *addr)
case EM_ARM:
case EM_MICROBLAZE:
case EM_MIPS:
- case EM_RISCV:
case EM_XTENSA:
break;
default:
diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index a2baa2fefb13..deb7c1d3e979 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -19,6 +19,9 @@
#undef extable_ent_size
#undef compare_extable
+#undef get_mcount_loc
+#undef sort_mcount_loc
+#undef elf_mcount_loc
#undef do_sort
#undef Elf_Addr
#undef Elf_Ehdr
@@ -41,6 +44,9 @@
#ifdef SORTTABLE_64
# define extable_ent_size 16
# define compare_extable compare_extable_64
+# define get_mcount_loc get_mcount_loc_64
+# define sort_mcount_loc sort_mcount_loc_64
+# define elf_mcount_loc elf_mcount_loc_64
# define do_sort do_sort_64
# define Elf_Addr Elf64_Addr
# define Elf_Ehdr Elf64_Ehdr
@@ -62,6 +68,9 @@
#else
# define extable_ent_size 8
# define compare_extable compare_extable_32
+# define get_mcount_loc get_mcount_loc_32
+# define sort_mcount_loc sort_mcount_loc_32
+# define elf_mcount_loc elf_mcount_loc_32
# define do_sort do_sort_32
# define Elf_Addr Elf32_Addr
# define Elf_Ehdr Elf32_Ehdr
@@ -84,8 +93,6 @@
#if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
/* ORC unwinder only support X86_64 */
-#include <errno.h>
-#include <pthread.h>
#include <asm/orc_types.h>
#define ERRSTR_MAXSZ 256
@@ -191,7 +198,66 @@ static int compare_extable(const void *a, const void *b)
return 1;
return 0;
}
+#ifdef MCOUNT_SORT_ENABLED
+pthread_t mcount_sort_thread;
+
+struct elf_mcount_loc {
+ Elf_Ehdr *ehdr;
+ Elf_Shdr *init_data_sec;
+ uint_t start_mcount_loc;
+ uint_t stop_mcount_loc;
+};
+
+/* Sort the addresses stored between __start_mcount_loc to __stop_mcount_loc in vmlinux */
+static void *sort_mcount_loc(void *arg)
+{
+ struct elf_mcount_loc *emloc = (struct elf_mcount_loc *)arg;
+ uint_t offset = emloc->start_mcount_loc - _r(&(emloc->init_data_sec)->sh_addr)
+ + _r(&(emloc->init_data_sec)->sh_offset);
+ uint_t count = emloc->stop_mcount_loc - emloc->start_mcount_loc;
+ unsigned char *start_loc = (void *)emloc->ehdr + offset;
+
+ qsort(start_loc, count/sizeof(uint_t), sizeof(uint_t), compare_extable);
+ return NULL;
+}
+
+/* Get the address of __start_mcount_loc and __stop_mcount_loc in System.map */
+static void get_mcount_loc(uint_t *_start, uint_t *_stop)
+{
+ FILE *file_start, *file_stop;
+ char start_buff[20];
+ char stop_buff[20];
+ int len = 0;
+
+ file_start = popen(" grep start_mcount System.map | awk '{print $1}' ", "r");
+ if (!file_start) {
+ fprintf(stderr, "get start_mcount_loc error!");
+ return;
+ }
+
+ file_stop = popen(" grep stop_mcount System.map | awk '{print $1}' ", "r");
+ if (!file_stop) {
+ fprintf(stderr, "get stop_mcount_loc error!");
+ pclose(file_start);
+ return;
+ }
+
+ while (fgets(start_buff, sizeof(start_buff), file_start) != NULL) {
+ len = strlen(start_buff);
+ start_buff[len - 1] = '\0';
+ }
+ *_start = strtoul(start_buff, NULL, 16);
+
+ while (fgets(stop_buff, sizeof(stop_buff), file_stop) != NULL) {
+ len = strlen(stop_buff);
+ stop_buff[len - 1] = '\0';
+ }
+ *_stop = strtoul(stop_buff, NULL, 16);
+ pclose(file_start);
+ pclose(file_stop);
+}
+#endif
static int do_sort(Elf_Ehdr *ehdr,
char const *const fname,
table_sort_t custom_sort)
@@ -217,6 +283,11 @@ static int do_sort(Elf_Ehdr *ehdr,
int idx;
unsigned int shnum;
unsigned int shstrndx;
+#ifdef MCOUNT_SORT_ENABLED
+ struct elf_mcount_loc mstruct = {0};
+ uint_t _start_mcount_loc = 0;
+ uint_t _stop_mcount_loc = 0;
+#endif
#if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
unsigned int orc_ip_size = 0;
unsigned int orc_size = 0;
@@ -253,6 +324,17 @@ static int do_sort(Elf_Ehdr *ehdr,
symtab_shndx = (Elf32_Word *)((const char *)ehdr +
_r(&s->sh_offset));
+#ifdef MCOUNT_SORT_ENABLED
+ /* locate the .init.data section in vmlinux */
+ if (!strcmp(secstrings + idx, ".init.data")) {
+ get_mcount_loc(&_start_mcount_loc, &_stop_mcount_loc);
+ mstruct.ehdr = ehdr;
+ mstruct.init_data_sec = s;
+ mstruct.start_mcount_loc = _start_mcount_loc;
+ mstruct.stop_mcount_loc = _stop_mcount_loc;
+ }
+#endif
+
#if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
/* locate the ORC unwind tables */
if (!strcmp(secstrings + idx, ".orc_unwind_ip")) {
@@ -294,6 +376,23 @@ static int do_sort(Elf_Ehdr *ehdr,
goto out;
}
#endif
+
+#ifdef MCOUNT_SORT_ENABLED
+ if (!mstruct.init_data_sec || !_start_mcount_loc || !_stop_mcount_loc) {
+ fprintf(stderr,
+ "incomplete mcount's sort in file: %s\n",
+ fname);
+ goto out;
+ }
+
+ /* create thread to sort mcount_loc concurrently */
+ if (pthread_create(&mcount_sort_thread, NULL, &sort_mcount_loc, &mstruct)) {
+ fprintf(stderr,
+ "pthread_create mcount_sort_thread failed '%s': %s\n",
+ strerror(errno), fname);
+ goto out;
+ }
+#endif
if (!extab_sec) {
fprintf(stderr, "no __ex_table in file: %s\n", fname);
goto out;
@@ -364,11 +463,11 @@ out:
void *retval = NULL;
/* wait for ORC tables sort done */
rc = pthread_join(orc_sort_thread, &retval);
- if (rc)
+ if (rc) {
fprintf(stderr,
"pthread_join failed '%s': %s\n",
strerror(errno), fname);
- else if (retval) {
+ } else if (retval) {
rc = -1;
fprintf(stderr,
"failed to sort ORC tables '%s': %s\n",
@@ -376,5 +475,23 @@ out:
}
}
#endif
+
+#ifdef MCOUNT_SORT_ENABLED
+ if (mcount_sort_thread) {
+ void *retval = NULL;
+ /* wait for mcount sort done */
+ rc = pthread_join(mcount_sort_thread, &retval);
+ if (rc) {
+ fprintf(stderr,
+ "pthread_join failed '%s': %s\n",
+ strerror(errno), fname);
+ } else if (retval) {
+ rc = -1;
+ fprintf(stderr,
+ "failed to sort mcount '%s': %s\n",
+ (char *)retval, fname);
+ }
+ }
+#endif
return rc;
}
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index acf6ea711299..0c8b79cfb1bb 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -1046,6 +1046,7 @@ oustanding||outstanding
overaall||overall
overhread||overhead
overlaping||overlapping
+oveflow||overflow
overflw||overflow
overlfow||overflow
overide||override
diff --git a/scripts/tags.sh b/scripts/tags.sh
index b24bfaec6290..16d475b3e203 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -140,71 +140,71 @@ dogtags()
# - etags regular expressions have to match at the start of a line;
# a ^[^#] is prepended by setup_regex unless an anchor is already present
regex_asm=(
- '/^\(ENTRY\|_GLOBAL\)(\([[:alnum:]_\\]*\)).*/\2/'
+ '/^\(ENTRY\|_GLOBAL\)([[:space:]]*\([[:alnum:]_\\]*\)).*/\2/'
)
regex_c=(
- '/^SYSCALL_DEFINE[0-9](\([[:alnum:]_]*\).*/sys_\1/'
- '/^BPF_CALL_[0-9](\([[:alnum:]_]*\).*/\1/'
- '/^COMPAT_SYSCALL_DEFINE[0-9](\([[:alnum:]_]*\).*/compat_sys_\1/'
- '/^TRACE_EVENT(\([[:alnum:]_]*\).*/trace_\1/'
- '/^TRACE_EVENT(\([[:alnum:]_]*\).*/trace_\1_rcuidle/'
- '/^DEFINE_EVENT([^,)]*, *\([[:alnum:]_]*\).*/trace_\1/'
- '/^DEFINE_EVENT([^,)]*, *\([[:alnum:]_]*\).*/trace_\1_rcuidle/'
- '/^DEFINE_INSN_CACHE_OPS(\([[:alnum:]_]*\).*/get_\1_slot/'
- '/^DEFINE_INSN_CACHE_OPS(\([[:alnum:]_]*\).*/free_\1_slot/'
- '/^PAGEFLAG(\([[:alnum:]_]*\).*/Page\1/'
- '/^PAGEFLAG(\([[:alnum:]_]*\).*/SetPage\1/'
- '/^PAGEFLAG(\([[:alnum:]_]*\).*/ClearPage\1/'
- '/^TESTSETFLAG(\([[:alnum:]_]*\).*/TestSetPage\1/'
- '/^TESTPAGEFLAG(\([[:alnum:]_]*\).*/Page\1/'
- '/^SETPAGEFLAG(\([[:alnum:]_]*\).*/SetPage\1/'
- '/\<__SETPAGEFLAG(\([[:alnum:]_]*\).*/__SetPage\1/'
- '/\<TESTCLEARFLAG(\([[:alnum:]_]*\).*/TestClearPage\1/'
- '/\<__TESTCLEARFLAG(\([[:alnum:]_]*\).*/TestClearPage\1/'
- '/\<CLEARPAGEFLAG(\([[:alnum:]_]*\).*/ClearPage\1/'
- '/\<__CLEARPAGEFLAG(\([[:alnum:]_]*\).*/__ClearPage\1/'
- '/^__PAGEFLAG(\([[:alnum:]_]*\).*/__SetPage\1/'
- '/^__PAGEFLAG(\([[:alnum:]_]*\).*/__ClearPage\1/'
- '/^PAGEFLAG_FALSE(\([[:alnum:]_]*\).*/Page\1/'
- '/\<TESTSCFLAG(\([[:alnum:]_]*\).*/TestSetPage\1/'
- '/\<TESTSCFLAG(\([[:alnum:]_]*\).*/TestClearPage\1/'
- '/\<SETPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/SetPage\1/'
- '/\<CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/ClearPage\1/'
- '/\<__CLEARPAGEFLAG_NOOP(\([[:alnum:]_]*\).*/__ClearPage\1/'
- '/\<TESTCLEARFLAG_FALSE(\([[:alnum:]_]*\).*/TestClearPage\1/'
- '/^PAGE_TYPE_OPS(\([[:alnum:]_]*\).*/Page\1/'
- '/^PAGE_TYPE_OPS(\([[:alnum:]_]*\).*/__SetPage\1/'
- '/^PAGE_TYPE_OPS(\([[:alnum:]_]*\).*/__ClearPage\1/'
- '/^TASK_PFA_TEST([^,]*, *\([[:alnum:]_]*\))/task_\1/'
- '/^TASK_PFA_SET([^,]*, *\([[:alnum:]_]*\))/task_set_\1/'
- '/^TASK_PFA_CLEAR([^,]*, *\([[:alnum:]_]*\))/task_clear_\1/'
- '/^DEF_MMIO_\(IN\|OUT\)_[XD](\([[:alnum:]_]*\),[^)]*)/\2/'
- '/^DEBUGGER_BOILERPLATE(\([[:alnum:]_]*\))/\1/'
- '/^DEF_PCI_AC_\(\|NO\)RET(\([[:alnum:]_]*\).*/\2/'
- '/^PCI_OP_READ(\(\w*\).*[1-4])/pci_bus_read_config_\1/'
- '/^PCI_OP_WRITE(\(\w*\).*[1-4])/pci_bus_write_config_\1/'
- '/\<DEFINE_\(RT_MUTEX\|MUTEX\|SEMAPHORE\|SPINLOCK\)(\([[:alnum:]_]*\)/\2/v/'
- '/\<DEFINE_\(RAW_SPINLOCK\|RWLOCK\|SEQLOCK\)(\([[:alnum:]_]*\)/\2/v/'
- '/\<DECLARE_\(RWSEM\|COMPLETION\)(\([[:alnum:]_]\+\)/\2/v/'
- '/\<DECLARE_BITMAP(\([[:alnum:]_]*\)/\1/v/'
- '/\(^\|\s\)\(\|L\|H\)LIST_HEAD(\([[:alnum:]_]*\)/\3/v/'
- '/\(^\|\s\)RADIX_TREE(\([[:alnum:]_]*\)/\2/v/'
- '/\<DEFINE_PER_CPU([^,]*, *\([[:alnum:]_]*\)/\1/v/'
- '/\<DEFINE_PER_CPU_SHARED_ALIGNED([^,]*, *\([[:alnum:]_]*\)/\1/v/'
- '/\<DECLARE_WAIT_QUEUE_HEAD(\([[:alnum:]_]*\)/\1/v/'
- '/\<DECLARE_\(TASKLET\|WORK\|DELAYED_WORK\)(\([[:alnum:]_]*\)/\2/v/'
- '/\(^\s\)OFFSET(\([[:alnum:]_]*\)/\2/v/'
- '/\(^\s\)DEFINE(\([[:alnum:]_]*\)/\2/v/'
- '/\<\(DEFINE\|DECLARE\)_HASHTABLE(\([[:alnum:]_]*\)/\2/v/'
- '/\<DEFINE_ID\(R\|A\)(\([[:alnum:]_]\+\)/\2/'
- '/\<DEFINE_WD_CLASS(\([[:alnum:]_]\+\)/\1/'
- '/\<ATOMIC_NOTIFIER_HEAD(\([[:alnum:]_]\+\)/\1/'
- '/\<RAW_NOTIFIER_HEAD(\([[:alnum:]_]\+\)/\1/'
- '/\<DECLARE_FAULT_ATTR(\([[:alnum:]_]\+\)/\1/'
- '/\<BLOCKING_NOTIFIER_HEAD(\([[:alnum:]_]\+\)/\1/'
- '/\<DEVICE_ATTR_\(RW\|RO\|WO\)(\([[:alnum:]_]\+\)/dev_attr_\2/'
- '/\<DRIVER_ATTR_\(RW\|RO\|WO\)(\([[:alnum:]_]\+\)/driver_attr_\2/'
- '/\<\(DEFINE\|DECLARE\)_STATIC_KEY_\(TRUE\|FALSE\)\(\|_RO\)(\([[:alnum:]_]\+\)/\4/'
+ '/^SYSCALL_DEFINE[0-9]([[:space:]]*\([[:alnum:]_]*\).*/sys_\1/'
+ '/^BPF_CALL_[0-9]([[:space:]]*\([[:alnum:]_]*\).*/\1/'
+ '/^COMPAT_SYSCALL_DEFINE[0-9]([[:space:]]*\([[:alnum:]_]*\).*/compat_sys_\1/'
+ '/^TRACE_EVENT([[:space:]]*\([[:alnum:]_]*\).*/trace_\1/'
+ '/^TRACE_EVENT([[:space:]]*\([[:alnum:]_]*\).*/trace_\1_rcuidle/'
+ '/^DEFINE_EVENT([^,)]*,[[:space:]]*\([[:alnum:]_]*\).*/trace_\1/'
+ '/^DEFINE_EVENT([^,)]*,[[:space:]]*\([[:alnum:]_]*\).*/trace_\1_rcuidle/'
+ '/^DEFINE_INSN_CACHE_OPS([[:space:]]*\([[:alnum:]_]*\).*/get_\1_slot/'
+ '/^DEFINE_INSN_CACHE_OPS([[:space:]]*\([[:alnum:]_]*\).*/free_\1_slot/'
+ '/^PAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/Page\1/'
+ '/^PAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/SetPage\1/'
+ '/^PAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/ClearPage\1/'
+ '/^TESTSETFLAG([[:space:]]*\([[:alnum:]_]*\).*/TestSetPage\1/'
+ '/^TESTPAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/Page\1/'
+ '/^SETPAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/SetPage\1/'
+ '/\<__SETPAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/__SetPage\1/'
+ '/\<TESTCLEARFLAG([[:space:]]*\([[:alnum:]_]*\).*/TestClearPage\1/'
+ '/\<__TESTCLEARFLAG([[:space:]]*\([[:alnum:]_]*\).*/TestClearPage\1/'
+ '/\<CLEARPAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/ClearPage\1/'
+ '/\<__CLEARPAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/__ClearPage\1/'
+ '/^__PAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/__SetPage\1/'
+ '/^__PAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/__ClearPage\1/'
+ '/^PAGEFLAG_FALSE([[:space:]]*\([[:alnum:]_]*\).*/Page\1/'
+ '/\<TESTSCFLAG([[:space:]]*\([[:alnum:]_]*\).*/TestSetPage\1/'
+ '/\<TESTSCFLAG([[:space:]]*\([[:alnum:]_]*\).*/TestClearPage\1/'
+ '/\<SETPAGEFLAG_NOOP([[:space:]]*\([[:alnum:]_]*\).*/SetPage\1/'
+ '/\<CLEARPAGEFLAG_NOOP([[:space:]]*\([[:alnum:]_]*\).*/ClearPage\1/'
+ '/\<__CLEARPAGEFLAG_NOOP([[:space:]]*\([[:alnum:]_]*\).*/__ClearPage\1/'
+ '/\<TESTCLEARFLAG_FALSE([[:space:]]*\([[:alnum:]_]*\).*/TestClearPage\1/'
+ '/^PAGE_TYPE_OPS([[:space:]]*\([[:alnum:]_]*\).*/Page\1/'
+ '/^PAGE_TYPE_OPS([[:space:]]*\([[:alnum:]_]*\).*/__SetPage\1/'
+ '/^PAGE_TYPE_OPS([[:space:]]*\([[:alnum:]_]*\).*/__ClearPage\1/'
+ '/^TASK_PFA_TEST([^,]*,[[:space:]]*\([[:alnum:]_]*\))/task_\1/'
+ '/^TASK_PFA_SET([^,]*,[[:space:]]*\([[:alnum:]_]*\))/task_set_\1/'
+ '/^TASK_PFA_CLEAR([^,]*,[[:space:]]*\([[:alnum:]_]*\))/task_clear_\1/'
+ '/^DEF_MMIO_\(IN\|OUT\)_[XD]([[:space:]]*\([[:alnum:]_]*\),[^)]*)/\2/'
+ '/^DEBUGGER_BOILERPLATE([[:space:]]*\([[:alnum:]_]*\))/\1/'
+ '/^DEF_PCI_AC_\(\|NO\)RET([[:space:]]*\([[:alnum:]_]*\).*/\2/'
+ '/^PCI_OP_READ([[:space:]]*\(\w*\).*[1-4])/pci_bus_read_config_\1/'
+ '/^PCI_OP_WRITE([[:space:]]*\(\w*\).*[1-4])/pci_bus_write_config_\1/'
+ '/\<DEFINE_\(RT_MUTEX\|MUTEX\|SEMAPHORE\|SPINLOCK\)([[:space:]]*\([[:alnum:]_]*\)/\2/v/'
+ '/\<DEFINE_\(RAW_SPINLOCK\|RWLOCK\|SEQLOCK\)([[:space:]]*\([[:alnum:]_]*\)/\2/v/'
+ '/\<DECLARE_\(RWSEM\|COMPLETION\)([[:space:]]*\([[:alnum:]_]\+\)/\2/v/'
+ '/\<DECLARE_BITMAP([[:space:]]*\([[:alnum:]_]*\)/\1/v/'
+ '/\(^\|\s\)\(\|L\|H\)LIST_HEAD([[:space:]]*\([[:alnum:]_]*\)/\3/v/'
+ '/\(^\|\s\)RADIX_TREE([[:space:]]*\([[:alnum:]_]*\)/\2/v/'
+ '/\<DEFINE_PER_CPU([^,]*,[[:space:]]*\([[:alnum:]_]*\)/\1/v/'
+ '/\<DEFINE_PER_CPU_SHARED_ALIGNED([^,]*,[[:space:]]*\([[:alnum:]_]*\)/\1/v/'
+ '/\<DECLARE_WAIT_QUEUE_HEAD([[:space:]]*\([[:alnum:]_]*\)/\1/v/'
+ '/\<DECLARE_\(TASKLET\|WORK\|DELAYED_WORK\)([[:space:]]*\([[:alnum:]_]*\)/\2/v/'
+ '/\(^\s\)OFFSET([[:space:]]*\([[:alnum:]_]*\)/\2/v/'
+ '/\(^\s\)DEFINE([[:space:]]*\([[:alnum:]_]*\)/\2/v/'
+ '/\<\(DEFINE\|DECLARE\)_HASHTABLE([[:space:]]*\([[:alnum:]_]*\)/\2/v/'
+ '/\<DEFINE_ID\(R\|A\)([[:space:]]*\([[:alnum:]_]\+\)/\2/'
+ '/\<DEFINE_WD_CLASS([[:space:]]*\([[:alnum:]_]\+\)/\1/'
+ '/\<ATOMIC_NOTIFIER_HEAD([[:space:]]*\([[:alnum:]_]\+\)/\1/'
+ '/\<RAW_NOTIFIER_HEAD([[:space:]]*\([[:alnum:]_]\+\)/\1/'
+ '/\<DECLARE_FAULT_ATTR([[:space:]]*\([[:alnum:]_]\+\)/\1/'
+ '/\<BLOCKING_NOTIFIER_HEAD([[:space:]]*\([[:alnum:]_]\+\)/\1/'
+ '/\<DEVICE_ATTR_\(RW\|RO\|WO\)([[:space:]]*\([[:alnum:]_]\+\)/dev_attr_\2/'
+ '/\<DRIVER_ATTR_\(RW\|RO\|WO\)([[:space:]]*\([[:alnum:]_]\+\)/driver_attr_\2/'
+ '/\<\(DEFINE\|DECLARE\)_STATIC_KEY_\(TRUE\|FALSE\)\(\|_RO\)([[:space:]]*\([[:alnum:]_]\+\)/\4/'
'/^SEQCOUNT_LOCKTYPE(\([^,]*\),[[:space:]]*\([^,]*\),[^)]*)/seqcount_\2_t/'
'/^SEQCOUNT_LOCKTYPE(\([^,]*\),[[:space:]]*\([^,]*\),[^)]*)/seqcount_\2_init/'
)
diff --git a/sound/core/info.c b/sound/core/info.c
index a451b24199c3..782fba87cc04 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -234,7 +234,7 @@ static int snd_info_entry_mmap(struct file *file, struct vm_area_struct *vma)
static int snd_info_entry_open(struct inode *inode, struct file *file)
{
- struct snd_info_entry *entry = PDE_DATA(inode);
+ struct snd_info_entry *entry = pde_data(inode);
struct snd_info_private_data *data;
int mode, err;
@@ -365,7 +365,7 @@ static int snd_info_seq_show(struct seq_file *seq, void *p)
static int snd_info_text_entry_open(struct inode *inode, struct file *file)
{
- struct snd_info_entry *entry = PDE_DATA(inode);
+ struct snd_info_entry *entry = pde_data(inode);
struct snd_info_private_data *data;
int err;
diff --git a/sound/core/init.c b/sound/core/init.c
index ac335f5906c6..31ba7024e3ad 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -1111,29 +1111,14 @@ EXPORT_SYMBOL(snd_card_file_remove);
*/
int snd_power_ref_and_wait(struct snd_card *card)
{
- wait_queue_entry_t wait;
- int result = 0;
-
snd_power_ref(card);
- /* fastpath */
if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0)
return 0;
- init_waitqueue_entry(&wait, current);
- add_wait_queue(&card->power_sleep, &wait);
- while (1) {
- if (card->shutdown) {
- result = -ENODEV;
- break;
- }
- if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0)
- break;
- snd_power_unref(card);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(30 * HZ);
- snd_power_ref(card);
- }
- remove_wait_queue(&card->power_sleep, &wait);
- return result;
+ wait_event_cmd(card->power_sleep,
+ card->shutdown ||
+ snd_power_get_state(card) == SNDRV_CTL_POWER_D0,
+ snd_power_unref(card), snd_power_ref(card));
+ return card->shutdown ? -ENODEV : 0;
}
EXPORT_SYMBOL_GPL(snd_power_ref_and_wait);
diff --git a/sound/core/misc.c b/sound/core/misc.c
index 3579dd7a161f..50e4aaa6270d 100644
--- a/sound/core/misc.c
+++ b/sound/core/misc.c
@@ -112,7 +112,7 @@ snd_pci_quirk_lookup_id(u16 vendor, u16 device,
{
const struct snd_pci_quirk *q;
- for (q = list; q->subvendor; q++) {
+ for (q = list; q->subvendor || q->subdevice; q++) {
if (q->subvendor != vendor)
continue;
if (!q->subdevice ||
diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c
index 30b40d865863..718595380868 100644
--- a/sound/pci/hda/cs35l41_hda.c
+++ b/sound/pci/hda/cs35l41_hda.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
//
-// cs35l41.c -- CS35l41 ALSA HDA audio driver
+// CS35l41 ALSA HDA audio driver
//
// Copyright 2021 Cirrus Logic, Inc.
//
@@ -17,19 +17,19 @@
#include "cs35l41_hda.h"
static const struct reg_sequence cs35l41_hda_config[] = {
- { CS35L41_PLL_CLK_CTRL, 0x00000430 }, //3200000Hz, BCLK Input, PLL_REFCLK_EN = 1
- { CS35L41_GLOBAL_CLK_CTRL, 0x00000003 }, //GLOBAL_FS = 48 kHz
- { CS35L41_SP_ENABLES, 0x00010000 }, //ASP_RX1_EN = 1
- { CS35L41_SP_RATE_CTRL, 0x00000021 }, //ASP_BCLK_FREQ = 3.072 MHz
- { CS35L41_SP_FORMAT, 0x20200200 }, //24 bits, I2S, BCLK Slave, FSYNC Slave
- { CS35L41_DAC_PCM1_SRC, 0x00000008 }, //DACPCM1_SRC = ASPRX1
- { CS35L41_AMP_DIG_VOL_CTRL, 0x00000000 }, //AMP_VOL_PCM 0.0 dB
- { CS35L41_AMP_GAIN_CTRL, 0x00000084 }, //AMP_GAIN_PCM 4.5 dB
- { CS35L41_PWR_CTRL2, 0x00000001 }, //AMP_EN = 1
+ { CS35L41_PLL_CLK_CTRL, 0x00000430 }, // 3200000Hz, BCLK Input, PLL_REFCLK_EN = 1
+ { CS35L41_GLOBAL_CLK_CTRL, 0x00000003 }, // GLOBAL_FS = 48 kHz
+ { CS35L41_SP_ENABLES, 0x00010000 }, // ASP_RX1_EN = 1
+ { CS35L41_SP_RATE_CTRL, 0x00000021 }, // ASP_BCLK_FREQ = 3.072 MHz
+ { CS35L41_SP_FORMAT, 0x20200200 }, // 24 bits, I2S, BCLK Slave, FSYNC Slave
+ { CS35L41_DAC_PCM1_SRC, 0x00000008 }, // DACPCM1_SRC = ASPRX1
+ { CS35L41_AMP_DIG_VOL_CTRL, 0x00000000 }, // AMP_VOL_PCM 0.0 dB
+ { CS35L41_AMP_GAIN_CTRL, 0x00000084 }, // AMP_GAIN_PCM 4.5 dB
+ { CS35L41_PWR_CTRL2, 0x00000001 }, // AMP_EN = 1
};
static const struct reg_sequence cs35l41_hda_start_bst[] = {
- { CS35L41_PWR_CTRL2, 0x00000021 }, //BST_EN = 10, AMP_EN = 1
+ { CS35L41_PWR_CTRL2, 0x00000021 }, // BST_EN = 10, AMP_EN = 1
{ CS35L41_PWR_CTRL1, 0x00000001, 3000}, // set GLOBAL_EN = 1
};
@@ -60,7 +60,7 @@ static const struct reg_sequence cs35l41_stop_ext_vspk[] = {
{ 0x00000040, 0x00000055 },
{ 0x00000040, 0x000000AA },
{ 0x00007438, 0x00585941 },
- { 0x00002014, 0x00000000, 3000}, //set GLOBAL_EN = 0
+ { 0x00002014, 0x00000000, 3000}, // set GLOBAL_EN = 0
{ 0x0000742C, 0x00000009 },
{ 0x00007438, 0x00580941 },
{ 0x00011008, 0x00000001 },
@@ -78,7 +78,7 @@ static const struct reg_sequence cs35l41_safe_to_active[] = {
{ 0x0000742C, 0x0000000F },
{ 0x0000742C, 0x00000079 },
{ 0x00007438, 0x00585941 },
- { CS35L41_PWR_CTRL1, 0x00000001, 2000 }, //GLOBAL_EN = 1
+ { CS35L41_PWR_CTRL1, 0x00000001, 2000 }, // GLOBAL_EN = 1
{ 0x0000742C, 0x000000F9 },
{ 0x00007438, 0x00580941 },
{ 0x00000040, 0x000000CC },
@@ -89,8 +89,8 @@ static const struct reg_sequence cs35l41_active_to_safe[] = {
{ 0x00000040, 0x00000055 },
{ 0x00000040, 0x000000AA },
{ 0x00007438, 0x00585941 },
- { CS35L41_AMP_DIG_VOL_CTRL, 0x0000A678 }, //AMP_VOL_PCM Mute
- { CS35L41_PWR_CTRL2, 0x00000000 }, //AMP_EN = 0
+ { CS35L41_AMP_DIG_VOL_CTRL, 0x0000A678 }, // AMP_VOL_PCM Mute
+ { CS35L41_PWR_CTRL2, 0x00000000 }, // AMP_EN = 0
{ CS35L41_PWR_CTRL1, 0x00000000 },
{ 0x0000742C, 0x00000009, 2000 },
{ 0x00007438, 0x00580941 },
@@ -161,11 +161,13 @@ static void cs35l41_hda_playback_hook(struct device *dev, int action)
if (reg_seq->close)
ret = regmap_multi_reg_write(reg, reg_seq->close, reg_seq->num_close);
break;
+ default:
+ ret = -EINVAL;
+ break;
}
if (ret)
dev_warn(cs35l41->dev, "Failed to apply multi reg write: %d\n", ret);
-
}
static int cs35l41_hda_channel_map(struct device *dev, unsigned int tx_num, unsigned int *tx_slot,
@@ -182,20 +184,19 @@ static int cs35l41_hda_bind(struct device *dev, struct device *master, void *mas
struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
struct hda_component *comps = master_data;
- if (comps && cs35l41->index >= 0 && cs35l41->index < HDA_MAX_COMPONENTS)
- comps = &comps[cs35l41->index];
- else
+ if (!comps || cs35l41->index < 0 || cs35l41->index >= HDA_MAX_COMPONENTS)
return -EINVAL;
- if (!comps->dev) {
- comps->dev = dev;
- strscpy(comps->name, dev_name(dev), sizeof(comps->name));
- comps->playback_hook = cs35l41_hda_playback_hook;
- comps->set_channel_map = cs35l41_hda_channel_map;
- return 0;
- }
+ comps = &comps[cs35l41->index];
+ if (comps->dev)
+ return -EBUSY;
+
+ comps->dev = dev;
+ strscpy(comps->name, dev_name(dev), sizeof(comps->name));
+ comps->playback_hook = cs35l41_hda_playback_hook;
+ comps->set_channel_map = cs35l41_hda_channel_map;
- return -EBUSY;
+ return 0;
}
static void cs35l41_hda_unbind(struct device *dev, struct device *master, void *master_data)
@@ -227,6 +228,8 @@ static int cs35l41_hda_apply_properties(struct cs35l41_hda *cs35l41,
internal_boost = true;
switch (hw_cfg->gpio1_func) {
+ case CS35L41_NOT_USED:
+ break;
case CS35l41_VSPK_SWITCH:
regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL,
CS35L41_GPIO1_CTRL_MASK, 1 << CS35L41_GPIO1_CTRL_SHIFT);
@@ -235,13 +238,21 @@ static int cs35l41_hda_apply_properties(struct cs35l41_hda *cs35l41,
regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL,
CS35L41_GPIO1_CTRL_MASK, 2 << CS35L41_GPIO1_CTRL_SHIFT);
break;
+ default:
+ dev_err(cs35l41->dev, "Invalid function %d for GPIO1\n", hw_cfg->gpio1_func);
+ return -EINVAL;
}
switch (hw_cfg->gpio2_func) {
+ case CS35L41_NOT_USED:
+ break;
case CS35L41_INTERRUPT:
regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL,
CS35L41_GPIO2_CTRL_MASK, 2 << CS35L41_GPIO2_CTRL_SHIFT);
break;
+ default:
+ dev_err(cs35l41->dev, "Invalid function %d for GPIO2\n", hw_cfg->gpio2_func);
+ return -EINVAL;
}
if (internal_boost) {
@@ -256,11 +267,7 @@ static int cs35l41_hda_apply_properties(struct cs35l41_hda *cs35l41,
cs35l41->reg_seq = &cs35l41_hda_reg_seq_ext_bst;
}
- ret = cs35l41_hda_channel_map(cs35l41->dev, 0, NULL, 1, (unsigned int *)&hw_cfg->spk_pos);
- if (ret)
- return ret;
-
- return 0;
+ return cs35l41_hda_channel_map(cs35l41->dev, 0, NULL, 1, (unsigned int *)&hw_cfg->spk_pos);
}
static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *cs35l41,
@@ -269,7 +276,7 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
struct cs35l41_hda_hw_config *hw_cfg;
u32 values[HDA_MAX_COMPONENTS];
struct acpi_device *adev;
- struct device *acpi_dev;
+ struct device *physdev;
char *property;
size_t nval;
int i, ret;
@@ -280,11 +287,11 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
return ERR_PTR(-ENODEV);
}
- acpi_dev = get_device(acpi_get_first_physical_node(adev));
+ physdev = get_device(acpi_get_first_physical_node(adev));
acpi_dev_put(adev);
property = "cirrus,dev-index";
- ret = device_property_count_u32(acpi_dev, property);
+ ret = device_property_count_u32(physdev, property);
if (ret <= 0)
goto no_acpi_dsd;
@@ -294,7 +301,7 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
}
nval = ret;
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret)
goto err;
@@ -311,7 +318,9 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
goto err;
}
- /* No devm_ version as CLSA0100, in no_acpi_dsd case, can't use devm version */
+ /* To use the same release code for all laptop variants we can't use devm_ version of
+ * gpiod_get here, as CLSA010* don't have a fully functional bios with an _DSD node
+ */
cs35l41->reset_gpio = fwnode_gpiod_get_index(&adev->fwnode, "reset", cs35l41->index,
GPIOD_OUT_LOW, "cs35l41-reset");
@@ -322,46 +331,46 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
}
property = "cirrus,speaker-position";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret)
goto err_free;
hw_cfg->spk_pos = values[cs35l41->index];
property = "cirrus,gpio1-func";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret)
goto err_free;
hw_cfg->gpio1_func = values[cs35l41->index];
property = "cirrus,gpio2-func";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret)
goto err_free;
hw_cfg->gpio2_func = values[cs35l41->index];
property = "cirrus,boost-peak-milliamp";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret == 0)
hw_cfg->bst_ipk = values[cs35l41->index];
property = "cirrus,boost-ind-nanohenry";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret == 0)
hw_cfg->bst_ind = values[cs35l41->index];
property = "cirrus,boost-cap-microfarad";
- ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+ ret = device_property_read_u32_array(physdev, property, values, nval);
if (ret == 0)
hw_cfg->bst_cap = values[cs35l41->index];
- put_device(acpi_dev);
+ put_device(physdev);
return hw_cfg;
err_free:
kfree(hw_cfg);
err:
- put_device(acpi_dev);
+ put_device(physdev);
dev_err(cs35l41->dev, "Failed property %s: %d\n", property, ret);
return ERR_PTR(ret);
@@ -370,18 +379,18 @@ no_acpi_dsd:
/*
* Device CLSA0100 doesn't have _DSD so a gpiod_get by the label reset won't work.
* And devices created by i2c-multi-instantiate don't have their device struct pointing to
- * the correct fwnode, so acpi_dev must be used here
+ * the correct fwnode, so acpi_dev must be used here.
* And devm functions expect that the device requesting the resource has the correct
- * fwnode
+ * fwnode.
*/
if (strncmp(hid, "CLSA0100", 8) != 0)
return ERR_PTR(-EINVAL);
/* check I2C address to assign the index */
cs35l41->index = id == 0x40 ? 0 : 1;
- cs35l41->reset_gpio = gpiod_get_index(acpi_dev, NULL, 0, GPIOD_OUT_HIGH);
+ cs35l41->reset_gpio = gpiod_get_index(physdev, NULL, 0, GPIOD_OUT_HIGH);
cs35l41->vspk_always_on = true;
- put_device(acpi_dev);
+ put_device(physdev);
return NULL;
}
@@ -416,8 +425,7 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
if (ret == -EBUSY) {
dev_info(cs35l41->dev, "Reset line busy, assuming shared reset\n");
} else {
- if (ret != -EPROBE_DEFER)
- dev_err(cs35l41->dev, "Failed to get reset GPIO: %d\n", ret);
+ dev_err_probe(cs35l41->dev, ret, "Failed to get reset GPIO: %d\n", ret);
goto err;
}
}
@@ -437,7 +445,8 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
ret = regmap_read(cs35l41->regmap, CS35L41_IRQ1_STATUS3, &int_sts);
if (ret || (int_sts & CS35L41_OTP_BOOT_ERR)) {
- dev_err(cs35l41->dev, "OTP Boot error\n");
+ dev_err(cs35l41->dev, "OTP Boot status %x error: %d\n",
+ int_sts & CS35L41_OTP_BOOT_ERR, ret);
ret = -EIO;
goto err;
}
@@ -463,6 +472,10 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
goto err;
}
+ ret = cs35l41_test_key_unlock(cs35l41->dev, cs35l41->regmap);
+ if (ret)
+ goto err;
+
ret = cs35l41_register_errata_patch(cs35l41->dev, cs35l41->regmap, reg_revid);
if (ret)
goto err;
@@ -473,6 +486,10 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
goto err;
}
+ ret = cs35l41_test_key_lock(cs35l41->dev, cs35l41->regmap);
+ if (ret)
+ goto err;
+
ret = cs35l41_hda_apply_properties(cs35l41, acpi_hw_cfg);
if (ret)
goto err;
@@ -480,8 +497,8 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
acpi_hw_cfg = NULL;
if (cs35l41->reg_seq->probe) {
- ret = regmap_register_patch(cs35l41->regmap, cs35l41->reg_seq->probe,
- cs35l41->reg_seq->num_probe);
+ ret = regmap_multi_reg_write(cs35l41->regmap, cs35l41->reg_seq->probe,
+ cs35l41->reg_seq->num_probe);
if (ret) {
dev_err(cs35l41->dev, "Fail to apply probe reg patch: %d\n", ret);
goto err;
@@ -506,9 +523,9 @@ err:
return ret;
}
-EXPORT_SYMBOL_GPL(cs35l41_hda_probe);
+EXPORT_SYMBOL_NS_GPL(cs35l41_hda_probe, SND_HDA_SCODEC_CS35L41);
-int cs35l41_hda_remove(struct device *dev)
+void cs35l41_hda_remove(struct device *dev)
{
struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
@@ -517,11 +534,8 @@ int cs35l41_hda_remove(struct device *dev)
if (!cs35l41->vspk_always_on)
gpiod_set_value_cansleep(cs35l41->reset_gpio, 0);
gpiod_put(cs35l41->reset_gpio);
-
- return 0;
}
-EXPORT_SYMBOL_GPL(cs35l41_hda_remove);
-
+EXPORT_SYMBOL_NS_GPL(cs35l41_hda_remove, SND_HDA_SCODEC_CS35L41);
MODULE_DESCRIPTION("CS35L41 HDA Driver");
MODULE_AUTHOR("Lucas Tanure, Cirrus Logic Inc, <tanureal@opensource.cirrus.com>");
diff --git a/sound/pci/hda/cs35l41_hda.h b/sound/pci/hda/cs35l41_hda.h
index 76c69a8a22f6..74951001501c 100644
--- a/sound/pci/hda/cs35l41_hda.h
+++ b/sound/pci/hda/cs35l41_hda.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
- * cs35l41_hda.h -- CS35L41 ALSA HDA audio driver
+ * CS35L41 ALSA HDA audio driver
*
* Copyright 2021 Cirrus Logic, Inc.
*
@@ -64,6 +64,6 @@ struct cs35l41_hda {
int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int irq,
struct regmap *regmap);
-int cs35l41_hda_remove(struct device *dev);
+void cs35l41_hda_remove(struct device *dev);
#endif /*__CS35L41_HDA_H__*/
diff --git a/sound/pci/hda/cs35l41_hda_i2c.c b/sound/pci/hda/cs35l41_hda_i2c.c
index 4a9462fb5c14..e810b278fb91 100644
--- a/sound/pci/hda/cs35l41_hda_i2c.c
+++ b/sound/pci/hda/cs35l41_hda_i2c.c
@@ -32,7 +32,9 @@ static int cs35l41_hda_i2c_probe(struct i2c_client *clt, const struct i2c_device
static int cs35l41_hda_i2c_remove(struct i2c_client *clt)
{
- return cs35l41_hda_remove(&clt->dev);
+ cs35l41_hda_remove(&clt->dev);
+
+ return 0;
}
static const struct i2c_device_id cs35l41_hda_i2c_id[] = {
@@ -58,9 +60,9 @@ static struct i2c_driver cs35l41_i2c_driver = {
.probe = cs35l41_hda_i2c_probe,
.remove = cs35l41_hda_i2c_remove,
};
-
module_i2c_driver(cs35l41_i2c_driver);
MODULE_DESCRIPTION("HDA CS35L41 driver");
+MODULE_IMPORT_NS(SND_HDA_SCODEC_CS35L41);
MODULE_AUTHOR("Lucas Tanure <tanureal@opensource.cirrus.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/pci/hda/cs35l41_hda_spi.c b/sound/pci/hda/cs35l41_hda_spi.c
index 77426e96c58f..9f8123893cc8 100644
--- a/sound/pci/hda/cs35l41_hda_spi.c
+++ b/sound/pci/hda/cs35l41_hda_spi.c
@@ -30,7 +30,9 @@ static int cs35l41_hda_spi_probe(struct spi_device *spi)
static int cs35l41_hda_spi_remove(struct spi_device *spi)
{
- return cs35l41_hda_remove(&spi->dev);
+ cs35l41_hda_remove(&spi->dev);
+
+ return 0;
}
static const struct spi_device_id cs35l41_hda_spi_id[] = {
@@ -55,9 +57,9 @@ static struct spi_driver cs35l41_spi_driver = {
.probe = cs35l41_hda_spi_probe,
.remove = cs35l41_hda_spi_remove,
};
-
module_spi_driver(cs35l41_spi_driver);
MODULE_DESCRIPTION("HDA CS35L41 driver");
+MODULE_IMPORT_NS(SND_HDA_SCODEC_CS35L41);
MODULE_AUTHOR("Lucas Tanure <tanureal@opensource.cirrus.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c
index df0b4522babf..2d1fa706327b 100644
--- a/sound/pci/hda/patch_cs8409-tables.c
+++ b/sound/pci/hda/patch_cs8409-tables.c
@@ -490,6 +490,8 @@ const struct snd_pci_quirk cs8409_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0ADC, "Warlock", CS8409_WARLOCK),
SND_PCI_QUIRK(0x1028, 0x0AF4, "Warlock", CS8409_WARLOCK),
SND_PCI_QUIRK(0x1028, 0x0AF5, "Warlock", CS8409_WARLOCK),
+ SND_PCI_QUIRK(0x1028, 0x0BB5, "Warlock N3 15 TGL-U Nuvoton EC", CS8409_WARLOCK),
+ SND_PCI_QUIRK(0x1028, 0x0BB6, "Warlock V3 15 TGL-U Nuvoton EC", CS8409_WARLOCK),
SND_PCI_QUIRK(0x1028, 0x0A77, "Cyborg", CS8409_CYBORG),
SND_PCI_QUIRK(0x1028, 0x0A78, "Cyborg", CS8409_CYBORG),
SND_PCI_QUIRK(0x1028, 0x0A79, "Cyborg", CS8409_CYBORG),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index eef973661b0a..668274e52674 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6948,6 +6948,7 @@ enum {
ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE,
ALC287_FIXUP_LEGION_16ACHG6,
ALC287_FIXUP_CS35L41_I2C_2,
+ ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -8698,6 +8699,16 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = cs35l41_fixup_i2c_two,
},
+ [ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED] = {
+ .type = HDA_FIXUP_VERBS,
+ .v.verbs = (const struct hda_verb[]) {
+ { 0x20, AC_VERB_SET_COEF_INDEX, 0x19 },
+ { 0x20, AC_VERB_SET_PROC_COEF, 0x8e11 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_HP_MUTE_LED,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8911,6 +8922,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index 5d391f62351b..96991ddf5055 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -431,6 +431,14 @@ static const struct usbmix_name_map aorus_master_alc1220vb_map[] = {
{}
};
+/* MSI MPG X570S Carbon Max Wifi with ALC4080 */
+static const struct usbmix_name_map msi_mpg_x570s_carbon_max_wifi_alc4080_map[] = {
+ { 29, "Speaker Playback" },
+ { 30, "Front Headphone Playback" },
+ { 32, "IEC958 Playback" },
+ {}
+};
+
/*
* Control map entries
*/
@@ -577,6 +585,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
.map = trx40_mobo_map,
.connector_map = trx40_mobo_connector_map,
},
+ { /* MSI MPG X570S Carbon Max Wifi */
+ .id = USB_ID(0x0db0, 0x419c),
+ .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map,
+ },
{ /* MSI TRX40 */
.id = USB_ID(0x0db0, 0x543d),
.map = trx40_mobo_map,
diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c
index 150ab3e37013..e2847c040f75 100644
--- a/sound/virtio/virtio_card.c
+++ b/sound/virtio/virtio_card.c
@@ -350,7 +350,7 @@ static void virtsnd_remove(struct virtio_device *vdev)
snd_card_free(snd->card);
vdev->config->del_vqs(vdev);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
for (i = 0; snd->substreams && i < snd->nsubstreams; ++i) {
struct virtio_pcm_substream *vss = &snd->substreams[i];
@@ -379,7 +379,7 @@ static int virtsnd_freeze(struct virtio_device *vdev)
virtsnd_ctl_msg_cancel_all(snd);
vdev->config->del_vqs(vdev);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
for (i = 0; i < snd->nsubstreams; ++i)
cancel_work_sync(&snd->substreams[i].elapsed_period);
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 5ef1c15e88ad..11e86739456d 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t)
"RECLAIM %12s%15s%15s\n"
" %15llu%15llu%15llums\n"
"THRASHING%12s%15s%15s\n"
+ " %15llu%15llu%15llums\n"
+ "COMPACT %12s%15s%15s\n"
" %15llu%15llu%15llums\n",
"count", "real total", "virtual total",
"delay total", "delay average",
@@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t)
"count", "delay total", "delay average",
(unsigned long long)t->thrashing_count,
(unsigned long long)t->thrashing_delay_total,
- average_ms(t->thrashing_delay_total, t->thrashing_count));
+ average_ms(t->thrashing_delay_total, t->thrashing_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->compact_count,
+ (unsigned long long)t->compact_delay_total,
+ average_ms(t->compact_delay_total, t->compact_count));
}
static void task_context_switch_counts(struct taskstats *t)
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index d5b5f2ab87a0..18de5f76f198 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -315,6 +315,7 @@
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 01e2650b9585..3faf0f97edb1 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -486,6 +486,23 @@
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+/* AMD Collaborative Processor Performance Control MSRs */
+#define MSR_AMD_CPPC_CAP1 0xc00102b0
+#define MSR_AMD_CPPC_ENABLE 0xc00102b1
+#define MSR_AMD_CPPC_CAP2 0xc00102b2
+#define MSR_AMD_CPPC_REQ 0xc00102b3
+#define MSR_AMD_CPPC_STATUS 0xc00102b4
+
+#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
+#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
+#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
+#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
+
+#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
+#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
+#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
+#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 5a776a08f78c..2da3316bb559 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -373,9 +373,23 @@ struct kvm_debugregs {
__u64 reserved[9];
};
-/* for KVM_CAP_XSAVE */
+/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
+ /*
+ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
+ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * respectively, when invoked on the vm file descriptor.
+ *
+ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * will always be at least 4096. Currently, it is only greater
+ * than 4096 if a dynamic feature has been enabled with
+ * ``arch_prctl()``, but this may change in the future.
+ *
+ * The offsets of the state save areas in struct kvm_xsave follow
+ * the contents of CPUID leaf 0xD on the host.
+ */
__u32 region[1024];
+ __u32 extra[0];
};
#define KVM_MAX_XCRS 16
diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h
index 754a07856817..500b96e71f18 100644
--- a/tools/arch/x86/include/uapi/asm/prctl.h
+++ b/tools/arch/x86/include/uapi/asm/prctl.h
@@ -2,20 +2,22 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
-#define ARCH_GET_CPUID 0x1011
-#define ARCH_SET_CPUID 0x1012
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
-#define ARCH_GET_XCOMP_SUPP 0x1021
-#define ARCH_GET_XCOMP_PERM 0x1022
-#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_SUPP 0x1021
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
-#define ARCH_MAP_VDSO_X32 0x2001
-#define ARCH_MAP_VDSO_32 0x2002
-#define ARCH_MAP_VDSO_64 0x2003
+#define ARCH_MAP_VDSO_X32 0x2001
+#define ARCH_MAP_VDSO_32 0x2002
+#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 1cc9da6e29c7..59cf2343f3d9 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
rep movsq
movl %edx, %ecx
rep movsb
- ret
+ RET
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
- ret
+ RET
SYM_FUNC_END(memcpy_erms)
SYM_FUNC_START_LOCAL(memcpy_orig)
@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq %r9, 1*8(%rdi)
movq %r10, -2*8(%rdi, %rdx)
movq %r11, -1*8(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_16bytes:
cmpl $8, %edx
@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq -1*8(%rsi, %rdx), %r9
movq %r8, 0*8(%rdi)
movq %r9, -1*8(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_8bytes:
cmpl $4, %edx
@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movl -4(%rsi, %rdx), %r8d
movl %ecx, (%rdi)
movl %r8d, -4(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_3bytes:
subl $1, %edx
@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movb %cl, (%rdi)
.Lend:
- retq
+ RET
SYM_FUNC_END(memcpy_orig)
.popsection
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index 9827ae267f96..d624f2bc42f1 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
movl %edx,%ecx
rep stosb
movq %r9,%rax
- ret
+ RET
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
movq %rdx,%rcx
rep stosb
movq %r9,%rax
- ret
+ RET
SYM_FUNC_END(memset_erms)
SYM_FUNC_START_LOCAL(memset_orig)
@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
.Lende:
movq %r10,%rax
- ret
+ RET
.Lbad_alignment:
cmpq $7,%rdx
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
index d9b420972934..f70702fcb224 100644
--- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -71,8 +71,8 @@ int iter(struct bpf_iter__task_file *ctx)
e.pid = task->tgid;
e.id = get_obj_id(file->private_data, obj_type);
- bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
- task->group_leader->comm);
+ bpf_probe_read_kernel_str(&e.comm, sizeof(e.comm),
+ task->group_leader->comm);
bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
return 0;
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 2cf3b1bde86e..c2a95ab47379 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -99,7 +99,7 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX
###
## HOSTCC C flags
-host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
+host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
# output directory for tests below
TMPOUT = .tmp_$$$$
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index 0076437f6e3f..b94a16ba5c6c 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -279,6 +279,7 @@ static void print_event(struct iio_event_data *event)
printf(", direction: %s", iio_ev_dir_text[dir]);
printf("\n");
+ fflush(stdout);
}
/* Enable or disable events in sysfs if the knob is available */
diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
index 5d2ab38965cc..9ab313e93555 100644
--- a/tools/include/asm-generic/bitops.h
+++ b/tools/include/asm-generic/bitops.h
@@ -18,7 +18,6 @@
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#ifndef _TOOLS_LINUX_BITOPS_H_
#error only <linux/bitops.h> can be included directly
diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h
deleted file mode 100644
index 6481fd11012a..000000000000
--- a/tools/include/asm-generic/bitops/find.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
-#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
-
-extern unsigned long _find_next_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long nbits,
- unsigned long start, unsigned long invert, unsigned long le);
-extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
-extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
-extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
-
-#ifndef find_next_bit
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The bitmap size in bits
- *
- * Returns the bit number for the next set bit
- * If no bits are set, returns @size.
- */
-static inline
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val;
-
- if (unlikely(offset >= size))
- return size;
-
- val = *addr & GENMASK(size - 1, offset);
- return val ? __ffs(val) : size;
- }
-
- return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
-}
-#endif
-
-#ifndef find_next_and_bit
-/**
- * find_next_and_bit - find the next set bit in both memory regions
- * @addr1: The first address to base the search on
- * @addr2: The second address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The bitmap size in bits
- *
- * Returns the bit number for the next set bit
- * If no bits are set, returns @size.
- */
-static inline
-unsigned long find_next_and_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long size,
- unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val;
-
- if (unlikely(offset >= size))
- return size;
-
- val = *addr1 & *addr2 & GENMASK(size - 1, offset);
- return val ? __ffs(val) : size;
- }
-
- return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
-}
-#endif
-
-#ifndef find_next_zero_bit
-/**
- * find_next_zero_bit - find the next cleared bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The bitmap size in bits
- *
- * Returns the bit number of the next zero bit
- * If no bits are zero, returns @size.
- */
-static inline
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- if (small_const_nbits(size)) {
- unsigned long val;
-
- if (unlikely(offset >= size))
- return size;
-
- val = *addr | ~GENMASK(size - 1, offset);
- return val == ~0UL ? size : ffz(val);
- }
-
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
-}
-#endif
-
-#ifndef find_first_bit
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum number of bits to search
- *
- * Returns the bit number of the first set bit.
- * If no bits are set, returns @size.
- */
-static inline
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
-{
- if (small_const_nbits(size)) {
- unsigned long val = *addr & GENMASK(size - 1, 0);
-
- return val ? __ffs(val) : size;
- }
-
- return _find_first_bit(addr, size);
-}
-
-#endif /* find_first_bit */
-
-#ifndef find_first_zero_bit
-
-/**
- * find_first_zero_bit - find the first cleared bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum number of bits to search
- *
- * Returns the bit number of the first cleared bit.
- * If no bits are zero, returns @size.
- */
-static inline
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
-{
- if (small_const_nbits(size)) {
- unsigned long val = *addr | ~GENMASK(size - 1, 0);
-
- return val == ~0UL ? size : ffz(val);
- }
-
- return _find_first_zero_bit(addr, size);
-}
-#endif
-
-#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 95611df1d26e..ea97804d04d4 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -1,9 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PERF_BITOPS_H
-#define _PERF_BITOPS_H
+#ifndef _TOOLS_LINUX_BITMAP_H
+#define _TOOLS_LINUX_BITMAP_H
#include <string.h>
#include <linux/bitops.h>
+#include <linux/find.h>
#include <stdlib.h>
#include <linux/kernel.h>
@@ -181,4 +182,4 @@ static inline int bitmap_intersects(const unsigned long *src1,
return __bitmap_intersects(src1, src2, nbits);
}
-#endif /* _PERF_BITOPS_H */
+#endif /* _TOOLS_LINUX_BITMAP_H */
diff --git a/include/asm-generic/bitops/find.h b/tools/include/linux/find.h
index 0d132ee2a291..47e2bd6c5174 100644
--- a/include/asm-generic/bitops/find.h
+++ b/tools/include/linux/find.h
@@ -1,11 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_GENERIC_BITOPS_FIND_H_
-#define _ASM_GENERIC_BITOPS_FIND_H_
+#ifndef _TOOLS_LINUX_FIND_H_
+#define _TOOLS_LINUX_FIND_H_
+
+#ifndef _TOOLS_LINUX_BITMAP_H
+#error tools: only <linux/bitmap.h> can be included directly
+#endif
+
+#include <linux/bitops.h>
extern unsigned long _find_next_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long nbits,
unsigned long start, unsigned long invert, unsigned long le);
extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size);
extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
@@ -95,8 +103,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
}
#endif
-#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
-
+#ifndef find_first_bit
/**
* find_first_bit - find the first set bit in a memory region
* @addr: The address to start the search at
@@ -116,7 +123,34 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
return _find_first_bit(addr, size);
}
+#endif
+#ifndef find_first_and_bit
+/**
+ * find_first_and_bit - find the first set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
+
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_first_and_bit(addr1, addr2, size);
+}
+#endif
+
+#ifndef find_first_zero_bit
/**
* find_first_zero_bit - find the first cleared bit in a memory region
* @addr: The address to start the search at
@@ -136,16 +170,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
return _find_first_zero_bit(addr, size);
}
-#else /* CONFIG_GENERIC_FIND_FIRST_BIT */
-
-#ifndef find_first_bit
-#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
#endif
-#ifndef find_first_zero_bit
-#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
-#endif
-
-#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
#ifndef find_last_bit
/**
@@ -185,4 +210,5 @@ extern unsigned long find_next_clump8(unsigned long *clump,
#define find_first_clump8(clump, bits, size) \
find_next_clump8((clump), (bits), (size), 0)
-#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
+
+#endif /*__LINUX_FIND_H_ */
diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h
index ad6fa21d977b..38edaa08f862 100644
--- a/tools/include/linux/hash.h
+++ b/tools/include/linux/hash.h
@@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val)
return val * GOLDEN_RATIO_32;
}
-#ifndef HAVE_ARCH_HASH_32
-#define hash_32 hash_32_generic
-#endif
-static inline u32 hash_32_generic(u32 val, unsigned int bits)
+static inline u32 hash_32(u32 val, unsigned int bits)
{
/* High bits are more random, so use them. */
return __hash_32(val) >> (32 - bits);
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 4557a8b6086f..1c48b0ae3ba3 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
#define __NR_futex_waitv 449
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+#define __NR_set_mempolicy_home_node 450
+__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+
#undef __NR_syscalls
-#define __NR_syscalls 450
+#define __NR_syscalls 451
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 3b810b53ba8b..642808520d92 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -1096,6 +1096,24 @@ extern "C" {
#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer)
#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+/**
+ * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata.
+ *
+ * This queries metadata about a framebuffer. User-space fills
+ * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the
+ * struct as the output.
+ *
+ * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles
+ * will be filled with GEM buffer handles. Planes are valid until one has a
+ * zero handle -- this can be used to compute the number of planes.
+ *
+ * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid
+ * until one has a zero &drm_mode_fb_cmd2.pitches.
+ *
+ * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set
+ * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the
+ * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier.
+ */
#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
/*
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 1daa45268de2..9563d294f181 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1131,6 +1131,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
+#define KVM_CAP_VM_GPA_BITS 207
+#define KVM_CAP_XSAVE2 208
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1162,11 +1164,20 @@ struct kvm_irq_routing_hv_sint {
__u32 sint;
};
+struct kvm_irq_routing_xen_evtchn {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+};
+
+#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1))
+
/* gsi routing entry types */
#define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
+#define KVM_IRQ_ROUTING_XEN_EVTCHN 5
struct kvm_irq_routing_entry {
__u32 gsi;
@@ -1178,6 +1189,7 @@ struct kvm_irq_routing_entry {
struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint;
+ struct kvm_irq_routing_xen_evtchn xen_evtchn;
__u32 pad[8];
} u;
};
@@ -1208,6 +1220,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
struct kvm_xen_hvm_config {
__u32 flags;
@@ -1610,6 +1623,9 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
+
struct kvm_s390_pv_sec_parm {
__u64 origin;
__u64 length;
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index bd8860eeb291..4cd39aaccbe7 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1332,7 +1332,10 @@ union perf_mem_data_src {
/* hop level */
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-/* 2-7 available */
+#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
+#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
+#define PERF_MEM_HOPS_3 0x04 /* remote board */
+/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
index 109aa7ffcf97..ba4b8d94e004 100644
--- a/tools/lib/find_bit.c
+++ b/tools/lib/find_bit.c
@@ -96,6 +96,26 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
}
#endif
+#ifndef find_first_and_bit
+/*
+ * Find the first set bit in two memory regions.
+ */
+unsigned long _find_first_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ unsigned long idx, val;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+ val = addr1[idx] & addr2[idx];
+ if (val)
+ return min(idx * BITS_PER_LONG + __ffs(val), size);
+ }
+
+ return size;
+}
+#endif
+
#ifndef find_first_zero_bit
/*
* Find the first cleared bit in a memory region.
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 63ae5e0195ce..32c5051c24eb 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -48,6 +48,7 @@ SYNOPSIS
int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
bool perf_cpu_map__empty(const struct perf_cpu_map *map);
int perf_cpu_map__max(struct perf_cpu_map *map);
+ bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)
--
@@ -135,16 +136,16 @@ SYNOPSIS
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void perf_evsel__close(struct perf_evsel *evsel);
- void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+ void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
void perf_evsel__munmap(struct perf_evsel *evsel);
- void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
- int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+ void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
+ int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
int perf_evsel__enable(struct perf_evsel *evsel);
- int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
int perf_evsel__disable(struct perf_evsel *evsel);
- int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+ int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index adaad3dddf6e..ee66760f1e63 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -10,15 +10,24 @@
#include <ctype.h>
#include <limits.h>
-struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
{
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
+ struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
if (cpus != NULL) {
- cpus->nr = 1;
- cpus->map[0] = -1;
+ cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
+
}
+ return cpus;
+}
+
+struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+{
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
+
+ if (cpus)
+ cpus->map[0].cpu = -1;
return cpus;
}
@@ -54,15 +63,12 @@ static struct perf_cpu_map *cpu_map__default_new(void)
if (nr_cpus < 0)
return NULL;
- cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
+ cpus = perf_cpu_map__alloc(nr_cpus);
if (cpus != NULL) {
int i;
for (i = 0; i < nr_cpus; ++i)
- cpus->map[i] = i;
-
- cpus->nr = nr_cpus;
- refcount_set(&cpus->refcnt, 1);
+ cpus->map[i].cpu = i;
}
return cpus;
@@ -73,31 +79,32 @@ struct perf_cpu_map *perf_cpu_map__default_new(void)
return cpu_map__default_new();
}
-static int cmp_int(const void *a, const void *b)
+
+static int cmp_cpu(const void *a, const void *b)
{
- return *(const int *)a - *(const int*)b;
+ const struct perf_cpu *cpu_a = a, *cpu_b = b;
+
+ return cpu_a->cpu - cpu_b->cpu;
}
-static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
+static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
{
- size_t payload_size = nr_cpus * sizeof(int);
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
+ size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus);
int i, j;
if (cpus != NULL) {
memcpy(cpus->map, tmp_cpus, payload_size);
- qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
+ qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
/* Remove dups */
j = 0;
for (i = 0; i < nr_cpus; i++) {
- if (i == 0 || cpus->map[i] != cpus->map[i - 1])
- cpus->map[j++] = cpus->map[i];
+ if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu)
+ cpus->map[j++].cpu = cpus->map[i].cpu;
}
cpus->nr = j;
assert(j <= nr_cpus);
- refcount_set(&cpus->refcnt, 1);
}
-
return cpus;
}
@@ -105,7 +112,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
{
struct perf_cpu_map *cpus = NULL;
int nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
+ struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
int n, cpu, prev;
char sep;
@@ -124,24 +131,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
if (new_max >= max_entries) {
max_entries = new_max + MAX_NR_CPUS / 2;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
while (++prev < cpu)
- tmp_cpus[nr_cpus++] = prev;
+ tmp_cpus[nr_cpus++].cpu = prev;
}
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
- tmp_cpus[nr_cpus++] = cpu;
+ tmp_cpus[nr_cpus++].cpu = cpu;
if (n == 2 && sep == '-')
prev = cpu;
else
@@ -179,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
unsigned long start_cpu, end_cpu = 0;
char *p = NULL;
int i, nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
+ struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
if (!cpu_list)
@@ -220,17 +227,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
for (; start_cpu <= end_cpu; start_cpu++) {
/* check for duplicates */
for (i = 0; i < nr_cpus; i++)
- if (tmp_cpus[i] == (int)start_cpu)
+ if (tmp_cpus[i].cpu == (int)start_cpu)
goto invalid;
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto invalid;
tmp_cpus = tmp;
}
- tmp_cpus[nr_cpus++] = (int)start_cpu;
+ tmp_cpus[nr_cpus++].cpu = (int)start_cpu;
}
if (*p)
++p;
@@ -250,12 +257,16 @@ out:
return cpus;
}
-int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
+ struct perf_cpu result = {
+ .cpu = -1
+ };
+
if (cpus && idx < cpus->nr)
return cpus->map[idx];
- return -1;
+ return result;
}
int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
@@ -265,21 +276,26 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
bool perf_cpu_map__empty(const struct perf_cpu_map *map)
{
- return map ? map->map[0] == -1 : true;
+ return map ? map->map[0].cpu == -1 : true;
}
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
+int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
- int low = 0, high = cpus->nr;
+ int low, high;
+ if (!cpus)
+ return -1;
+
+ low = 0;
+ high = cpus->nr;
while (low < high) {
- int idx = (low + high) / 2,
- cpu_at_idx = cpus->map[idx];
+ int idx = (low + high) / 2;
+ struct perf_cpu cpu_at_idx = cpus->map[idx];
- if (cpu_at_idx == cpu)
+ if (cpu_at_idx.cpu == cpu.cpu)
return idx;
- if (cpu_at_idx > cpu)
+ if (cpu_at_idx.cpu > cpu.cpu)
high = idx;
else
low = idx + 1;
@@ -288,10 +304,19 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
return -1;
}
-int perf_cpu_map__max(struct perf_cpu_map *map)
+bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
+ return perf_cpu_map__idx(cpus, cpu) != -1;
+}
+
+struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
+{
+ struct perf_cpu result = {
+ .cpu = -1
+ };
+
// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
- return map->nr > 0 ? map->map[map->nr - 1] : -1;
+ return map->nr > 0 ? map->map[map->nr - 1] : result;
}
/*
@@ -305,7 +330,7 @@ int perf_cpu_map__max(struct perf_cpu_map *map)
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other)
{
- int *tmp_cpus;
+ struct perf_cpu *tmp_cpus;
int tmp_len;
int i, j, k;
struct perf_cpu_map *merged;
@@ -319,19 +344,19 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
if (!other)
return orig;
if (orig->nr == other->nr &&
- !memcmp(orig->map, other->map, orig->nr * sizeof(int)))
+ !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
return orig;
tmp_len = orig->nr + other->nr;
- tmp_cpus = malloc(tmp_len * sizeof(int));
+ tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
if (!tmp_cpus)
return NULL;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
while (i < orig->nr && j < other->nr) {
- if (orig->map[i] <= other->map[j]) {
- if (orig->map[i] == other->map[j])
+ if (orig->map[i].cpu <= other->map[j].cpu) {
+ if (orig->map[i].cpu == other->map[j].cpu)
j++;
tmp_cpus[k++] = orig->map[i++];
} else
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index e37dfad31383..9a770bfdc804 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -407,7 +407,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int output, int cpu)
+ int output, struct perf_cpu cpu)
{
return perf_mmap__mmap(map, mp, output, cpu);
}
@@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
int thread, int *_output, int *_output_overwrite)
{
- int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
+ struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;
@@ -643,14 +643,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}
-void __perf_evlist__set_leader(struct list_head *list)
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
{
- struct perf_evsel *evsel, *leader;
+ struct perf_evsel *first, *last, *evsel;
- leader = list_entry(list->next, struct perf_evsel, node);
- evsel = list_entry(list->prev, struct perf_evsel, node);
+ first = list_first_entry(list, struct perf_evsel, node);
+ last = list_last_entry(list, struct perf_evsel, node);
- leader->nr_members = evsel->idx - leader->idx + 1;
+ leader->nr_members = last->idx - first->idx + 1;
__perf_evlist__for_each_entry(list, evsel)
evsel->leader = leader;
@@ -659,7 +659,10 @@ void __perf_evlist__set_leader(struct list_head *list)
void perf_evlist__set_leader(struct perf_evlist *evlist)
{
if (evlist->nr_entries) {
+ struct perf_evsel *first = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
- __perf_evlist__set_leader(&evlist->entries);
+ __perf_evlist__set_leader(&evlist->entries, first);
}
}
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 8441e3e1aaac..210ea7c06ce8 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel)
free(evsel);
}
-#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y))
-#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL)
+#define FD(_evsel, _cpu_map_idx, _thread) \
+ ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread))
+#define MMAP(_evsel, _cpu_map_idx, _thread) \
+ (_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \
+ : NULL)
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
- int cpu, thread;
- for (cpu = 0; cpu < ncpus; cpu++) {
+ int idx, thread;
+
+ for (idx = 0; idx < ncpus; idx++) {
for (thread = 0; thread < nthreads; thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
if (fd)
*fd = -1;
@@ -74,13 +78,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre
static int
sys_perf_event_open(struct perf_event_attr *attr,
- pid_t pid, int cpu, int group_fd,
+ pid_t pid, struct perf_cpu cpu, int group_fd,
unsigned long flags)
{
- return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+ return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags);
}
-static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd)
+static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd)
{
struct perf_evsel *leader = evsel->leader;
int *fd;
@@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
if (!leader->fd)
return -ENOTCONN;
- fd = FD(leader, cpu, thread);
+ fd = FD(leader, cpu_map_idx, thread);
if (fd == NULL || *fd == -1)
return -EBADF;
@@ -109,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- int cpu, thread, err = 0;
+ struct perf_cpu cpu;
+ int idx, thread, err = 0;
if (cpus == NULL) {
static struct perf_cpu_map *empty_cpu_map;
@@ -136,24 +141,24 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
}
if (evsel->fd == NULL &&
- perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
+ perf_evsel__alloc_fd(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
return -ENOMEM;
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
for (thread = 0; thread < threads->nr; thread++) {
int fd, group_fd, *evsel_fd;
- evsel_fd = FD(evsel, cpu, thread);
+ evsel_fd = FD(evsel, idx, thread);
if (evsel_fd == NULL)
return -EINVAL;
- err = get_group_fd(evsel, cpu, thread, &group_fd);
+ err = get_group_fd(evsel, idx, thread, &group_fd);
if (err < 0)
return err;
fd = sys_perf_event_open(&evsel->attr,
threads->map[thread].pid,
- cpus->map[cpu], group_fd, 0);
+ cpu, group_fd, 0);
if (fd < 0)
return -errno;
@@ -165,12 +170,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
return err;
}
-static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
if (fd && *fd >= 0) {
close(*fd);
@@ -181,10 +186,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
void perf_evsel__close_fd(struct perf_evsel *evsel)
{
- int cpu;
-
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
- perf_evsel__close_fd_cpu(evsel, cpu);
+ for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++)
+ perf_evsel__close_fd_cpu(evsel, idx);
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
@@ -202,29 +205,29 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
}
-void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
if (evsel->fd == NULL)
return;
- perf_evsel__close_fd_cpu(evsel, cpu);
+ perf_evsel__close_fd_cpu(evsel, cpu_map_idx);
}
void perf_evsel__munmap(struct perf_evsel *evsel)
{
- int cpu, thread;
+ int idx, thread;
if (evsel->fd == NULL || evsel->mmap == NULL)
return;
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
if (fd == NULL || *fd < 0)
continue;
- perf_mmap__munmap(MMAP(evsel, cpu, thread));
+ perf_mmap__munmap(MMAP(evsel, idx, thread));
}
}
@@ -234,7 +237,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel)
int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
{
- int ret, cpu, thread;
+ int ret, idx, thread;
struct perf_mmap_param mp = {
.prot = PROT_READ | PROT_WRITE,
.mask = (pages * page_size) - 1,
@@ -246,15 +249,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
return -ENOMEM;
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
struct perf_mmap *map;
+ struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx);
if (fd == NULL || *fd < 0)
continue;
- map = MMAP(evsel, cpu, thread);
+ map = MMAP(evsel, idx, thread);
perf_mmap__init(map, NULL, false, NULL);
ret = perf_mmap__mmap(map, &mp, *fd, cpu);
@@ -268,14 +272,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
return 0;
}
-void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread)
+void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread)
{
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
- if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL)
+ if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL)
return NULL;
- return MMAP(evsel, cpu, thread)->base;
+ return MMAP(evsel, cpu_map_idx, thread)->base;
}
int perf_evsel__read_size(struct perf_evsel *evsel)
@@ -303,19 +307,19 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
return size;
}
-int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
size_t size = perf_evsel__read_size(evsel);
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
memset(count, 0, sizeof(*count));
if (fd == NULL || *fd < 0)
return -EINVAL;
- if (MMAP(evsel, cpu, thread) &&
- !perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
+ if (MMAP(evsel, cpu_map_idx, thread) &&
+ !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count))
return 0;
if (readn(*fd, count->values, size) <= 0)
@@ -326,13 +330,13 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
int ioc, void *arg,
- int cpu)
+ int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
int err;
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
if (fd == NULL || *fd < 0)
return -1;
@@ -346,9 +350,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
return 0;
}
-int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx);
}
int perf_evsel__enable(struct perf_evsel *evsel)
@@ -361,9 +365,9 @@ int perf_evsel__enable(struct perf_evsel *evsel)
return err;
}
-int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx);
}
int perf_evsel__disable(struct perf_evsel *evsel)
@@ -380,7 +384,7 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
int err = 0, i;
- for (i = 0; i < evsel->cpus->nr && !err; i++)
+ for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++)
err = perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_SET_FILTER,
(void *)filter, i);
@@ -431,3 +435,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
evsel->ids = 0;
}
+
+void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled)
+{
+ s8 scaled = 0;
+
+ if (scale) {
+ if (count->run == 0) {
+ scaled = -1;
+ count->val = 0;
+ } else if (count->run < count->ena) {
+ scaled = 1;
+ count->val = (u64)((double)count->val * count->ena / count->run);
+ }
+ }
+
+ if (pscaled)
+ *pscaled = scaled;
+}
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index 840d4032587b..581f9ffb4237 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -4,16 +4,30 @@
#include <linux/refcount.h>
+/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
+struct perf_cpu {
+ int cpu;
+};
+
+/**
+ * A sized, reference counted, sorted array of integers representing CPU
+ * numbers. This is commonly used to capture which CPUs a PMU is associated
+ * with. The indices into the cpumap are frequently used as they avoid having
+ * gaps if CPU numbers were used. For events associated with a pid, rather than
+ * a CPU, a single dummy map with an entry of -1 is used.
+ */
struct perf_cpu_map {
refcount_t refcnt;
+ /** Length of the map array. */
int nr;
- int map[];
+ /** The CPU values. */
+ struct perf_cpu map[];
};
#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 2048
#endif
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu);
+int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index f366dbad6a88..4cefade540bd 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <api/fd/array.h>
+#include <internal/cpumap.h>
#include <internal/evsel.h>
#define PERF_EVLIST__HLIST_BITS 8
@@ -36,7 +37,7 @@ typedef void
typedef struct perf_mmap*
(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
typedef int
-(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
+(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu);
struct perf_evlist_mmap_ops {
perf_evlist_mmap__cb_idx_t idx;
@@ -127,5 +128,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
-void __perf_evlist__set_leader(struct list_head *list);
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader);
#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index 1f3eacbad2e8..cfc9ebd7968e 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -6,8 +6,8 @@
#include <linux/perf_event.h>
#include <stdbool.h>
#include <sys/types.h>
+#include <internal/cpumap.h>
-struct perf_cpu_map;
struct perf_thread_map;
struct xyarray;
@@ -27,7 +27,7 @@ struct perf_sample_id {
* queue number.
*/
int idx;
- int cpu;
+ struct perf_cpu cpu;
pid_t tid;
/* Holds total ID period value for PERF_SAMPLE_READ processing. */
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
index 5e3422f40ed5..5a062af8e9d8 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -6,6 +6,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <stdbool.h>
+#include <internal/cpumap.h>
/* perf sample has 16 bits size limit */
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
@@ -24,7 +25,7 @@ struct perf_mmap {
void *base;
int mask;
int fd;
- int cpu;
+ struct perf_cpu cpu;
refcount_t refcnt;
u64 prev;
u64 start;
@@ -46,7 +47,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map);
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
bool overwrite, libperf_unmap_cb_t unmap_cb);
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu);
+ int fd, struct perf_cpu cpu);
void perf_mmap__munmap(struct perf_mmap *map);
void perf_mmap__get(struct perf_mmap *map);
void perf_mmap__put(struct perf_mmap *map);
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 7c27766ea0bf..15b8faafd615 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -3,11 +3,10 @@
#define __LIBPERF_CPUMAP_H
#include <perf/core.h>
+#include <perf/cpumap.h>
#include <stdio.h>
#include <stdbool.h>
-struct perf_cpu_map;
-
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
@@ -16,10 +15,11 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
+LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map);
+LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h
index 60eae25076d3..2a9516b42d15 100644
--- a/tools/lib/perf/include/perf/evsel.h
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -4,6 +4,8 @@
#include <stdint.h>
#include <perf/core.h>
+#include <stdbool.h>
+#include <linux/types.h>
struct perf_evsel;
struct perf_event_attr;
@@ -26,18 +28,20 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
-LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel);
-LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
-LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
+LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
+LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled);
#endif /* __LIBPERF_EVSEL_H */
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 71468606e8a7..93696affda2e 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -10,6 +10,7 @@ LIBPERF_0.0.1 {
perf_cpu_map__cpu;
perf_cpu_map__empty;
perf_cpu_map__max;
+ perf_cpu_map__has;
perf_thread_map__new_dummy;
perf_thread_map__set_pid;
perf_thread_map__comm;
@@ -50,6 +51,7 @@ LIBPERF_0.0.1 {
perf_mmap__read_init;
perf_mmap__read_done;
perf_mmap__read_event;
+ perf_counts_values__scale;
local:
*;
};
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index c89dfa5f67b3..f7ee07cb5818 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -32,7 +32,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
}
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu)
+ int fd, struct perf_cpu cpu)
{
map->prev = 0;
map->mask = mp->mask;
@@ -353,8 +353,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count
count->ena += delta;
if (idx)
count->run += delta;
-
- cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
}
count->val = cnt;
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ce91a582f0e4..b3479dfa9a1c 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -21,6 +21,9 @@
#include "tests.h"
#include <internal/evsel.h>
+#define EVENT_NUM 15
+#define WAIT_COUNT 100000000UL
+
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
@@ -331,7 +334,8 @@ static int test_mmap_cpus(void)
};
cpu_set_t saved_mask;
char path[PATH_MAX];
- int id, err, cpu, tmp;
+ int id, err, tmp;
+ struct perf_cpu cpu;
union perf_event *event;
int count = 0;
@@ -374,7 +378,7 @@ static int test_mmap_cpus(void)
cpu_set_t mask;
CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
+ CPU_SET(cpu.cpu, &mask);
err = sched_setaffinity(0, sizeof(mask), &mask);
__T("sched_setaffinity failed", err == 0);
@@ -413,6 +417,159 @@ static int test_mmap_cpus(void)
return 0;
}
+static double display_error(long long average,
+ long long high,
+ long long low,
+ long long expected)
+{
+ double error;
+
+ error = (((double)average - expected) / expected) * 100.0;
+
+ __T_VERBOSE(" Expected: %lld\n", expected);
+ __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n",
+ high, low, average);
+
+ __T_VERBOSE(" Average Error = %.2f%%\n", error);
+
+ return error;
+}
+
+static int test_stat_multiplexing(void)
+{
+ struct perf_counts_values expected_counts = { .val = 0 };
+ struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },};
+ struct perf_thread_map *threads;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+ int err, i, nonzero = 0;
+ unsigned long count;
+ long long max = 0, min = 0, avg = 0;
+ double error = 0.0;
+ s8 scaled = 0;
+
+ /* read for non-multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ err = perf_evsel__open(evsel, NULL, threads);
+ __T("failed to open evsel", err == 0);
+
+ err = perf_evsel__enable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ perf_evsel__read(evsel, 0, 0, &expected_counts);
+ __T("failed to read value for evsel", expected_counts.val != 0);
+ __T("failed to read non-multiplexing event count",
+ expected_counts.ena == expected_counts.run);
+
+ err = perf_evsel__disable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+
+ perf_thread_map__put(threads);
+
+ /* read for multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evlist = perf_evlist__new();
+ __T("failed to create evlist", evlist);
+
+ for (i = 0; i < EVENT_NUM; i++) {
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ perf_evlist__add(evlist, evsel);
+ }
+ perf_evlist__set_maps(evlist, NULL, threads);
+
+ err = perf_evlist__open(evlist);
+ __T("failed to open evsel", err == 0);
+
+ perf_evlist__enable(evlist);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ i = 0;
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ perf_evsel__read(evsel, 0, 0, &counts[i]);
+ __T("failed to read value for evsel", counts[i].val != 0);
+ i++;
+ }
+
+ perf_evlist__disable(evlist);
+
+ min = counts[0].val;
+ for (i = 0; i < EVENT_NUM; i++) {
+ __T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n",
+ i, counts[i].val, counts[i].run, counts[i].ena);
+
+ perf_counts_values__scale(&counts[i], true, &scaled);
+ if (scaled == 1) {
+ __T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n",
+ counts[i].val,
+ (double)counts[i].run / (double)counts[i].ena * 100.0,
+ counts[i].run, counts[i].ena);
+ } else if (scaled == -1) {
+ __T_VERBOSE("\t Not Running\n");
+ } else {
+ __T_VERBOSE("\t Not Scaling\n");
+ }
+
+ if (counts[i].val > max)
+ max = counts[i].val;
+
+ if (counts[i].val < min)
+ min = counts[i].val;
+
+ avg += counts[i].val;
+
+ if (counts[i].val != 0)
+ nonzero++;
+ }
+
+ if (nonzero != 0)
+ avg = avg / nonzero;
+ else
+ avg = 0;
+
+ error = display_error(avg, max, min, expected_counts.val);
+
+ __T("Error out of range!", ((error <= 1.0) && (error >= -1.0)));
+
+ perf_evlist__close(evlist);
+ perf_evlist__delete(evlist);
+
+ perf_thread_map__put(threads);
+
+ return 0;
+}
+
int test_evlist(int argc, char **argv)
{
__T_START;
@@ -424,6 +581,7 @@ int test_evlist(int argc, char **argv)
test_stat_thread_enable();
test_mmap_thread();
test_mmap_cpus();
+ test_stat_multiplexing();
__T_END;
return tests_failed == 0 ? 0 : -1;
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index fe58843d047c..8e24c4c78c7f 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1367,6 +1367,14 @@ static int field_is_dynamic(struct tep_format_field *field)
return 0;
}
+static int field_is_relative_dynamic(struct tep_format_field *field)
+{
+ if (strncmp(field->type, "__rel_loc", 9) == 0)
+ return 1;
+
+ return 0;
+}
+
static int field_is_long(struct tep_format_field *field)
{
/* includes long long */
@@ -1622,6 +1630,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
field->flags |= TEP_FIELD_IS_STRING;
if (field_is_dynamic(field))
field->flags |= TEP_FIELD_IS_DYNAMIC;
+ if (field_is_relative_dynamic(field))
+ field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE;
if (field_is_long(field))
field->flags |= TEP_FIELD_IS_LONG;
@@ -2928,7 +2938,7 @@ process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
arg->type = TEP_PRINT_STRING;
arg->string.string = token;
- arg->string.offset = -1;
+ arg->string.field = NULL;
if (read_expected(TEP_EVENT_DELIM, ")") < 0)
goto out_err;
@@ -2957,7 +2967,7 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar
arg->type = TEP_PRINT_BITMASK;
arg->bitmask.bitmask = token;
- arg->bitmask.offset = -1;
+ arg->bitmask.field = NULL;
if (read_expected(TEP_EVENT_DELIM, ")") < 0)
goto out_err;
@@ -3123,19 +3133,23 @@ process_function(struct tep_event *event, struct tep_print_arg *arg,
free_token(token);
return process_int_array(event, arg, tok);
}
- if (strcmp(token, "__get_str") == 0) {
+ if (strcmp(token, "__get_str") == 0 ||
+ strcmp(token, "__get_rel_str") == 0) {
free_token(token);
return process_str(event, arg, tok);
}
- if (strcmp(token, "__get_bitmask") == 0) {
+ if (strcmp(token, "__get_bitmask") == 0 ||
+ strcmp(token, "__get_rel_bitmask") == 0) {
free_token(token);
return process_bitmask(event, arg, tok);
}
- if (strcmp(token, "__get_dynamic_array") == 0) {
+ if (strcmp(token, "__get_dynamic_array") == 0 ||
+ strcmp(token, "__get_rel_dynamic_array") == 0) {
free_token(token);
return process_dynamic_array(event, arg, tok);
}
- if (strcmp(token, "__get_dynamic_array_len") == 0) {
+ if (strcmp(token, "__get_dynamic_array_len") == 0 ||
+ strcmp(token, "__get_rel_dynamic_array_len") == 0) {
free_token(token);
return process_dynamic_array_len(event, arg, tok);
}
@@ -4163,14 +4177,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
case TEP_PRINT_STRING: {
int str_offset;
- if (arg->string.offset == -1) {
- struct tep_format_field *f;
+ if (!arg->string.field)
+ arg->string.field = tep_find_any_field(event, arg->string.string);
+ if (!arg->string.field)
+ break;
- f = tep_find_any_field(event, arg->string.string);
- arg->string.offset = f->offset;
- }
- str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset));
+ str_offset = data2host4(tep,
+ *(unsigned int *)(data + arg->string.field->offset));
str_offset &= 0xffff;
+ if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE)
+ str_offset += arg->string.field->offset + arg->string.field->size;
print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
break;
}
@@ -4181,15 +4197,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
int bitmask_offset;
int bitmask_size;
- if (arg->bitmask.offset == -1) {
- struct tep_format_field *f;
-
- f = tep_find_any_field(event, arg->bitmask.bitmask);
- arg->bitmask.offset = f->offset;
- }
- bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset));
+ if (!arg->bitmask.field)
+ arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask);
+ if (!arg->bitmask.field)
+ break;
+ bitmask_offset = data2host4(tep,
+ *(unsigned int *)(data + arg->bitmask.field->offset));
bitmask_size = bitmask_offset >> 16;
bitmask_offset &= 0xffff;
+ if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE)
+ bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size;
print_bitmask_to_seq(tep, s, format, len_arg,
data + bitmask_offset, bitmask_size);
break;
@@ -5109,6 +5126,8 @@ void tep_print_field(struct trace_seq *s, void *data,
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
@@ -6987,6 +7006,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
data + offset, field->size);
*len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
} else
*len = field->size;
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index a67ad9a5b835..41d4f9f6a843 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -125,6 +125,7 @@ enum tep_format_flags {
TEP_FIELD_IS_LONG = 32,
TEP_FIELD_IS_FLAG = 64,
TEP_FIELD_IS_SYMBOLIC = 128,
+ TEP_FIELD_IS_RELATIVE = 256,
};
struct tep_format_field {
@@ -153,12 +154,12 @@ struct tep_print_arg_atom {
struct tep_print_arg_string {
char *string;
- int offset;
+ struct tep_format_field *field;
};
struct tep_print_arg_bitmask {
char *bitmask;
- int offset;
+ struct tep_format_field *field;
};
struct tep_print_arg_field {
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 368826bb5a57..5df177070d53 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1712,8 +1712,11 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *
if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) {
addr = *(unsigned int *)val;
- val = record->data + (addr & 0xffff);
size = addr >> 16;
+ addr &= 0xffff;
+ if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE)
+ addr += arg->str.field->offset + arg->str.field->size;
+ val = record->data + addr;
}
/*
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8c1931eab5f1..c2d2ab9a2861 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -168,14 +168,16 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"panic",
"do_exit",
"do_task_dead",
- "__module_put_and_exit",
- "complete_and_exit",
+ "kthread_exit",
+ "make_task_dead",
+ "__module_put_and_kthread_exit",
+ "kthread_complete_and_exit",
"__reiserfs_panic",
"lbug_with_loc",
"fortify_panic",
"usercopy_abort",
"machine_real_restart",
- "rewind_stack_do_exit",
+ "rewind_stack_and_make_dead",
"kunit_try_catch_throw",
"xen_start_kernel",
"cpu_bringup_and_idle",
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index cd8ce6e8ec12..7e44b419d301 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -74,12 +74,15 @@ OPTIONS
used when creating a uprobe for a process that resides in a
different mount namespace from the perf(1) utility.
---debuginfod=URLs::
+--debuginfod[=URLs]::
Specify debuginfod URL to be used when retrieving perf.data binaries,
it follows the same syntax as the DEBUGINFOD_URLS variable, like:
buildid-cache.debuginfod=http://192.168.122.174:8002
+ If the URLs is not specified, the value of DEBUGINFOD_URLS
+ system environment variable is used.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1]
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 3bb75c1f25e8..0420e71698ee 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -587,6 +587,15 @@ record.*::
Use 'n' control blocks in asynchronous (Posix AIO) trace writing
mode ('n' default: 1, max: 4).
+ record.debuginfod::
+ Specify debuginfod URL to be used when cacheing perf.data binaries,
+ it follows the same syntax as the DEBUGINFOD_URLS variable, like:
+
+ http://192.168.122.174:8002
+
+ If the URLs is 'system', the value of DEBUGINFOD_URLS system environment
+ variable is used.
+
diff.*::
diff.order::
This option sets the number of columns to sort the result.
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 4dc8d0af19df..57384a97c04f 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -81,7 +81,11 @@ On AMD systems it is implemented using IBS (up to precise-level 2).
The precise modifier works with event types 0x76 (cpu-cycles, CPU
clocks not halted) and 0xC1 (micro-ops retired). Both events map to
IBS execution sampling (IBS op) with the IBS Op Counter Control bit
-(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s
+(IbsOpCntCtl) set respectively (see the
+Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS)
+section of the [AMD Processor Programming Reference (PPR)] relevant to the
+family, model and stepping of the processor being used).
+
Manual Volume 2: System Programming, 13.3 Instruction-Based
Sampling). Examples to use IBS:
@@ -94,10 +98,12 @@ RAW HARDWARE EVENT DESCRIPTOR
Even when an event is not available in a symbolic form within perf right now,
it can be encoded in a per processor specific way.
-For instance For x86 CPUs NNN represents the raw register encoding with the
+For instance on x86 CPUs, N is a hexadecimal value that represents the raw register encoding with the
layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout
-of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344,
-Figure 13-7 Performance Event-Select Register (PerfEvtSeln)).
+of IA32_PERFEVTSELx MSRs) or AMD's PERF_CTL MSRs (see the
+Core Complex (CCX) -> Processor x86 Core -> MSR Registers section of the
+[AMD Processor Programming Reference (PPR)] relevant to the family, model
+and stepping of the processor being used).
Note: Only the following bit fields can be set in x86 counter
registers: event, umask, edge, inv, cmask. Esp. guest/host only and
@@ -126,6 +132,38 @@ It's also possible to use pmu syntax:
perf record -e cpu/r1a8/ ...
perf record -e cpu/r0x1a8/ ...
+Some processors, like those from AMD, support event codes and unit masks
+larger than a byte. In such cases, the bits corresponding to the event
+configuration parameters can be seen with:
+
+ cat /sys/bus/event_source/devices/<pmu>/format/<config>
+
+Example:
+
+If the AMD docs for an EPYC 7713 processor describe an event as:
+
+ Event Umask Event Mask
+ Num. Value Mnemonic Description
+
+ 28FH 03H op_cache_hit_miss.op_cache_hit Counts Op Cache micro-tag
+ hit events.
+
+raw encoding of 0x0328F cannot be used since the upper nibble of the
+EventSelect bits have to be specified via bits 32-35 as can be seen with:
+
+ cat /sys/bus/event_source/devices/cpu/format/event
+
+raw encoding of 0x20000038F should be used instead:
+
+ perf stat -e r20000038f -a sleep 1
+ perf record -e r20000038f ...
+
+It's also possible to use pmu syntax:
+
+ perf record -e r20000038f -a sleep 1
+ perf record -e cpu/r20000038f/ ...
+ perf record -e cpu/r0x20000038f/ ...
+
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
@@ -316,4 +354,4 @@ SEE ALSO
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1],
http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
-http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
+https://bugzilla.kernel.org/show_bug.cgi?id=206537[AMD Processor Programming Reference (PPR)]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 3cf7bac67239..9ccc75935bc5 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -30,8 +30,10 @@ OPTIONS
- a symbolic event name (use 'perf list' to list all events)
- - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ - a raw PMU event in the form of rN where N is a hexadecimal value
+ that represents the raw register encoding with the layout of the
+ event control registers as described by entries in
+ /sys/bus/event_sources/devices/cpu/format/*.
- a symbolic or raw PMU event followed by an optional colon
and a list of event modifiers, e.g., cpu-cycles:p. See the
@@ -713,6 +715,15 @@ measurements:
include::intel-hybrid.txt[]
+--debuginfod[=URLs]::
+ Specify debuginfod URL to be used when cacheing perf.data binaries,
+ it follows the same syntax as the DEBUGINFOD_URLS variable, like:
+
+ http://192.168.122.174:8002
+
+ If the URLs is not specified, the value of DEBUGINFOD_URLS
+ system environment variable is used.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 7e6fb7cbc0f4..c06c341e72b9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -36,8 +36,10 @@ report::
- a symbolic event name (use 'perf list' to list all events)
- - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ - a raw PMU event in the form of rN where N is a hexadecimal value
+ that represents the raw register encoding with the layout of the
+ event control registers as described by entries in
+ /sys/bus/event_sources/devices/cpu/format/*.
- a symbolic or raw PMU event followed by an optional colon
and a list of event modifiers, e.g., cpu-cycles:p. See the
@@ -493,6 +495,10 @@ This option can be enabled in perf config by setting the variable
$ perf config stat.no-csv-summary=true
+--cputype::
+Only enable events on applying cpu with this type for hybrid platform
+(e.g. core or atom)"
+
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 9898a32b8d9c..cac3dfbee7d8 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -38,9 +38,10 @@ Default is to monitor all CPUS.
-e <event>::
--event=<event>::
Select the PMU event. Selection can be a symbolic event name
- (use 'perf list' to list all events) or a raw PMU
- event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ (use 'perf list' to list all events) or a raw PMU event in the form
+ of rN where N is a hexadecimal value that represents the raw register
+ encoding with the layout of the event control registers as described
+ by entries in /sys/bus/event_sources/devices/cpu/format/*.
-E <entries>::
--entries=<entries>::
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 3df74cf5651a..96ad944ca6a8 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -17,6 +17,7 @@ detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
+HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
include $(srctree)/tools/scripts/Makefile.arch
@@ -143,7 +144,10 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif
-OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++
+OPENCSDLIBS := -lopencsd_c_api
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+ OPENCSDLIBS += -lopencsd -lstdc++
+endif
ifdef CSLIBS
LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
endif
@@ -211,6 +215,7 @@ endif
ifneq ($(WERROR),0)
CORE_CFLAGS += -Werror
CXXFLAGS += -Werror
+ HOSTCFLAGS += -Werror
endif
ifndef DEBUG
@@ -290,6 +295,9 @@ CXXFLAGS += -ggdb3
CXXFLAGS += -funwind-tables
CXXFLAGS += -Wno-strict-aliasing
+HOSTCFLAGS += -Wall
+HOSTCFLAGS += -Wextra
+
# Enforce a non-executable stack, as we may regress (again) in the future by
# adding assembler files missing the .GNU-stack linker note.
LDFLAGS += -Wl,-z,noexecstack
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 80522bcfafe0..ac861e42c8f7 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -226,7 +226,7 @@ else
endif
export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
-export HOSTCC HOSTLD HOSTAR
+export HOSTCC HOSTLD HOSTAR HOSTCFLAGS
include $(srctree)/tools/build/Makefile.include
@@ -1041,7 +1041,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
-SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h
+SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
index 4085419283d0..99a06550e25d 100644
--- a/tools/perf/arch/arm/include/perf_regs.h
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -15,46 +15,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_ARM_R0:
- return "r0";
- case PERF_REG_ARM_R1:
- return "r1";
- case PERF_REG_ARM_R2:
- return "r2";
- case PERF_REG_ARM_R3:
- return "r3";
- case PERF_REG_ARM_R4:
- return "r4";
- case PERF_REG_ARM_R5:
- return "r5";
- case PERF_REG_ARM_R6:
- return "r6";
- case PERF_REG_ARM_R7:
- return "r7";
- case PERF_REG_ARM_R8:
- return "r8";
- case PERF_REG_ARM_R9:
- return "r9";
- case PERF_REG_ARM_R10:
- return "r10";
- case PERF_REG_ARM_FP:
- return "fp";
- case PERF_REG_ARM_IP:
- return "ip";
- case PERF_REG_ARM_SP:
- return "sp";
- case PERF_REG_ARM_LR:
- return "lr";
- case PERF_REG_ARM_PC:
- return "pc";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 293a23bf8be3..2e8b2c4365a0 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -203,9 +203,11 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
/* Set option of each CPU we have */
- for (i = 0; i < cpu__max_cpu(); i++) {
- if (!cpu_map__has(event_cpus, i) ||
- !cpu_map__has(online_cpus, i))
+ for (i = 0; i < cpu__max_cpu().cpu; i++) {
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (!perf_cpu_map__has(event_cpus, cpu) ||
+ !perf_cpu_map__has(online_cpus, cpu))
continue;
if (option & BIT(ETM_OPT_CTXTID)) {
@@ -407,25 +409,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
}
- /* Validate auxtrace_mmap_pages provided by user */
- if (opts->auxtrace_mmap_pages) {
- unsigned int max_page = (KiB(128) / page_size);
- size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
-
- if (!privileged &&
- opts->auxtrace_mmap_pages > max_page) {
- opts->auxtrace_mmap_pages = max_page;
- pr_err("auxtrace too big, truncating to %d\n",
- max_page);
- }
-
- if (!is_power_of_2(sz)) {
- pr_err("Invalid mmap size for %s: must be a power of 2\n",
- CORESIGHT_ETM_PMU_NAME);
- return -EINVAL;
- }
- }
-
if (opts->auxtrace_snapshot_mode)
pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
opts->auxtrace_snapshot_size);
@@ -541,9 +524,11 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
/* cpu map is not empty, we have specific CPUs to work with */
if (!perf_cpu_map__empty(event_cpus)) {
- for (i = 0; i < cpu__max_cpu(); i++) {
- if (!cpu_map__has(event_cpus, i) ||
- !cpu_map__has(online_cpus, i))
+ for (i = 0; i < cpu__max_cpu().cpu; i++) {
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (!perf_cpu_map__has(event_cpus, cpu) ||
+ !perf_cpu_map__has(online_cpus, cpu))
continue;
if (cs_etm_is_ete(itr, i))
@@ -555,8 +540,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
}
} else {
/* get configuration for all CPUs in the system */
- for (i = 0; i < cpu__max_cpu(); i++) {
- if (!cpu_map__has(online_cpus, i))
+ for (i = 0; i < cpu__max_cpu().cpu; i++) {
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (!perf_cpu_map__has(online_cpus, cpu))
continue;
if (cs_etm_is_ete(itr, i))
@@ -741,8 +728,10 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
} else {
/* Make sure all specified CPUs are online */
for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
- if (cpu_map__has(event_cpus, i) &&
- !cpu_map__has(online_cpus, i))
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (perf_cpu_map__has(event_cpus, cpu) &&
+ !perf_cpu_map__has(online_cpus, cpu))
return -EINVAL;
}
@@ -762,9 +751,12 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
offset = CS_ETM_SNAPSHOT + 1;
- for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
- if (cpu_map__has(cpu_map, i))
+ for (i = 0; i < cpu__max_cpu().cpu && offset < priv_size; i++) {
+ struct perf_cpu cpu = { .cpu = i, };
+
+ if (perf_cpu_map__has(cpu_map, cpu))
cs_etm_get_metadata(i, &offset, itr, info);
+ }
perf_cpu_map__put(online_cpus);
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index fa3e07459f76..35a3cc775b39 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -4,7 +4,9 @@
#include <stdlib.h>
#include <linux/types.h>
+#define perf_event_arm_regs perf_event_arm64_regs
#include <asm/perf_regs.h>
+#undef perf_event_arm_regs
void perf_regs_load(u64 *regs);
@@ -15,80 +17,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_ARM64_X0:
- return "x0";
- case PERF_REG_ARM64_X1:
- return "x1";
- case PERF_REG_ARM64_X2:
- return "x2";
- case PERF_REG_ARM64_X3:
- return "x3";
- case PERF_REG_ARM64_X4:
- return "x4";
- case PERF_REG_ARM64_X5:
- return "x5";
- case PERF_REG_ARM64_X6:
- return "x6";
- case PERF_REG_ARM64_X7:
- return "x7";
- case PERF_REG_ARM64_X8:
- return "x8";
- case PERF_REG_ARM64_X9:
- return "x9";
- case PERF_REG_ARM64_X10:
- return "x10";
- case PERF_REG_ARM64_X11:
- return "x11";
- case PERF_REG_ARM64_X12:
- return "x12";
- case PERF_REG_ARM64_X13:
- return "x13";
- case PERF_REG_ARM64_X14:
- return "x14";
- case PERF_REG_ARM64_X15:
- return "x15";
- case PERF_REG_ARM64_X16:
- return "x16";
- case PERF_REG_ARM64_X17:
- return "x17";
- case PERF_REG_ARM64_X18:
- return "x18";
- case PERF_REG_ARM64_X19:
- return "x19";
- case PERF_REG_ARM64_X20:
- return "x20";
- case PERF_REG_ARM64_X21:
- return "x21";
- case PERF_REG_ARM64_X22:
- return "x22";
- case PERF_REG_ARM64_X23:
- return "x23";
- case PERF_REG_ARM64_X24:
- return "x24";
- case PERF_REG_ARM64_X25:
- return "x25";
- case PERF_REG_ARM64_X26:
- return "x26";
- case PERF_REG_ARM64_X27:
- return "x27";
- case PERF_REG_ARM64_X28:
- return "x28";
- case PERF_REG_ARM64_X29:
- return "x29";
- case PERF_REG_ARM64_SP:
- return "sp";
- case PERF_REG_ARM64_LR:
- return "lr";
- case PERF_REG_ARM64_PC:
- return "pc";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index 7e7714290a87..d2ce31e28cd7 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -5,6 +5,8 @@
#include <string.h>
#include "debug.h"
#include "symbol.h"
+#include "callchain.h"
+#include "record.h"
/* On arm64, kernel text segment starts at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
+
+void arch__add_leaf_frame_record_opts(struct record_opts *opts)
+{
+ opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
+}
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index d3a18f9c85f6..79124bba713e 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -15,7 +15,7 @@ const struct pmu_events_map *pmu_events_map__find(void)
* The cpumap should cover all CPUs. Otherwise, some CPUs may
* not support some events or have different event IDs.
*/
- if (pmu->cpus->nr != cpu__max_cpu())
+ if (pmu->cpus->nr != cpu__max_cpu().cpu)
return NULL;
return perf_pmu__find_map(pmu);
diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h
index 25ac3bdcb9d1..1afcc0e916c2 100644
--- a/tools/perf/arch/csky/include/perf_regs.h
+++ b/tools/perf/arch/csky/include/perf_regs.h
@@ -15,86 +15,4 @@
#define PERF_REG_IP PERF_REG_CSKY_PC
#define PERF_REG_SP PERF_REG_CSKY_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_CSKY_A0:
- return "a0";
- case PERF_REG_CSKY_A1:
- return "a1";
- case PERF_REG_CSKY_A2:
- return "a2";
- case PERF_REG_CSKY_A3:
- return "a3";
- case PERF_REG_CSKY_REGS0:
- return "regs0";
- case PERF_REG_CSKY_REGS1:
- return "regs1";
- case PERF_REG_CSKY_REGS2:
- return "regs2";
- case PERF_REG_CSKY_REGS3:
- return "regs3";
- case PERF_REG_CSKY_REGS4:
- return "regs4";
- case PERF_REG_CSKY_REGS5:
- return "regs5";
- case PERF_REG_CSKY_REGS6:
- return "regs6";
- case PERF_REG_CSKY_REGS7:
- return "regs7";
- case PERF_REG_CSKY_REGS8:
- return "regs8";
- case PERF_REG_CSKY_REGS9:
- return "regs9";
- case PERF_REG_CSKY_SP:
- return "sp";
- case PERF_REG_CSKY_LR:
- return "lr";
- case PERF_REG_CSKY_PC:
- return "pc";
-#if defined(__CSKYABIV2__)
- case PERF_REG_CSKY_EXREGS0:
- return "exregs0";
- case PERF_REG_CSKY_EXREGS1:
- return "exregs1";
- case PERF_REG_CSKY_EXREGS2:
- return "exregs2";
- case PERF_REG_CSKY_EXREGS3:
- return "exregs3";
- case PERF_REG_CSKY_EXREGS4:
- return "exregs4";
- case PERF_REG_CSKY_EXREGS5:
- return "exregs5";
- case PERF_REG_CSKY_EXREGS6:
- return "exregs6";
- case PERF_REG_CSKY_EXREGS7:
- return "exregs7";
- case PERF_REG_CSKY_EXREGS8:
- return "exregs8";
- case PERF_REG_CSKY_EXREGS9:
- return "exregs9";
- case PERF_REG_CSKY_EXREGS10:
- return "exregs10";
- case PERF_REG_CSKY_EXREGS11:
- return "exregs11";
- case PERF_REG_CSKY_EXREGS12:
- return "exregs12";
- case PERF_REG_CSKY_EXREGS13:
- return "exregs13";
- case PERF_REG_CSKY_EXREGS14:
- return "exregs14";
- case PERF_REG_CSKY_TLS:
- return "tls";
- case PERF_REG_CSKY_HI:
- return "hi";
- case PERF_REG_CSKY_LO:
- return "lo";
-#endif
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index e2c481fcede6..3f1886ad9d80 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -364,3 +364,4 @@
# 447 reserved for memfd_secret
448 n64 process_mrelease sys_process_mrelease
449 n64 futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h
index ee73b36a14d1..b8cd8bbb37ba 100644
--- a/tools/perf/arch/mips/include/perf_regs.h
+++ b/tools/perf/arch/mips/include/perf_regs.h
@@ -12,73 +12,4 @@
#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1)
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_MIPS_PC:
- return "PC";
- case PERF_REG_MIPS_R1:
- return "$1";
- case PERF_REG_MIPS_R2:
- return "$2";
- case PERF_REG_MIPS_R3:
- return "$3";
- case PERF_REG_MIPS_R4:
- return "$4";
- case PERF_REG_MIPS_R5:
- return "$5";
- case PERF_REG_MIPS_R6:
- return "$6";
- case PERF_REG_MIPS_R7:
- return "$7";
- case PERF_REG_MIPS_R8:
- return "$8";
- case PERF_REG_MIPS_R9:
- return "$9";
- case PERF_REG_MIPS_R10:
- return "$10";
- case PERF_REG_MIPS_R11:
- return "$11";
- case PERF_REG_MIPS_R12:
- return "$12";
- case PERF_REG_MIPS_R13:
- return "$13";
- case PERF_REG_MIPS_R14:
- return "$14";
- case PERF_REG_MIPS_R15:
- return "$15";
- case PERF_REG_MIPS_R16:
- return "$16";
- case PERF_REG_MIPS_R17:
- return "$17";
- case PERF_REG_MIPS_R18:
- return "$18";
- case PERF_REG_MIPS_R19:
- return "$19";
- case PERF_REG_MIPS_R20:
- return "$20";
- case PERF_REG_MIPS_R21:
- return "$21";
- case PERF_REG_MIPS_R22:
- return "$22";
- case PERF_REG_MIPS_R23:
- return "$23";
- case PERF_REG_MIPS_R24:
- return "$24";
- case PERF_REG_MIPS_R25:
- return "$25";
- case PERF_REG_MIPS_R28:
- return "$28";
- case PERF_REG_MIPS_R29:
- return "$29";
- case PERF_REG_MIPS_R30:
- return "$30";
- case PERF_REG_MIPS_R31:
- return "$31";
- default:
- break;
- }
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 15109af9d075..2600b4237292 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -529,3 +529,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index 93339d17acc4..9bb17c3f370b 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -19,70 +19,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_POWERPC_NIP
#define PERF_REG_SP PERF_REG_POWERPC_R1
-static const char *reg_names[] = {
- [PERF_REG_POWERPC_R0] = "r0",
- [PERF_REG_POWERPC_R1] = "r1",
- [PERF_REG_POWERPC_R2] = "r2",
- [PERF_REG_POWERPC_R3] = "r3",
- [PERF_REG_POWERPC_R4] = "r4",
- [PERF_REG_POWERPC_R5] = "r5",
- [PERF_REG_POWERPC_R6] = "r6",
- [PERF_REG_POWERPC_R7] = "r7",
- [PERF_REG_POWERPC_R8] = "r8",
- [PERF_REG_POWERPC_R9] = "r9",
- [PERF_REG_POWERPC_R10] = "r10",
- [PERF_REG_POWERPC_R11] = "r11",
- [PERF_REG_POWERPC_R12] = "r12",
- [PERF_REG_POWERPC_R13] = "r13",
- [PERF_REG_POWERPC_R14] = "r14",
- [PERF_REG_POWERPC_R15] = "r15",
- [PERF_REG_POWERPC_R16] = "r16",
- [PERF_REG_POWERPC_R17] = "r17",
- [PERF_REG_POWERPC_R18] = "r18",
- [PERF_REG_POWERPC_R19] = "r19",
- [PERF_REG_POWERPC_R20] = "r20",
- [PERF_REG_POWERPC_R21] = "r21",
- [PERF_REG_POWERPC_R22] = "r22",
- [PERF_REG_POWERPC_R23] = "r23",
- [PERF_REG_POWERPC_R24] = "r24",
- [PERF_REG_POWERPC_R25] = "r25",
- [PERF_REG_POWERPC_R26] = "r26",
- [PERF_REG_POWERPC_R27] = "r27",
- [PERF_REG_POWERPC_R28] = "r28",
- [PERF_REG_POWERPC_R29] = "r29",
- [PERF_REG_POWERPC_R30] = "r30",
- [PERF_REG_POWERPC_R31] = "r31",
- [PERF_REG_POWERPC_NIP] = "nip",
- [PERF_REG_POWERPC_MSR] = "msr",
- [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
- [PERF_REG_POWERPC_CTR] = "ctr",
- [PERF_REG_POWERPC_LINK] = "link",
- [PERF_REG_POWERPC_XER] = "xer",
- [PERF_REG_POWERPC_CCR] = "ccr",
- [PERF_REG_POWERPC_SOFTE] = "softe",
- [PERF_REG_POWERPC_TRAP] = "trap",
- [PERF_REG_POWERPC_DAR] = "dar",
- [PERF_REG_POWERPC_DSISR] = "dsisr",
- [PERF_REG_POWERPC_SIER] = "sier",
- [PERF_REG_POWERPC_MMCRA] = "mmcra",
- [PERF_REG_POWERPC_MMCR0] = "mmcr0",
- [PERF_REG_POWERPC_MMCR1] = "mmcr1",
- [PERF_REG_POWERPC_MMCR2] = "mmcr2",
- [PERF_REG_POWERPC_MMCR3] = "mmcr3",
- [PERF_REG_POWERPC_SIER2] = "sier2",
- [PERF_REG_POWERPC_SIER3] = "sier3",
- [PERF_REG_POWERPC_PMC1] = "pmc1",
- [PERF_REG_POWERPC_PMC2] = "pmc2",
- [PERF_REG_POWERPC_PMC3] = "pmc3",
- [PERF_REG_POWERPC_PMC4] = "pmc4",
- [PERF_REG_POWERPC_PMC5] = "pmc5",
- [PERF_REG_POWERPC_PMC6] = "pmc6",
- [PERF_REG_POWERPC_SDAR] = "sdar",
- [PERF_REG_POWERPC_SIAR] = "siar",
-};
-
-static inline const char *__perf_reg_name(int id)
-{
- return reg_names[id];
-}
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
index 3bf441257466..cf430a4c55b9 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -40,8 +40,12 @@ const char *arch_perf_header_entry(const char *se_header)
{
if (!strcmp(se_header, "Local INSTR Latency"))
return "Finish Cyc";
- else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+ else if (!strcmp(se_header, "INSTR Latency"))
+ return "Global Finish_cyc";
+ else if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
return "Dispatch Cyc";
+ else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+ return "Global Dispatch_cyc";
return se_header;
}
@@ -49,5 +53,7 @@ int arch_support_sort_key(const char *sort_key)
{
if (!strcmp(sort_key, "p_stage_cyc"))
return 1;
+ if (!strcmp(sort_key, "local_p_stage_cyc"))
+ return 1;
return 0;
}
diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h
index 6b02a767c918..6944bf0de53e 100644
--- a/tools/perf/arch/riscv/include/perf_regs.h
+++ b/tools/perf/arch/riscv/include/perf_regs.h
@@ -19,78 +19,4 @@
#define PERF_REG_IP PERF_REG_RISCV_PC
#define PERF_REG_SP PERF_REG_RISCV_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_RISCV_PC:
- return "pc";
- case PERF_REG_RISCV_RA:
- return "ra";
- case PERF_REG_RISCV_SP:
- return "sp";
- case PERF_REG_RISCV_GP:
- return "gp";
- case PERF_REG_RISCV_TP:
- return "tp";
- case PERF_REG_RISCV_T0:
- return "t0";
- case PERF_REG_RISCV_T1:
- return "t1";
- case PERF_REG_RISCV_T2:
- return "t2";
- case PERF_REG_RISCV_S0:
- return "s0";
- case PERF_REG_RISCV_S1:
- return "s1";
- case PERF_REG_RISCV_A0:
- return "a0";
- case PERF_REG_RISCV_A1:
- return "a1";
- case PERF_REG_RISCV_A2:
- return "a2";
- case PERF_REG_RISCV_A3:
- return "a3";
- case PERF_REG_RISCV_A4:
- return "a4";
- case PERF_REG_RISCV_A5:
- return "a5";
- case PERF_REG_RISCV_A6:
- return "a6";
- case PERF_REG_RISCV_A7:
- return "a7";
- case PERF_REG_RISCV_S2:
- return "s2";
- case PERF_REG_RISCV_S3:
- return "s3";
- case PERF_REG_RISCV_S4:
- return "s4";
- case PERF_REG_RISCV_S5:
- return "s5";
- case PERF_REG_RISCV_S6:
- return "s6";
- case PERF_REG_RISCV_S7:
- return "s7";
- case PERF_REG_RISCV_S8:
- return "s8";
- case PERF_REG_RISCV_S9:
- return "s9";
- case PERF_REG_RISCV_S10:
- return "s10";
- case PERF_REG_RISCV_S11:
- return "s11";
- case PERF_REG_RISCV_T3:
- return "t3";
- case PERF_REG_RISCV_T4:
- return "t4";
- case PERF_REG_RISCV_T5:
- return "t5";
- case PERF_REG_RISCV_T6:
- return "t6";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index ed9c5c2eafad..799147658dee 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -452,3 +452,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
index ce3031526623..52fcc0891da6 100644
--- a/tools/perf/arch/s390/include/perf_regs.h
+++ b/tools/perf/arch/s390/include/perf_regs.h
@@ -14,82 +14,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_S390_PC
#define PERF_REG_SP PERF_REG_S390_R15
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_S390_R0:
- return "R0";
- case PERF_REG_S390_R1:
- return "R1";
- case PERF_REG_S390_R2:
- return "R2";
- case PERF_REG_S390_R3:
- return "R3";
- case PERF_REG_S390_R4:
- return "R4";
- case PERF_REG_S390_R5:
- return "R5";
- case PERF_REG_S390_R6:
- return "R6";
- case PERF_REG_S390_R7:
- return "R7";
- case PERF_REG_S390_R8:
- return "R8";
- case PERF_REG_S390_R9:
- return "R9";
- case PERF_REG_S390_R10:
- return "R10";
- case PERF_REG_S390_R11:
- return "R11";
- case PERF_REG_S390_R12:
- return "R12";
- case PERF_REG_S390_R13:
- return "R13";
- case PERF_REG_S390_R14:
- return "R14";
- case PERF_REG_S390_R15:
- return "R15";
- case PERF_REG_S390_FP0:
- return "FP0";
- case PERF_REG_S390_FP1:
- return "FP1";
- case PERF_REG_S390_FP2:
- return "FP2";
- case PERF_REG_S390_FP3:
- return "FP3";
- case PERF_REG_S390_FP4:
- return "FP4";
- case PERF_REG_S390_FP5:
- return "FP5";
- case PERF_REG_S390_FP6:
- return "FP6";
- case PERF_REG_S390_FP7:
- return "FP7";
- case PERF_REG_S390_FP8:
- return "FP8";
- case PERF_REG_S390_FP9:
- return "FP9";
- case PERF_REG_S390_FP10:
- return "FP10";
- case PERF_REG_S390_FP11:
- return "FP11";
- case PERF_REG_S390_FP12:
- return "FP12";
- case PERF_REG_S390_FP13:
- return "FP13";
- case PERF_REG_S390_FP14:
- return "FP14";
- case PERF_REG_S390_FP15:
- return "FP15";
- case PERF_REG_S390_MASK:
- return "MASK";
- case PERF_REG_S390_PC:
- return "PC";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index fe8f8dd157b4..c84d12608cd2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,6 +371,7 @@
447 common memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index cddc4cdc0d9b..16e23b722042 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -23,86 +23,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_X86_AX:
- return "AX";
- case PERF_REG_X86_BX:
- return "BX";
- case PERF_REG_X86_CX:
- return "CX";
- case PERF_REG_X86_DX:
- return "DX";
- case PERF_REG_X86_SI:
- return "SI";
- case PERF_REG_X86_DI:
- return "DI";
- case PERF_REG_X86_BP:
- return "BP";
- case PERF_REG_X86_SP:
- return "SP";
- case PERF_REG_X86_IP:
- return "IP";
- case PERF_REG_X86_FLAGS:
- return "FLAGS";
- case PERF_REG_X86_CS:
- return "CS";
- case PERF_REG_X86_SS:
- return "SS";
- case PERF_REG_X86_DS:
- return "DS";
- case PERF_REG_X86_ES:
- return "ES";
- case PERF_REG_X86_FS:
- return "FS";
- case PERF_REG_X86_GS:
- return "GS";
-#ifdef HAVE_ARCH_X86_64_SUPPORT
- case PERF_REG_X86_R8:
- return "R8";
- case PERF_REG_X86_R9:
- return "R9";
- case PERF_REG_X86_R10:
- return "R10";
- case PERF_REG_X86_R11:
- return "R11";
- case PERF_REG_X86_R12:
- return "R12";
- case PERF_REG_X86_R13:
- return "R13";
- case PERF_REG_X86_R14:
- return "R14";
- case PERF_REG_X86_R15:
- return "R15";
-#endif /* HAVE_ARCH_X86_64_SUPPORT */
-
-#define XMM(x) \
- case PERF_REG_X86_XMM ## x: \
- case PERF_REG_X86_XMM ## x + 1: \
- return "XMM" #x;
- XMM(0)
- XMM(1)
- XMM(2)
- XMM(3)
- XMM(4)
- XMM(5)
- XMM(6)
- XMM(7)
- XMM(8)
- XMM(9)
- XMM(10)
- XMM(11)
- XMM(12)
- XMM(13)
- XMM(14)
- XMM(15)
-#undef XMM
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index 0b0951030a2f..f924246eff78 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -17,3 +17,20 @@ int arch_evlist__add_default_attrs(struct evlist *evlist)
else
return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
}
+
+struct evsel *arch_evlist__leader(struct list_head *list)
+{
+ struct evsel *evsel, *first;
+
+ first = list_first_entry(list, struct evsel, core.node);
+
+ if (!pmu_have_event("cpu", "slots"))
+ return first;
+
+ __evlist__for_each_entry(list, evsel) {
+ if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") &&
+ evsel->name && strstr(evsel->name, "slots"))
+ return evsel;
+ }
+ return first;
+}
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index ddaca75c3bc0..740ae764537e 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -253,7 +253,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
@@ -333,7 +333,7 @@ int bench_epoll_ctl(int argc, const char **argv)
/* default to the number of CPUs */
if (!nthreads)
- nthreads = cpu->nr;
+ nthreads = perf_cpu_map__nr(cpu);
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 79d13dbc0a47..37de970c9743 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -342,7 +342,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
@@ -452,7 +452,7 @@ int bench_epoll_wait(int argc, const char **argv)
/* default to the number of CPUs and leave one for the writer pthread */
if (!nthreads)
- nthreads = cpu->nr - 1;
+ nthreads = perf_cpu_map__nr(cpu) - 1;
worker = calloc(nthreads, sizeof(*worker));
if (!worker) {
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
index 482738e9bdad..de56601f69ee 100644
--- a/tools/perf/bench/evlist-open-close.c
+++ b/tools/perf/bench/evlist-open-close.c
@@ -71,7 +71,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist)
int cnt = 0;
evlist__for_each_entry(evlist, evsel)
- cnt += evsel->core.threads->nr * evsel->core.cpus->nr;
+ cnt += evsel->core.threads->nr * perf_cpu_map__nr(evsel->core.cpus);
return cnt;
}
@@ -151,7 +151,7 @@ static int bench_evlist_open_close__run(char *evstr)
init_stats(&time_stats);
- printf(" Number of cpus:\t%d\n", evlist->core.cpus->nr);
+ printf(" Number of cpus:\t%d\n", perf_cpu_map__nr(evlist->core.cpus));
printf(" Number of threads:\t%d\n", evlist->core.threads->nr);
printf(" Number of events:\t%d (%d fds)\n",
evlist->core.nr_entries, evlist__count_evsel_fds(evlist));
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index fcdea3e44937..dbcecec4eeda 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -150,7 +150,7 @@ int bench_futex_hash(int argc, const char **argv)
}
if (!params.nthreads) /* default to the number of CPUs */
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
@@ -177,7 +177,7 @@ int bench_futex_hash(int argc, const char **argv)
goto errmem;
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 137890f78e17..6fc9a3d55c1f 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -136,7 +136,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
worker[i].futex = &global_futex;
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -173,7 +173,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index f7a5ffebb940..2f59d5d1c509 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -131,7 +131,7 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -175,7 +175,7 @@ int bench_futex_requeue(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 0983f40b4b40..861deb934745 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -152,7 +152,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -252,7 +252,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "calloc");
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
/* some sanity checks */
if (params.nwakes > params.nthreads ||
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 2226a475e782..cfda48bef1d7 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -105,7 +105,7 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -151,7 +151,7 @@ int bench_futex_wake(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index d0895162c2ba..d291f3a8af5f 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -226,7 +226,6 @@ static void run_collection(struct collection *coll)
if (!bench->fn)
break;
printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
- fflush(stdout);
argv[1] = bench->name;
run_bench(coll->name, bench->name, bench->fn, 1, argv);
@@ -247,6 +246,9 @@ int cmd_bench(int argc, const char **argv)
struct collection *coll;
int ret = 0;
+ /* Unbuffered output */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
if (argc < 2) {
/* No collection specified. */
print_usage();
@@ -300,7 +302,6 @@ int cmd_bench(int argc, const char **argv)
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
- fflush(stdout);
ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1);
goto end;
}
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 0db3cfc04c47..cd381693658b 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -351,10 +351,14 @@ static int build_id_cache__show_all(void)
static int perf_buildid_cache_config(const char *var, const char *value, void *cb)
{
- const char **debuginfod = cb;
+ struct perf_debuginfod *di = cb;
- if (!strcmp(var, "buildid-cache.debuginfod"))
- *debuginfod = strdup(value);
+ if (!strcmp(var, "buildid-cache.debuginfod")) {
+ di->urls = strdup(value);
+ if (!di->urls)
+ return -ENOMEM;
+ di->set = true;
+ }
return 0;
}
@@ -373,8 +377,8 @@ int cmd_buildid_cache(int argc, const char **argv)
*purge_name_list_str = NULL,
*missing_filename = NULL,
*update_name_list_str = NULL,
- *kcore_filename = NULL,
- *debuginfod = NULL;
+ *kcore_filename = NULL;
+ struct perf_debuginfod debuginfod = { };
char sbuf[STRERR_BUFSIZE];
struct perf_data data = {
@@ -399,8 +403,10 @@ int cmd_buildid_cache(int argc, const char **argv)
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_STRING('u', "update", &update_name_list_str, "file list",
"file(s) to update"),
- OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url",
- "set debuginfod url"),
+ OPT_STRING_OPTARG_SET(0, "debuginfod", &debuginfod.urls,
+ &debuginfod.set, "debuginfod urls",
+ "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
+ "system"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"),
OPT_END()
@@ -425,10 +431,7 @@ int cmd_buildid_cache(int argc, const char **argv)
if (argc || !(list_files || opts_flag))
usage_with_options(buildid_cache_usage, buildid_cache_options);
- if (debuginfod) {
- pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod);
- setenv("DEBUGINFOD_URLS", debuginfod, 1);
- }
+ perf_debuginfod_setup(&debuginfod);
/* -l is exclusive. It can not be used with other options. */
if (list_files && opts_flag) {
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index b5c67ef73862..77dd4afacca4 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2015,7 +2015,8 @@ static int setup_nodes(struct perf_session *session)
{
struct numa_node *n;
unsigned long **nodes;
- int node, cpu;
+ int node, idx;
+ struct perf_cpu cpu;
int *cpu2node;
if (c2c.node_info > 2)
@@ -2038,8 +2039,8 @@ static int setup_nodes(struct perf_session *session)
if (!cpu2node)
return -ENOMEM;
- for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
- cpu2node[cpu] = -1;
+ for (idx = 0; idx < c2c.cpus_cnt; idx++)
+ cpu2node[idx] = -1;
c2c.cpu2node = cpu2node;
@@ -2057,13 +2058,13 @@ static int setup_nodes(struct perf_session *session)
if (perf_cpu_map__empty(map))
continue;
- for (cpu = 0; cpu < map->nr; cpu++) {
- set_bit(map->map[cpu], set);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ set_bit(cpu.cpu, set);
- if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug"))
+ if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
return -EINVAL;
- cpu2node[map->map[cpu]] = node;
+ cpu2node[cpu.cpu] = node;
}
}
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 87cb11a7a3ee..dec24dc0e767 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -13,7 +13,9 @@
#include <signal.h>
#include <stdlib.h>
#include <fcntl.h>
+#include <math.h>
#include <poll.h>
+#include <ctype.h>
#include <linux/capability.h>
#include <linux/string.h>
@@ -28,36 +30,12 @@
#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
+#include "util/ftrace.h"
#include "util/units.h"
#include "util/parse-sublevel-options.h"
#define DEFAULT_TRACER "function_graph"
-struct perf_ftrace {
- struct evlist *evlist;
- struct target target;
- const char *tracer;
- struct list_head filters;
- struct list_head notrace;
- struct list_head graph_funcs;
- struct list_head nograph_funcs;
- int graph_depth;
- unsigned long percpu_buffer_size;
- bool inherit;
- int func_stack_trace;
- int func_irq_info;
- int graph_nosleep_time;
- int graph_noirqs;
- int graph_verbose;
- int graph_thresh;
- unsigned int initial_delay;
-};
-
-struct filter_entry {
- struct list_head list;
- char name[];
-};
-
static volatile int workload_exec_errno;
static bool done;
@@ -303,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap)
int ret;
int last_cpu;
- last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1);
+ last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu;
mask_size = last_cpu / 4 + 2; /* one more byte for EOS */
mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */
@@ -565,7 +543,24 @@ static int set_tracing_options(struct perf_ftrace *ftrace)
return 0;
}
-static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
+static void select_tracer(struct perf_ftrace *ftrace)
+{
+ bool graph = !list_empty(&ftrace->graph_funcs) ||
+ !list_empty(&ftrace->nograph_funcs);
+ bool func = !list_empty(&ftrace->filters) ||
+ !list_empty(&ftrace->notrace);
+
+ /* The function_graph has priority over function tracer. */
+ if (graph)
+ ftrace->tracer = "function_graph";
+ else if (func)
+ ftrace->tracer = "function";
+ /* Otherwise, the default tracer is used. */
+
+ pr_debug("%s tracer is used\n", ftrace->tracer);
+}
+
+static int __cmd_ftrace(struct perf_ftrace *ftrace)
{
char *trace_file;
int trace_fd;
@@ -586,10 +581,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
return -1;
}
- signal(SIGINT, sig_handler);
- signal(SIGUSR1, sig_handler);
- signal(SIGCHLD, sig_handler);
- signal(SIGPIPE, sig_handler);
+ select_tracer(ftrace);
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
@@ -600,11 +592,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
if (write_tracing_file("trace", "0") < 0)
goto out;
- if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false,
- ftrace__workload_exec_failed_signal) < 0) {
- goto out;
- }
-
if (set_tracing_options(ftrace) < 0)
goto out_reset;
@@ -693,6 +680,270 @@ out:
return (done && !workload_exec_errno) ? 0 : -1;
}
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+{
+ char *p, *q;
+ char *unit;
+ double num;
+ int i;
+
+ /* ensure NUL termination */
+ buf[len] = '\0';
+
+ /* handle data line by line */
+ for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
+ *q = '\0';
+ /* move it to the line buffer */
+ strcat(linebuf, p);
+
+ /*
+ * parse trace output to get function duration like in
+ *
+ * # tracer: function_graph
+ * #
+ * # CPU DURATION FUNCTION CALLS
+ * # | | | | | | |
+ * 1) + 10.291 us | do_filp_open();
+ * 1) 4.889 us | do_filp_open();
+ * 1) 6.086 us | do_filp_open();
+ *
+ */
+ if (linebuf[0] == '#')
+ goto next;
+
+ /* ignore CPU */
+ p = strchr(linebuf, ')');
+ if (p == NULL)
+ p = linebuf;
+
+ while (*p && !isdigit(*p) && (*p != '|'))
+ p++;
+
+ /* no duration */
+ if (*p == '\0' || *p == '|')
+ goto next;
+
+ num = strtod(p, &unit);
+ if (!unit || strncmp(unit, " us", 3))
+ goto next;
+
+ i = log2(num);
+ if (i < 0)
+ i = 0;
+ if (i >= NUM_BUCKET)
+ i = NUM_BUCKET - 1;
+
+ buckets[i]++;
+
+next:
+ /* empty the line buffer for the next output */
+ linebuf[0] = '\0';
+ }
+
+ /* preserve any remaining output (before newline) */
+ strcat(linebuf, p);
+}
+
+static void display_histogram(int buckets[])
+{
+ int i;
+ int total = 0;
+ int bar_total = 46; /* to fit in 80 column */
+ char bar[] = "###############################################";
+ int bar_len;
+
+ for (i = 0; i < NUM_BUCKET; i++)
+ total += buckets[i];
+
+ if (total == 0) {
+ printf("No data found\n");
+ return;
+ }
+
+ printf("# %14s | %10s | %-*s |\n",
+ " DURATION ", "COUNT", bar_total, "GRAPH");
+
+ bar_len = buckets[0] * bar_total / total;
+ printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
+ 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
+
+ for (i = 1; i < NUM_BUCKET - 1; i++) {
+ int start = (1 << (i - 1));
+ int stop = 1 << i;
+ const char *unit = "us";
+
+ if (start >= 1024) {
+ start >>= 10;
+ stop >>= 10;
+ unit = "ms";
+ }
+ bar_len = buckets[i] * bar_total / total;
+ printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
+ start, stop, unit, buckets[i], bar_len, bar,
+ bar_total - bar_len, "");
+ }
+
+ bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
+ printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
+ 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
+ bar_total - bar_len, "");
+
+}
+
+static int prepare_func_latency(struct perf_ftrace *ftrace)
+{
+ char *trace_file;
+ int fd;
+
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_prepare_bpf(ftrace);
+
+ if (reset_tracing_files(ftrace) < 0) {
+ pr_err("failed to reset ftrace\n");
+ return -1;
+ }
+
+ /* reset ftrace buffer */
+ if (write_tracing_file("trace", "0") < 0)
+ return -1;
+
+ if (set_tracing_options(ftrace) < 0)
+ return -1;
+
+ /* force to use the function_graph tracer to track duration */
+ if (write_tracing_file("current_tracer", "function_graph") < 0) {
+ pr_err("failed to set current_tracer to function_graph\n");
+ return -1;
+ }
+
+ trace_file = get_tracing_file("trace_pipe");
+ if (!trace_file) {
+ pr_err("failed to open trace_pipe\n");
+ return -1;
+ }
+
+ fd = open(trace_file, O_RDONLY);
+ if (fd < 0)
+ pr_err("failed to open trace_pipe\n");
+
+ put_tracing_file(trace_file);
+ return fd;
+}
+
+static int start_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_start_bpf(ftrace);
+
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int stop_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_stop_bpf(ftrace);
+
+ write_tracing_file("tracing_on", "0");
+ return 0;
+}
+
+static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_read_bpf(ftrace, buckets);
+
+ return 0;
+}
+
+static int cleanup_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_cleanup_bpf(ftrace);
+
+ reset_tracing_files(ftrace);
+ return 0;
+}
+
+static int __cmd_latency(struct perf_ftrace *ftrace)
+{
+ int trace_fd;
+ char buf[4096];
+ char line[256];
+ struct pollfd pollfd = {
+ .events = POLLIN,
+ };
+ int buckets[NUM_BUCKET] = { };
+
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
+ pr_err("ftrace only works for %s!\n",
+#ifdef HAVE_LIBCAP_SUPPORT
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
+#else
+ "root"
+#endif
+ );
+ return -1;
+ }
+
+ trace_fd = prepare_func_latency(ftrace);
+ if (trace_fd < 0)
+ goto out;
+
+ fcntl(trace_fd, F_SETFL, O_NONBLOCK);
+ pollfd.fd = trace_fd;
+
+ if (start_func_latency(ftrace) < 0)
+ goto out;
+
+ evlist__start_workload(ftrace->evlist);
+
+ line[0] = '\0';
+ while (!done) {
+ if (poll(&pollfd, 1, -1) < 0)
+ break;
+
+ if (pollfd.revents & POLLIN) {
+ int n = read(trace_fd, buf, sizeof(buf) - 1);
+ if (n < 0)
+ break;
+
+ make_histogram(buckets, buf, n, line);
+ }
+ }
+
+ stop_func_latency(ftrace);
+
+ if (workload_exec_errno) {
+ const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
+ pr_err("workload failed: %s\n", emsg);
+ goto out;
+ }
+
+ /* read remaining buffer contents */
+ while (!ftrace->target.use_bpf) {
+ int n = read(trace_fd, buf, sizeof(buf) - 1);
+ if (n <= 0)
+ break;
+ make_histogram(buckets, buf, n, line);
+ }
+
+ read_func_latency(ftrace, buckets);
+
+ display_histogram(buckets);
+
+out:
+ close(trace_fd);
+ cleanup_func_latency(ftrace);
+
+ return (done && !workload_exec_errno) ? 0 : -1;
+}
+
static int perf_ftrace_config(const char *var, const char *value, void *cb)
{
struct perf_ftrace *ftrace = cb;
@@ -855,22 +1106,11 @@ static int parse_graph_tracer_opts(const struct option *opt,
return 0;
}
-static void select_tracer(struct perf_ftrace *ftrace)
-{
- bool graph = !list_empty(&ftrace->graph_funcs) ||
- !list_empty(&ftrace->nograph_funcs);
- bool func = !list_empty(&ftrace->filters) ||
- !list_empty(&ftrace->notrace);
-
- /* The function_graph has priority over function tracer. */
- if (graph)
- ftrace->tracer = "function_graph";
- else if (func)
- ftrace->tracer = "function";
- /* Otherwise, the default tracer is used. */
-
- pr_debug("%s tracer is used\n", ftrace->tracer);
-}
+enum perf_ftrace_subcommand {
+ PERF_FTRACE_NONE,
+ PERF_FTRACE_TRACE,
+ PERF_FTRACE_LATENCY,
+};
int cmd_ftrace(int argc, const char **argv)
{
@@ -879,17 +1119,7 @@ int cmd_ftrace(int argc, const char **argv)
.tracer = DEFAULT_TRACER,
.target = { .uid = UINT_MAX, },
};
- const char * const ftrace_usage[] = {
- "perf ftrace [<options>] [<command>]",
- "perf ftrace [<options>] -- <command> [<options>]",
- NULL
- };
- const struct option ftrace_options[] = {
- OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
- "Tracer to use: function_graph(default) or function"),
- OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
- "Show available functions to filter",
- opt_list_avail_functions, "*"),
+ const struct option common_options[] = {
OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
"Trace on existing process id"),
/* TODO: Add short option -t after -t/--tracer can be removed. */
@@ -901,6 +1131,14 @@ int cmd_ftrace(int argc, const char **argv)
"System-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
"List of cpus to monitor"),
+ OPT_END()
+ };
+ const struct option ftrace_options[] = {
+ OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
+ "Tracer to use: function_graph(default) or function"),
+ OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+ "Show available functions to filter",
+ opt_list_avail_functions, "*"),
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Trace given functions using function tracer",
parse_filter_func),
@@ -923,24 +1161,65 @@ int cmd_ftrace(int argc, const char **argv)
"Trace children processes"),
OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
"Number of milliseconds to wait before starting tracing after program start"),
- OPT_END()
+ OPT_PARENT(common_options),
+ };
+ const struct option latency_options[] = {
+ OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+ "Show latency of given function", parse_filter_func),
+#ifdef HAVE_BPF_SKEL
+ OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
+ "Use BPF to measure function latency"),
+#endif
+ OPT_PARENT(common_options),
+ };
+ const struct option *options = ftrace_options;
+
+ const char * const ftrace_usage[] = {
+ "perf ftrace [<options>] [<command>]",
+ "perf ftrace [<options>] -- [<command>] [<options>]",
+ "perf ftrace {trace|latency} [<options>] [<command>]",
+ "perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
+ NULL
};
+ enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
INIT_LIST_HEAD(&ftrace.filters);
INIT_LIST_HEAD(&ftrace.notrace);
INIT_LIST_HEAD(&ftrace.graph_funcs);
INIT_LIST_HEAD(&ftrace.nograph_funcs);
+ signal(SIGINT, sig_handler);
+ signal(SIGUSR1, sig_handler);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGPIPE, sig_handler);
+
ret = perf_config(perf_ftrace_config, &ftrace);
if (ret < 0)
return -1;
- argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc && target__none(&ftrace.target))
- ftrace.target.system_wide = true;
+ if (argc > 1) {
+ if (!strcmp(argv[1], "trace")) {
+ subcmd = PERF_FTRACE_TRACE;
+ } else if (!strcmp(argv[1], "latency")) {
+ subcmd = PERF_FTRACE_LATENCY;
+ options = latency_options;
+ }
+
+ if (subcmd != PERF_FTRACE_NONE) {
+ argc--;
+ argv++;
+ }
+ }
+ /* for backward compatibility */
+ if (subcmd == PERF_FTRACE_NONE)
+ subcmd = PERF_FTRACE_TRACE;
- select_tracer(&ftrace);
+ argc = parse_options(argc, argv, options, ftrace_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (argc < 0) {
+ ret = -EINVAL;
+ goto out_delete_filters;
+ }
ret = target__validate(&ftrace.target);
if (ret) {
@@ -961,7 +1240,35 @@ int cmd_ftrace(int argc, const char **argv)
if (ret < 0)
goto out_delete_evlist;
- ret = __cmd_ftrace(&ftrace, argc, argv);
+ if (argc) {
+ ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target,
+ argv, false,
+ ftrace__workload_exec_failed_signal);
+ if (ret < 0)
+ goto out_delete_evlist;
+ }
+
+ switch (subcmd) {
+ case PERF_FTRACE_TRACE:
+ if (!argc && target__none(&ftrace.target))
+ ftrace.target.system_wide = true;
+ ret = __cmd_ftrace(&ftrace);
+ break;
+ case PERF_FTRACE_LATENCY:
+ if (list_empty(&ftrace.filters)) {
+ pr_err("Should provide a function to measure\n");
+ parse_options_usage(ftrace_usage, options, "T", 1);
+ ret = -EINVAL;
+ goto out_delete_evlist;
+ }
+ ret = __cmd_latency(&ftrace);
+ break;
+ case PERF_FTRACE_NONE:
+ default:
+ pr_err("Invalid subcommand\n");
+ ret = -EINVAL;
+ break;
+ }
out_delete_evlist:
evlist__delete(ftrace.evlist);
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 409b721666cb..fbf43a454cba 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -535,12 +535,9 @@ static int perf_event__repipe_exit(struct perf_tool *tool,
static int perf_event__repipe_tracing_data(struct perf_session *session,
union perf_event *event)
{
- int err;
-
perf_event__repipe_synth(session->tool, event);
- err = perf_event__process_tracing_data(session, event);
- return err;
+ return perf_event__process_tracing_data(session, event);
}
static int dso__read_build_id(struct dso *dso)
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index da03a341c63c..99d7ff9a8eff 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -192,7 +192,7 @@ static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_samp
int ret = evsel__process_alloc_event(evsel, sample);
if (!ret) {
- int node1 = cpu__get_node(sample->cpu),
+ int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}),
node2 = evsel__intval(evsel, sample, "node");
if (node1 != node2)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0338b813585a..bb716c953d02 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -111,6 +111,7 @@ struct record {
unsigned long long samples;
struct mmap_cpu_mask affinity_mask;
unsigned long output_max_size; /* = 0: unlimited */
+ struct perf_debuginfod debuginfod;
};
static volatile int done;
@@ -2177,6 +2178,12 @@ static int perf_record_config(const char *var, const char *value, void *cb)
rec->opts.nr_cblocks = nr_cblocks_default;
}
#endif
+ if (!strcmp(var, "record.debuginfod")) {
+ rec->debuginfod.urls = strdup(value);
+ if (!rec->debuginfod.urls)
+ return -ENOMEM;
+ rec->debuginfod.set = true;
+ }
return 0;
}
@@ -2267,6 +2274,10 @@ out_free:
return ret;
}
+void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
+{
+}
+
static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
@@ -2663,6 +2674,10 @@ static struct option __record_options[] = {
parse_control_option),
OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
"Fine-tune event synthesis: default=all", parse_record_synth_option),
+ OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
+ &record.debuginfod.set, "debuginfod urls",
+ "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
+ "system"),
OPT_END()
};
@@ -2716,6 +2731,8 @@ int cmd_record(int argc, const char **argv)
if (err)
return err;
+ perf_debuginfod_setup(&record.debuginfod);
+
/* Make system wide (-a) the default target. */
if (!argc && target__none(&rec->opts.target))
rec->opts.target.system_wide = true;
@@ -2792,7 +2809,7 @@ int cmd_record(int argc, const char **argv)
symbol__init(NULL);
if (rec->opts.affinity != PERF_AFFINITY_SYS) {
- rec->affinity_mask.nbits = cpu__max_cpu();
+ rec->affinity_mask.nbits = cpu__max_cpu().cpu;
rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits);
if (!rec->affinity_mask.bits) {
pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
@@ -2898,6 +2915,10 @@ int cmd_record(int argc, const char **argv)
}
rec->opts.target.hybrid = perf_pmu__has_hybrid();
+
+ if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
+ arch__add_leaf_frame_record_opts(&rec->opts);
+
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8ae400429870..1dd92d8c9279 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep)
}
}
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env));
if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
@@ -1127,7 +1127,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(*pevlist);
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
return 0;
}
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 4527f632ebe4..72d446de9c60 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -167,7 +167,7 @@ struct trace_sched_handler {
struct perf_sched_map {
DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
- int *comp_cpus;
+ struct perf_cpu *comp_cpus;
bool comp;
struct perf_thread_map *color_pids;
const char *color_pids_str;
@@ -191,7 +191,7 @@ struct perf_sched {
* Track the current task - that way we can know whether there's any
* weird events, such as a task being switched away that is not current.
*/
- int max_cpu;
+ struct perf_cpu max_cpu;
u32 curr_pid[MAX_CPUS];
struct thread *curr_thread[MAX_CPUS];
char next_shortname1;
@@ -1535,28 +1535,31 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
- int i, this_cpu = sample->cpu;
+ int i;
+ struct perf_cpu this_cpu = {
+ .cpu = sample->cpu,
+ };
int cpus_nr;
bool new_cpu = false;
const char *color = PERF_COLOR_NORMAL;
char stimestamp[32];
- BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
+ BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0);
- if (this_cpu > sched->max_cpu)
+ if (this_cpu.cpu > sched->max_cpu.cpu)
sched->max_cpu = this_cpu;
if (sched->map.comp) {
cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
- if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
+ if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
sched->map.comp_cpus[cpus_nr++] = this_cpu;
new_cpu = true;
}
} else
- cpus_nr = sched->max_cpu;
+ cpus_nr = sched->max_cpu.cpu;
- timestamp0 = sched->cpu_last_switched[this_cpu];
- sched->cpu_last_switched[this_cpu] = timestamp;
+ timestamp0 = sched->cpu_last_switched[this_cpu.cpu];
+ sched->cpu_last_switched[this_cpu.cpu] = timestamp;
if (timestamp0)
delta = timestamp - timestamp0;
else
@@ -1577,7 +1580,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
return -1;
}
- sched->curr_thread[this_cpu] = thread__get(sched_in);
+ sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
printf(" ");
@@ -1608,8 +1611,10 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
}
for (i = 0; i < cpus_nr; i++) {
- int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
- struct thread *curr_thread = sched->curr_thread[cpu];
+ struct perf_cpu cpu = {
+ .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
+ };
+ struct thread *curr_thread = sched->curr_thread[cpu.cpu];
struct thread_runtime *curr_tr;
const char *pid_color = color;
const char *cpu_color = color;
@@ -1617,19 +1622,19 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
if (curr_thread && thread__has_color(curr_thread))
pid_color = COLOR_PIDS;
- if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
+ if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu))
continue;
- if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
+ if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu))
cpu_color = COLOR_CPUS;
- if (cpu != this_cpu)
+ if (cpu.cpu != this_cpu.cpu)
color_fprintf(stdout, color, " ");
else
color_fprintf(stdout, cpu_color, "*");
- if (sched->curr_thread[cpu]) {
- curr_tr = thread__get_runtime(sched->curr_thread[cpu]);
+ if (sched->curr_thread[cpu.cpu]) {
+ curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]);
if (curr_tr == NULL) {
thread__put(sched_in);
return -1;
@@ -1639,7 +1644,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
color_fprintf(stdout, color, " ");
}
- if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
+ if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
goto out;
timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
@@ -1929,7 +1934,7 @@ static char *timehist_get_commstr(struct thread *thread)
static void timehist_header(struct perf_sched *sched)
{
- u32 ncpus = sched->max_cpu + 1;
+ u32 ncpus = sched->max_cpu.cpu + 1;
u32 i, j;
printf("%15s %6s ", "time", "cpu");
@@ -2008,7 +2013,7 @@ static void timehist_print_sample(struct perf_sched *sched,
struct thread_runtime *tr = thread__priv(thread);
const char *next_comm = evsel__strval(evsel, sample, "next_comm");
const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
- u32 max_cpus = sched->max_cpu + 1;
+ u32 max_cpus = sched->max_cpu.cpu + 1;
char tstr[64];
char nstr[30];
u64 wait_time;
@@ -2389,7 +2394,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched,
timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
printf("%15s [%04d] ", tstr, sample->cpu);
if (sched->show_cpu_visual)
- printf(" %*s ", sched->max_cpu + 1, "");
+ printf(" %*s ", sched->max_cpu.cpu + 1, "");
printf(" %-*s ", comm_width, timehist_get_commstr(thread));
@@ -2449,13 +2454,13 @@ static void timehist_print_migration_event(struct perf_sched *sched,
{
struct thread *thread;
char tstr[64];
- u32 max_cpus = sched->max_cpu + 1;
+ u32 max_cpus;
u32 ocpu, dcpu;
if (sched->summary_only)
return;
- max_cpus = sched->max_cpu + 1;
+ max_cpus = sched->max_cpu.cpu + 1;
ocpu = evsel__intval(evsel, sample, "orig_cpu");
dcpu = evsel__intval(evsel, sample, "dest_cpu");
@@ -2918,7 +2923,7 @@ static void timehist_print_summary(struct perf_sched *sched,
printf(" Total scheduling time (msec): ");
print_sched_time(hist_time, 2);
- printf(" (x %d)\n", sched->max_cpu);
+ printf(" (x %d)\n", sched->max_cpu.cpu);
}
typedef int (*sched_handler)(struct perf_tool *tool,
@@ -2935,9 +2940,11 @@ static int perf_timehist__process_sample(struct perf_tool *tool,
{
struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
int err = 0;
- int this_cpu = sample->cpu;
+ struct perf_cpu this_cpu = {
+ .cpu = sample->cpu,
+ };
- if (this_cpu > sched->max_cpu)
+ if (this_cpu.cpu > sched->max_cpu.cpu)
sched->max_cpu = this_cpu;
if (evsel->handler != NULL) {
@@ -3054,10 +3061,10 @@ static int perf_sched__timehist(struct perf_sched *sched)
goto out;
/* pre-allocate struct for per-CPU idle stats */
- sched->max_cpu = session->header.env.nr_cpus_online;
- if (sched->max_cpu == 0)
- sched->max_cpu = 4;
- if (init_idle_threads(sched->max_cpu))
+ sched->max_cpu.cpu = session->header.env.nr_cpus_online;
+ if (sched->max_cpu.cpu == 0)
+ sched->max_cpu.cpu = 4;
+ if (init_idle_threads(sched->max_cpu.cpu))
goto out;
/* summary_only implies summary option, but don't overwrite summary if set */
@@ -3209,10 +3216,10 @@ static int setup_map_cpus(struct perf_sched *sched)
{
struct perf_cpu_map *map;
- sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+ sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF);
if (sched->map.comp) {
- sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
+ sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int));
if (!sched->map.comp_cpus)
return -1;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c82b033e8942..abae8184e171 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -15,6 +15,7 @@
#include "util/symbol.h"
#include "util/thread.h"
#include "util/trace-event.h"
+#include "util/env.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/evsel_fprintf.h"
@@ -514,7 +515,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
return -EINVAL;
if (PRINT_FIELD(PHYS_ADDR) &&
- evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(DATA_PAGE_SIZE) &&
@@ -648,7 +649,7 @@ out:
return 0;
}
-static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
+static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch,
FILE *fp)
{
unsigned i = 0, r;
@@ -661,7 +662,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs->regs[i++];
- printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
+ printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val);
}
return printed;
@@ -718,17 +719,17 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen,
}
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
- struct perf_event_attr *attr, FILE *fp)
+ struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->intr_regs,
- attr->sample_regs_intr, fp);
+ attr->sample_regs_intr, arch, fp);
}
static int perf_sample__fprintf_uregs(struct perf_sample *sample,
- struct perf_event_attr *attr, FILE *fp)
+ struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->user_regs,
- attr->sample_regs_user, fp);
+ attr->sample_regs_user, arch, fp);
}
static int perf_sample__fprintf_start(struct perf_script *script,
@@ -2000,6 +2001,7 @@ static void process_event(struct perf_script *script,
struct evsel_script *es = evsel->priv;
FILE *fp = es->fp;
char str[PAGE_SIZE_NAME_LEN];
+ const char *arch = perf_env__arch(machine->env);
if (output[type].fields == 0)
return;
@@ -2066,10 +2068,10 @@ static void process_event(struct perf_script *script,
}
if (PRINT_FIELD(IREGS))
- perf_sample__fprintf_iregs(sample, attr, fp);
+ perf_sample__fprintf_iregs(sample, attr, arch, fp);
if (PRINT_FIELD(UREGS))
- perf_sample__fprintf_uregs(sample, attr, fp);
+ perf_sample__fprintf_uregs(sample, attr, arch, fp);
if (PRINT_FIELD(BRSTACK))
perf_sample__fprintf_brstack(sample, thread, attr, fp);
@@ -2113,8 +2115,8 @@ static struct scripting_ops *scripting_ops;
static void __process_stat(struct evsel *counter, u64 tstamp)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
- int ncpus = evsel__nr_cpus(counter);
- int cpu, thread;
+ int idx, thread;
+ struct perf_cpu cpu;
static int header_printed;
if (counter->core.system_wide)
@@ -2127,13 +2129,13 @@ static void __process_stat(struct evsel *counter, u64 tstamp)
}
for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
struct perf_counts_values *counts;
- counts = perf_counts(counter->counts, cpu, thread);
+ counts = perf_counts(counter->counts, idx, thread);
printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n",
- counter->core.cpus->map[cpu],
+ cpu.cpu,
perf_thread_map__pid(counter->core.threads, thread),
counts->val,
counts->ena,
@@ -2316,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(evlist);
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
/* Enable fields for callchain entries */
if (symbol_conf.use_callchain &&
@@ -3466,16 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script)
struct perf_session *session = script->session;
u64 sample_type = evlist__combined_sample_type(session->evlist);
- if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
- if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER)) {
- callchain_param.record_mode = CALLCHAIN_DWARF;
- dwarf_callchain_users = true;
- } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
- callchain_param.record_mode = CALLCHAIN_LBR;
- else
- callchain_param.record_mode = CALLCHAIN_FP;
- }
+ callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env));
if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7974933dbc77..3f98689dd687 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -230,11 +230,12 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b)
if (!a->core.cpus || !b->core.cpus)
return false;
- if (a->core.cpus->nr != b->core.cpus->nr)
+ if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
return false;
- for (int i = 0; i < a->core.cpus->nr; i++) {
- if (a->core.cpus->map[i] != b->core.cpus->map[i])
+ for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
+ if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
+ perf_cpu_map__cpu(b->core.cpus, i).cpu)
return false;
}
@@ -327,34 +328,35 @@ static int write_stat_round_event(u64 tm, u64 type)
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
-static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
+static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread,
struct perf_counts_values *count)
{
- struct perf_sample_id *sid = SID(counter, cpu, thread);
+ struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread);
+ struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx);
return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
process_synthesized_event, NULL);
}
-static int read_single_counter(struct evsel *counter, int cpu,
+static int read_single_counter(struct evsel *counter, int cpu_map_idx,
int thread, struct timespec *rs)
{
if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
struct perf_counts_values *count =
- perf_counts(counter->counts, cpu, thread);
+ perf_counts(counter->counts, cpu_map_idx, thread);
count->ena = count->run = val;
count->val = val;
return 0;
}
- return evsel__read_counter(counter, cpu, thread);
+ return evsel__read_counter(counter, cpu_map_idx, thread);
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
-static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
int thread;
@@ -368,24 +370,24 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
for (thread = 0; thread < nthreads; thread++) {
struct perf_counts_values *count;
- count = perf_counts(counter->counts, cpu, thread);
+ count = perf_counts(counter->counts, cpu_map_idx, thread);
/*
* The leader's group read loads data into its group members
* (via evsel__read_counter()) and sets their count->loaded.
*/
- if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
- read_single_counter(counter, cpu, thread, rs)) {
+ if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) &&
+ read_single_counter(counter, cpu_map_idx, thread, rs)) {
counter->counts->scaled = -1;
- perf_counts(counter->counts, cpu, thread)->ena = 0;
- perf_counts(counter->counts, cpu, thread)->run = 0;
+ perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0;
+ perf_counts(counter->counts, cpu_map_idx, thread)->run = 0;
return -1;
}
- perf_counts__set_loaded(counter->counts, cpu, thread, false);
+ perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false);
if (STAT_RECORD) {
- if (evsel__write_stat_event(counter, cpu, thread, count)) {
+ if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) {
pr_err("failed to write stat event\n");
return -1;
}
@@ -395,7 +397,8 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
fprintf(stat_config.output,
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
evsel__name(counter),
- cpu,
+ perf_cpu_map__cpu(evsel__cpus(counter),
+ cpu_map_idx).cpu,
count->val, count->ena, count->run);
}
}
@@ -405,36 +408,33 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
static int read_affinity_counters(struct timespec *rs)
{
- struct evsel *counter;
- struct affinity affinity;
- int i, ncpus, cpu;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity;
if (all_counters_use_bpf)
return 0;
- if (affinity__setup(&affinity) < 0)
+ if (!target__has_cpu(&target) || target__has_per_thread(&target))
+ affinity = NULL;
+ else if (affinity__setup(&saved_affinity) < 0)
return -1;
+ else
+ affinity = &saved_affinity;
- ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
- if (!target__has_cpu(&target) || target__has_per_thread(&target))
- ncpus = 1;
- evlist__for_each_cpu(evsel_list, i, cpu) {
- if (i >= ncpus)
- break;
- affinity__set(&affinity, cpu);
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
+ struct evsel *counter = evlist_cpu_itr.evsel;
- evlist__for_each_entry(evsel_list, counter) {
- if (evsel__cpu_iter_skip(counter, cpu))
- continue;
- if (evsel__is_bpf(counter))
- continue;
- if (!counter->err) {
- counter->err = read_counter_cpu(counter, rs,
- counter->cpu_iter - 1);
- }
+ if (evsel__is_bpf(counter))
+ continue;
+
+ if (!counter->err) {
+ counter->err = read_counter_cpu(counter, rs,
+ evlist_cpu_itr.cpu_map_idx);
}
}
- affinity__cleanup(&affinity);
+ if (affinity)
+ affinity__cleanup(&saved_affinity);
+
return 0;
}
@@ -788,8 +788,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
- struct affinity affinity;
- int i, cpu, err;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
+ int err;
bool second_pass = false;
if (forks) {
@@ -803,8 +804,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (group)
evlist__set_leader(evsel_list);
- if (affinity__setup(&affinity) < 0)
- return -1;
+ if (!cpu_map__is_dummy(evsel_list->core.cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return -1;
+ affinity = &saved_affinity;
+ }
evlist__for_each_entry(evsel_list, counter) {
if (bpf_counter__load(counter, &target))
@@ -813,103 +817,98 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
all_counters_use_bpf = false;
}
- evlist__for_each_cpu (evsel_list, i, cpu) {
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
+ counter = evlist_cpu_itr.evsel;
+
/*
* bperf calls evsel__open_per_cpu() in bperf__load(), so
* no need to call it again here.
*/
if (target.use_bpf)
break;
- affinity__set(&affinity, cpu);
- evlist__for_each_entry(evsel_list, counter) {
- if (evsel__cpu_iter_skip(counter, cpu))
+ if (counter->reset_group || counter->errored)
+ continue;
+ if (evsel__is_bpf(counter))
+ continue;
+try_again:
+ if (create_perf_stat_counter(counter, &stat_config, &target,
+ evlist_cpu_itr.cpu_map_idx) < 0) {
+
+ /*
+ * Weak group failed. We cannot just undo this here
+ * because earlier CPUs might be in group mode, and the kernel
+ * doesn't support mixing group and non group reads. Defer
+ * it to later.
+ * Don't close here because we're in the wrong affinity.
+ */
+ if ((errno == EINVAL || errno == EBADF) &&
+ evsel__leader(counter) != counter &&
+ counter->weak_group) {
+ evlist__reset_weak_group(evsel_list, counter, false);
+ assert(counter->reset_group);
+ second_pass = true;
continue;
- if (counter->reset_group || counter->errored)
+ }
+
+ switch (stat_handle_error(counter)) {
+ case COUNTER_FATAL:
+ return -1;
+ case COUNTER_RETRY:
+ goto try_again;
+ case COUNTER_SKIP:
continue;
- if (evsel__is_bpf(counter))
+ default:
+ break;
+ }
+
+ }
+ counter->supported = true;
+ }
+
+ if (second_pass) {
+ /*
+ * Now redo all the weak group after closing them,
+ * and also close errored counters.
+ */
+
+ /* First close errored or weak retry */
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
+ counter = evlist_cpu_itr.evsel;
+
+ if (!counter->reset_group && !counter->errored)
continue;
-try_again:
+
+ perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
+ }
+ /* Now reopen weak */
+ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
+ counter = evlist_cpu_itr.evsel;
+
+ if (!counter->reset_group && !counter->errored)
+ continue;
+ if (!counter->reset_group)
+ continue;
+try_again_reset:
+ pr_debug2("reopening weak %s\n", evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
- counter->cpu_iter - 1) < 0) {
-
- /*
- * Weak group failed. We cannot just undo this here
- * because earlier CPUs might be in group mode, and the kernel
- * doesn't support mixing group and non group reads. Defer
- * it to later.
- * Don't close here because we're in the wrong affinity.
- */
- if ((errno == EINVAL || errno == EBADF) &&
- evsel__leader(counter) != counter &&
- counter->weak_group) {
- evlist__reset_weak_group(evsel_list, counter, false);
- assert(counter->reset_group);
- second_pass = true;
- continue;
- }
+ evlist_cpu_itr.cpu_map_idx) < 0) {
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
- goto try_again;
+ goto try_again_reset;
case COUNTER_SKIP:
continue;
default:
break;
}
-
}
counter->supported = true;
}
}
-
- if (second_pass) {
- /*
- * Now redo all the weak group after closing them,
- * and also close errored counters.
- */
-
- evlist__for_each_cpu(evsel_list, i, cpu) {
- affinity__set(&affinity, cpu);
- /* First close errored or weak retry */
- evlist__for_each_entry(evsel_list, counter) {
- if (!counter->reset_group && !counter->errored)
- continue;
- if (evsel__cpu_iter_skip_no_inc(counter, cpu))
- continue;
- perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
- }
- /* Now reopen weak */
- evlist__for_each_entry(evsel_list, counter) {
- if (!counter->reset_group && !counter->errored)
- continue;
- if (evsel__cpu_iter_skip(counter, cpu))
- continue;
- if (!counter->reset_group)
- continue;
-try_again_reset:
- pr_debug2("reopening weak %s\n", evsel__name(counter));
- if (create_perf_stat_counter(counter, &stat_config, &target,
- counter->cpu_iter - 1) < 0) {
-
- switch (stat_handle_error(counter)) {
- case COUNTER_FATAL:
- return -1;
- case COUNTER_RETRY:
- goto try_again_reset;
- case COUNTER_SKIP:
- continue;
- default:
- break;
- }
- }
- counter->supported = true;
- }
- }
- }
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evsel_list, counter) {
if (!counter->supported) {
@@ -1168,6 +1167,26 @@ static int parse_stat_cgroups(const struct option *opt,
return parse_cgroups(opt, str, unset);
}
+static int parse_hybrid_type(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct evlist *evlist = *(struct evlist **)opt->value;
+
+ if (!list_empty(&evlist->core.entries)) {
+ fprintf(stderr, "Must define cputype before events/metrics\n");
+ return -1;
+ }
+
+ evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str);
+ if (!evlist->hybrid_pmu_name) {
+ fprintf(stderr, "--cputype %s is not supported!\n", str);
+ return -1;
+ }
+
+ return 0;
+}
+
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1282,6 +1301,10 @@ static struct option stat_options[] = {
"don't print 'summary' for CSV summary output"),
OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
"don't print output (useful with record)"),
+ OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
+ "Only enable events on applying cpu with this type "
+ "for hybrid platform (e.g. core or atom)",
+ parse_hybrid_type),
#ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
"libpfm4 event selector. use 'perf list' to list available events",
@@ -1298,70 +1321,75 @@ static struct option stat_options[] = {
OPT_END()
};
+static const char *const aggr_mode__string[] = {
+ [AGGR_CORE] = "core",
+ [AGGR_DIE] = "die",
+ [AGGR_GLOBAL] = "global",
+ [AGGR_NODE] = "node",
+ [AGGR_NONE] = "none",
+ [AGGR_SOCKET] = "socket",
+ [AGGR_THREAD] = "thread",
+ [AGGR_UNSET] = "unset",
+};
+
static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int cpu)
+ struct perf_cpu cpu)
{
- return cpu_map__get_socket(map, cpu, NULL);
+ return aggr_cpu_id__socket(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int cpu)
+ struct perf_cpu cpu)
{
- return cpu_map__get_die(map, cpu, NULL);
+ return aggr_cpu_id__die(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int cpu)
+ struct perf_cpu cpu)
{
- return cpu_map__get_core(map, cpu, NULL);
+ return aggr_cpu_id__core(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int cpu)
+ struct perf_cpu cpu)
{
- return cpu_map__get_node(map, cpu, NULL);
+ return aggr_cpu_id__node(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
- aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
+ aggr_get_id_t get_id, struct perf_cpu cpu)
{
- int cpu;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (idx >= map->nr)
- return id;
+ if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
+ config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
- cpu = map->map[idx];
-
- if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu]))
- config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
-
- id = config->cpus_aggr_map->map[cpu];
+ id = config->cpus_aggr_map->map[cpu.cpu];
return id;
}
static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
+ return perf_stat__get_aggr(config, perf_stat__get_socket, cpu);
}
static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
+ return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
}
static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
+ return perf_stat__get_aggr(config, perf_stat__get_core, cpu);
}
static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
+ return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
}
static bool term_percore_set(void)
@@ -1376,54 +1404,67 @@ static bool term_percore_set(void)
return false;
}
-static int perf_stat_init_aggr_mode(void)
+static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
{
- int nr;
+ switch (aggr_mode) {
+ case AGGR_SOCKET:
+ return aggr_cpu_id__socket;
+ case AGGR_DIE:
+ return aggr_cpu_id__die;
+ case AGGR_CORE:
+ return aggr_cpu_id__core;
+ case AGGR_NODE:
+ return aggr_cpu_id__node;
+ case AGGR_NONE:
+ if (term_percore_set())
+ return aggr_cpu_id__core;
+
+ return NULL;
+ case AGGR_GLOBAL:
+ case AGGR_THREAD:
+ case AGGR_UNSET:
+ default:
+ return NULL;
+ }
+}
- switch (stat_config.aggr_mode) {
+static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
+{
+ switch (aggr_mode) {
case AGGR_SOCKET:
- if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build socket map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_socket_cached;
- break;
+ return perf_stat__get_socket_cached;
case AGGR_DIE:
- if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build die map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_die_cached;
- break;
+ return perf_stat__get_die_cached;
case AGGR_CORE:
- if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_core_cached;
- break;
+ return perf_stat__get_core_cached;
case AGGR_NODE:
- if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_node_cached;
- break;
+ return perf_stat__get_node_cached;
case AGGR_NONE:
if (term_percore_set()) {
- if (cpu_map__build_core_map(evsel_list->core.cpus,
- &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_core_cached;
+ return perf_stat__get_core_cached;
}
- break;
+ return NULL;
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
- break;
+ return NULL;
+ }
+}
+
+static int perf_stat_init_aggr_mode(void)
+{
+ int nr;
+ aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
+
+ if (get_id) {
+ stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus,
+ get_id, /*data=*/NULL);
+ if (!stat_config.aggr_map) {
+ pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
+ return -1;
+ }
+ stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
}
/*
@@ -1431,7 +1472,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
- nr = perf_cpu_map__max(evsel_list->core.cpus);
+ nr = perf_cpu_map__max(evsel_list->core.cpus).cpu;
stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1);
return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
@@ -1459,169 +1500,139 @@ static void perf_stat__exit_aggr_mode(void)
stat_config.cpus_aggr_map = NULL;
}
-static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
-{
- int cpu;
-
- if (idx > map->nr)
- return -1;
-
- cpu = map->map[idx];
-
- if (cpu >= env->nr_cpus_avail)
- return -1;
-
- return cpu;
-}
-
-static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
- int cpu = perf_env__get_cpu(env, map, idx);
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (cpu != -1)
- id.socket = env->cpu[cpu].socket_id;
+ if (cpu.cpu != -1)
+ id.socket = env->cpu[cpu.cpu].socket_id;
return id;
}
-static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
- int cpu = perf_env__get_cpu(env, map, idx);
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (cpu != -1) {
+ if (cpu.cpu != -1) {
/*
* die_id is relative to socket, so start
* with the socket ID and then add die to
* make a unique ID.
*/
- id.socket = env->cpu[cpu].socket_id;
- id.die = env->cpu[cpu].die_id;
+ id.socket = env->cpu[cpu.cpu].socket_id;
+ id.die = env->cpu[cpu.cpu].die_id;
}
return id;
}
-static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
- int cpu = perf_env__get_cpu(env, map, idx);
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (cpu != -1) {
+ if (cpu.cpu != -1) {
/*
* core_id is relative to socket and die,
* we need a global id. So we set
* socket, die id and core id
*/
- id.socket = env->cpu[cpu].socket_id;
- id.die = env->cpu[cpu].die_id;
- id.core = env->cpu[cpu].core_id;
+ id.socket = env->cpu[cpu.cpu].socket_id;
+ id.die = env->cpu[cpu.cpu].die_id;
+ id.core = env->cpu[cpu.cpu].core_id;
}
return id;
}
-static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
- int cpu = perf_env__get_cpu(data, map, idx);
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
id.node = perf_env__numa_node(data, cpu);
return id;
}
-static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
- struct cpu_aggr_map **sockp)
-{
- return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
-}
-
-static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
- struct cpu_aggr_map **diep)
-{
- return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
-}
-
-static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
- struct cpu_aggr_map **corep)
-{
- return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
-}
-
-static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
- struct cpu_aggr_map **nodep)
-{
- return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
-}
-
static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
+ return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_env__get_die(map, idx, &perf_stat.session->header.env);
+ return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_env__get_core(map, idx, &perf_stat.session->header.env);
+ return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
- struct perf_cpu_map *map, int idx)
+ struct perf_cpu cpu)
{
- return perf_env__get_node(map, idx, &perf_stat.session->header.env);
+ return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
-static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
+static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
{
- struct perf_env *env = &st->session->header.env;
+ switch (aggr_mode) {
+ case AGGR_SOCKET:
+ return perf_env__get_socket_aggr_by_cpu;
+ case AGGR_DIE:
+ return perf_env__get_die_aggr_by_cpu;
+ case AGGR_CORE:
+ return perf_env__get_core_aggr_by_cpu;
+ case AGGR_NODE:
+ return perf_env__get_node_aggr_by_cpu;
+ case AGGR_NONE:
+ case AGGR_GLOBAL:
+ case AGGR_THREAD:
+ case AGGR_UNSET:
+ default:
+ return NULL;
+ }
+}
- switch (stat_config.aggr_mode) {
+static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
+{
+ switch (aggr_mode) {
case AGGR_SOCKET:
- if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build socket map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_socket_file;
- break;
+ return perf_stat__get_socket_file;
case AGGR_DIE:
- if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build die map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_die_file;
- break;
+ return perf_stat__get_die_file;
case AGGR_CORE:
- if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_core_file;
- break;
+ return perf_stat__get_core_file;
case AGGR_NODE:
- if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
- perror("cannot build core map");
- return -1;
- }
- stat_config.aggr_get_id = perf_stat__get_node_file;
- break;
+ return perf_stat__get_node_file;
case AGGR_NONE:
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
- break;
+ return NULL;
}
+}
+
+static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
+{
+ struct perf_env *env = &st->session->header.env;
+ aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
+
+ if (!get_id)
+ return 0;
+ stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, get_id, env);
+ if (!stat_config.aggr_map) {
+ pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
+ return -1;
+ }
+ stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode);
return 0;
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index df9fc00b4cd6..32844d8a0ea5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2726,6 +2726,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
offset = format_field__intval(field, sample, evsel->needs_swap);
syscall_arg.len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
val = (uintptr_t)(sample->raw_data + offset);
@@ -3962,6 +3964,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
evlist__add(evlist, pgfault_min);
}
+ /* Enable ignoring missing threads when -u/-p option is defined. */
+ trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid;
+
if (trace->sched &&
evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c
index 7565a1852c74..b17eb52a0694 100644
--- a/tools/perf/dlfilters/dlfilter-test-api-v0.c
+++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c
@@ -308,8 +308,6 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
- struct filter_data *d = data;
-
pr_debug("%s API\n", __func__);
return do_checks(data, sample, ctx, false);
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json
new file mode 100644
index 000000000000..79f2016c53b0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json
@@ -0,0 +1,8 @@
+[
+ {
+ "ArchStdEvent": "BR_MIS_PRED"
+ },
+ {
+ "ArchStdEvent": "BR_PRED"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json
new file mode 100644
index 000000000000..579c1c993d17
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json
@@ -0,0 +1,20 @@
+[
+ {
+ "ArchStdEvent": "CPU_CYCLES"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS"
+ },
+ {
+ "ArchStdEvent": "BUS_CYCLES"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_RD"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_WR"
+ },
+ {
+ "ArchStdEvent": "CNT_CYCLES"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json
new file mode 100644
index 000000000000..0141f749bff3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json
@@ -0,0 +1,155 @@
+[
+ {
+ "ArchStdEvent": "L1I_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_ALLOCATE"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB"
+ },
+ {
+ "ArchStdEvent": "L1I_TLB"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_ALLOCATE"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB"
+ },
+ {
+ "ArchStdEvent": "DTLB_WALK"
+ },
+ {
+ "ArchStdEvent": "ITLB_WALK"
+ },
+ {
+ "ArchStdEvent": "LL_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "LL_CACHE_MISS_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_LMISS_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_INNER"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_VICTIM"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_CLEAN"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_INVAL"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_INVAL"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_WR"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE_LMISS"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_LMISS_RD"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_LMISS_RD"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json
new file mode 100644
index 000000000000..344a2d552ad5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json
@@ -0,0 +1,47 @@
+[
+ {
+ "ArchStdEvent": "EXC_TAKEN"
+ },
+ {
+ "ArchStdEvent": "MEMORY_ERROR"
+ },
+ {
+ "ArchStdEvent": "EXC_UNDEF"
+ },
+ {
+ "ArchStdEvent": "EXC_SVC"
+ },
+ {
+ "ArchStdEvent": "EXC_PABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_DABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_IRQ"
+ },
+ {
+ "ArchStdEvent": "EXC_FIQ"
+ },
+ {
+ "ArchStdEvent": "EXC_SMC"
+ },
+ {
+ "ArchStdEvent": "EXC_HVC"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_PABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_DABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_OTHER"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_IRQ"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_FIQ"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json
new file mode 100644
index 000000000000..e57cd55937c6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json
@@ -0,0 +1,143 @@
+[
+ {
+ "ArchStdEvent": "SW_INCR"
+ },
+ {
+ "ArchStdEvent": "INST_RETIRED"
+ },
+ {
+ "ArchStdEvent": "EXC_RETURN"
+ },
+ {
+ "ArchStdEvent": "CID_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "TTBR_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "BR_RETIRED"
+ },
+ {
+ "ArchStdEvent": "BR_MIS_PRED_RETIRED"
+ },
+ {
+ "ArchStdEvent": "OP_RETIRED"
+ },
+ {
+ "ArchStdEvent": "OP_SPEC"
+ },
+ {
+ "ArchStdEvent": "LDREX_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_PASS_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_FAIL_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_SPEC"
+ },
+ {
+ "ArchStdEvent": "LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "DP_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SPEC"
+ },
+ {
+ "ArchStdEvent": "VFP_SPEC"
+ },
+ {
+ "ArchStdEvent": "PC_WRITE_SPEC"
+ },
+ {
+ "ArchStdEvent": "CRYPTO_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_IMMED_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_RETURN_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_INDIRECT_SPEC"
+ },
+ {
+ "ArchStdEvent": "ISB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DSB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DMB_SPEC"
+ },
+ {
+ "ArchStdEvent": "RC_LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "RC_ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_HP_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SP_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_DP_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_EMPTY_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_FULL_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LDFF_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LDFF_FAULT_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SCALE_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_FIXED_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT8_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT16_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT32_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT64_SPEC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json
new file mode 100644
index 000000000000..e522113aeb96
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json
@@ -0,0 +1,38 @@
+[
+ {
+ "ArchStdEvent": "MEM_ACCESS"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_RD"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_WR"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LDST_SPEC"
+ },
+ {
+ "ArchStdEvent": "LDST_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "LD_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "ST_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json
new file mode 100644
index 000000000000..20d8365756c5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json
@@ -0,0 +1,5 @@
+[
+ {
+ "ArchStdEvent": "REMOTE_ACCESS"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json
new file mode 100644
index 000000000000..f9fae15f7555
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json
@@ -0,0 +1,23 @@
+[
+ {
+ "ArchStdEvent": "STALL_FRONTEND"
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND"
+ },
+ {
+ "ArchStdEvent": "STALL"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_BACKEND"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_FRONTEND"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT"
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND_MEM"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json
new file mode 100644
index 000000000000..20f2165c85fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json
@@ -0,0 +1,14 @@
+[
+ {
+ "ArchStdEvent": "SAMPLE_POP"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_FEED"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_FILTRATE"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_COLLISION"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json
new file mode 100644
index 000000000000..3116135c59e2
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json
@@ -0,0 +1,29 @@
+[
+ {
+ "ArchStdEvent": "TRB_WRAP"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT0"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT1"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT2"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT3"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT4"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT5"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT6"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT7"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json
index 423767510aff..80d7a70829a0 100644
--- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json
+++ b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json
@@ -300,6 +300,30 @@
"BriefDescription": "No operation sent for execution on a slot"
},
{
+ "PublicDescription": "Sample Population",
+ "EventCode": "0x4000",
+ "EventName": "SAMPLE_POP",
+ "BriefDescription": "Sample Population"
+ },
+ {
+ "PublicDescription": "Sample Taken",
+ "EventCode": "0x4001",
+ "EventName": "SAMPLE_FEED",
+ "BriefDescription": "Sample Taken"
+ },
+ {
+ "PublicDescription": "Sample Taken and not removed by filtering",
+ "EventCode": "0x4002",
+ "EventName": "SAMPLE_FILTRATE",
+ "BriefDescription": "Sample Taken and not removed by filtering"
+ },
+ {
+ "PublicDescription": "Sample collided with previous sample",
+ "EventCode": "0x4003",
+ "EventName": "SAMPLE_COLLISION",
+ "BriefDescription": "Sample collided with previous sample"
+ },
+ {
"PublicDescription": "Constant frequency cycles. The counter increments at a constant frequency equal to the rate of increment of the system counter, CNTPCT_EL0.",
"EventCode": "0x4004",
"EventName": "CNT_CYCLES",
@@ -330,6 +354,96 @@
"BriefDescription": "Level 3 data cache long-latency read miss"
},
{
+ "PublicDescription": "Trace buffer current write pointer wrapped",
+ "EventCode": "0x400C",
+ "EventName": "TRB_WRAP",
+ "BriefDescription": "Trace buffer current write pointer wrapped"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 0",
+ "EventCode": "0x4010",
+ "EventName": "TRCEXTOUT0",
+ "BriefDescription": "PE Trace Unit external output 0"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 1",
+ "EventCode": "0x4011",
+ "EventName": "TRCEXTOUT1",
+ "BriefDescription": "PE Trace Unit external output 1"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 2",
+ "EventCode": "0x4012",
+ "EventName": "TRCEXTOUT2",
+ "BriefDescription": "PE Trace Unit external output 2"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 3",
+ "EventCode": "0x4013",
+ "EventName": "TRCEXTOUT3",
+ "BriefDescription": "PE Trace Unit external output 3"
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 4",
+ "EventCode": "0x4018",
+ "EventName": "CTI_TRIGOUT4",
+ "BriefDescription": "Cross-trigger Interface output trigger 4"
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 5 ",
+ "EventCode": "0x4019",
+ "EventName": "CTI_TRIGOUT5",
+ "BriefDescription": "Cross-trigger Interface output trigger 5 "
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 6",
+ "EventCode": "0x401A",
+ "EventName": "CTI_TRIGOUT6",
+ "BriefDescription": "Cross-trigger Interface output trigger 6"
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 7",
+ "EventCode": "0x401B",
+ "EventName": "CTI_TRIGOUT7",
+ "BriefDescription": "Cross-trigger Interface output trigger 7"
+ },
+ {
+ "PublicDescription": "Access with additional latency from alignment",
+ "EventCode": "0x4020",
+ "EventName": "LDST_ALIGN_LAT",
+ "BriefDescription": "Access with additional latency from alignment"
+ },
+ {
+ "PublicDescription": "Load with additional latency from alignment",
+ "EventCode": "0x4021",
+ "EventName": "LD_ALIGN_LAT",
+ "BriefDescription": "Load with additional latency from alignment"
+ },
+ {
+ "PublicDescription": "Store with additional latency from alignment",
+ "EventCode": "0x4022",
+ "EventName": "ST_ALIGN_LAT",
+ "BriefDescription": "Store with additional latency from alignment"
+ },
+ {
+ "PublicDescription": "Checked data memory access",
+ "EventCode": "0x4024",
+ "EventName": "MEM_ACCESS_CHECKED",
+ "BriefDescription": "Checked data memory access"
+ },
+ {
+ "PublicDescription": "Checked data memory access, read",
+ "EventCode": "0x4025",
+ "EventName": "MEM_ACCESS_CHECKED_RD",
+ "BriefDescription": "Checked data memory access, read"
+ },
+ {
+ "PublicDescription": "Checked data memory access, write",
+ "EventCode": "0x4026",
+ "EventName": "MEM_ACCESS_CHECKED_WR",
+ "BriefDescription": "Checked data memory access, write"
+ },
+ {
"PublicDescription": "SIMD Instruction architecturally executed.",
"EventCode": "0x8000",
"EventName": "SIMD_INST_RETIRED",
@@ -342,6 +456,18 @@
"BriefDescription": "Instruction architecturally executed, SVE."
},
{
+ "PublicDescription": "ASE operations speculatively executed",
+ "EventCode": "0x8005",
+ "EventName": "ASE_INST_SPEC",
+ "BriefDescription": "ASE operations speculatively executed"
+ },
+ {
+ "PublicDescription": "SVE operations speculatively executed",
+ "EventCode": "0x8006",
+ "EventName": "SVE_INST_SPEC",
+ "BriefDescription": "SVE operations speculatively executed"
+ },
+ {
"PublicDescription": "Microarchitectural operation, Operations speculatively executed.",
"EventCode": "0x8008",
"EventName": "UOP_SPEC",
@@ -360,6 +486,24 @@
"BriefDescription": "Floating-point Operations speculatively executed."
},
{
+ "PublicDescription": "Floating-point half-precision operations speculatively executed",
+ "EventCode": "0x8014",
+ "EventName": "FP_HP_SPEC",
+ "BriefDescription": "Floating-point half-precision operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Floating-point single-precision operations speculatively executed",
+ "EventCode": "0x8018",
+ "EventName": "FP_SP_SPEC",
+ "BriefDescription": "Floating-point single-precision operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Floating-point double-precision operations speculatively executed",
+ "EventCode": "0x801C",
+ "EventName": "FP_DP_SPEC",
+ "BriefDescription": "Floating-point double-precision operations speculatively executed"
+ },
+ {
"PublicDescription": "Floating-point FMA Operations speculatively executed.",
"EventCode": "0x8028",
"EventName": "FP_FMA_SPEC",
@@ -390,6 +534,30 @@
"BriefDescription": "SVE predicated Operations speculatively executed."
},
{
+ "PublicDescription": "SVE predicated operations with no active predicates speculatively executed",
+ "EventCode": "0x8075",
+ "EventName": "SVE_PRED_EMPTY_SPEC",
+ "BriefDescription": "SVE predicated operations with no active predicates speculatively executed"
+ },
+ {
+ "PublicDescription": "SVE predicated operations speculatively executed with all active predicates",
+ "EventCode": "0x8076",
+ "EventName": "SVE_PRED_FULL_SPEC",
+ "BriefDescription": "SVE predicated operations speculatively executed with all active predicates"
+ },
+ {
+ "PublicDescription": "SVE predicated operations speculatively executed with partially active predicates",
+ "EventCode": "0x8077",
+ "EventName": "SVE_PRED_PARTIAL_SPEC",
+ "BriefDescription": "SVE predicated operations speculatively executed with partially active predicates"
+ },
+ {
+ "PublicDescription": "SVE predicated operations with empty or partially active predicates",
+ "EventCode": "0x8079",
+ "EventName": "SVE_PRED_NOT_FULL_SPEC",
+ "BriefDescription": "SVE predicated operations with empty or partially active predicates"
+ },
+ {
"PublicDescription": "SVE MOVPRFX Operations speculatively executed.",
"EventCode": "0x807C",
"EventName": "SVE_MOVPRFX_SPEC",
@@ -498,6 +666,12 @@
"BriefDescription": "SVE First-fault load Operations speculatively executed."
},
{
+ "PublicDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0",
+ "EventCode": "0x80BD",
+ "EventName": "SVE_LDFF_FAULT_SPEC",
+ "BriefDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0"
+ },
+ {
"PublicDescription": "Scalable floating-point element Operations speculatively executed.",
"EventCode": "0x80C0",
"EventName": "FP_SCALE_OPS_SPEC",
@@ -544,5 +718,29 @@
"EventCode": "0x80C7",
"EventName": "FP_DP_FIXED_OPS_SPEC",
"BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed",
+ "EventCode": "0x80E3",
+ "EventName": "ASE_SVE_INT8_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed",
+ "EventCode": "0x80E7",
+ "EventName": "ASE_SVE_INT16_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed",
+ "EventCode": "0x80EB",
+ "EventName": "ASE_SVE_INT32_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed",
+ "EventCode": "0x80EF",
+ "EventName": "ASE_SVE_INT64_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed"
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 31d8b57ca9bb..b899db48c12a 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -19,6 +19,7 @@
0x00000000410fd0b0,v1,arm/cortex-a76-n1,core
0x00000000410fd0c0,v1,arm/cortex-a76-n1,core
0x00000000410fd400,v1,arm/neoverse-v1,core
+0x00000000410fd490,v1,arm/neoverse-n2,core
0x00000000420f5160,v1,cavium/thunderx2,core
0x00000000430f0af0,v1,cavium/thunderx2,core
0x00000000460f0010,v1,fujitsu/a64fx,core
diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/recommended.json
index d0a19866563d..210afa856091 100644
--- a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json
+++ b/tools/perf/pmu-events/arch/arm64/recommended.json
@@ -148,305 +148,305 @@
"EventCode": "0x60",
"EventName": "BUS_ACCESS_RD",
"BriefDescription": "Bus access read"
- },
- {
+ },
+ {
"PublicDescription": "Bus access write",
"EventCode": "0x61",
"EventName": "BUS_ACCESS_WR",
"BriefDescription": "Bus access write"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, Normal, Cacheable, Shareable",
"EventCode": "0x62",
"EventName": "BUS_ACCESS_SHARED",
"BriefDescription": "Bus access, Normal, Cacheable, Shareable"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, not Normal, Cacheable, Shareable",
"EventCode": "0x63",
"EventName": "BUS_ACCESS_NOT_SHARED",
"BriefDescription": "Bus access, not Normal, Cacheable, Shareable"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, Normal",
"EventCode": "0x64",
"EventName": "BUS_ACCESS_NORMAL",
"BriefDescription": "Bus access, Normal"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, peripheral",
"EventCode": "0x65",
"EventName": "BUS_ACCESS_PERIPH",
"BriefDescription": "Bus access, peripheral"
- },
- {
+ },
+ {
"PublicDescription": "Data memory access, read",
"EventCode": "0x66",
"EventName": "MEM_ACCESS_RD",
"BriefDescription": "Data memory access, read"
- },
- {
+ },
+ {
"PublicDescription": "Data memory access, write",
"EventCode": "0x67",
"EventName": "MEM_ACCESS_WR",
"BriefDescription": "Data memory access, write"
- },
- {
+ },
+ {
"PublicDescription": "Unaligned access, read",
"EventCode": "0x68",
"EventName": "UNALIGNED_LD_SPEC",
"BriefDescription": "Unaligned access, read"
- },
- {
+ },
+ {
"PublicDescription": "Unaligned access, write",
"EventCode": "0x69",
"EventName": "UNALIGNED_ST_SPEC",
"BriefDescription": "Unaligned access, write"
- },
- {
+ },
+ {
"PublicDescription": "Unaligned access",
"EventCode": "0x6a",
"EventName": "UNALIGNED_LDST_SPEC",
"BriefDescription": "Unaligned access"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX",
"EventCode": "0x6c",
"EventName": "LDREX_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass",
"EventCode": "0x6d",
"EventName": "STREX_PASS_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail",
"EventCode": "0x6e",
"EventName": "STREX_FAIL_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX",
"EventCode": "0x6f",
"EventName": "STREX_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, load",
"EventCode": "0x70",
"EventName": "LD_SPEC",
"BriefDescription": "Operation speculatively executed, load"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, store",
"EventCode": "0x71",
"EventName": "ST_SPEC",
"BriefDescription": "Operation speculatively executed, store"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, load or store",
"EventCode": "0x72",
"EventName": "LDST_SPEC",
"BriefDescription": "Operation speculatively executed, load or store"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, integer data processing",
"EventCode": "0x73",
"EventName": "DP_SPEC",
"BriefDescription": "Operation speculatively executed, integer data processing"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, Advanced SIMD instruction",
"EventCode": "0x74",
"EventName": "ASE_SPEC",
"BriefDescription": "Operation speculatively executed, Advanced SIMD instruction"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, floating-point instruction",
"EventCode": "0x75",
"EventName": "VFP_SPEC",
"BriefDescription": "Operation speculatively executed, floating-point instruction"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, software change of the PC",
"EventCode": "0x76",
"EventName": "PC_WRITE_SPEC",
"BriefDescription": "Operation speculatively executed, software change of the PC"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, Cryptographic instruction",
"EventCode": "0x77",
"EventName": "CRYPTO_SPEC",
"BriefDescription": "Operation speculatively executed, Cryptographic instruction"
- },
- {
+ },
+ {
"PublicDescription": "Branch speculatively executed, immediate branch",
"EventCode": "0x78",
"EventName": "BR_IMMED_SPEC",
"BriefDescription": "Branch speculatively executed, immediate branch"
- },
- {
+ },
+ {
"PublicDescription": "Branch speculatively executed, procedure return",
"EventCode": "0x79",
"EventName": "BR_RETURN_SPEC",
"BriefDescription": "Branch speculatively executed, procedure return"
- },
- {
+ },
+ {
"PublicDescription": "Branch speculatively executed, indirect branch",
"EventCode": "0x7a",
"EventName": "BR_INDIRECT_SPEC",
"BriefDescription": "Branch speculatively executed, indirect branch"
- },
- {
+ },
+ {
"PublicDescription": "Barrier speculatively executed, ISB",
"EventCode": "0x7c",
"EventName": "ISB_SPEC",
"BriefDescription": "Barrier speculatively executed, ISB"
- },
- {
+ },
+ {
"PublicDescription": "Barrier speculatively executed, DSB",
"EventCode": "0x7d",
"EventName": "DSB_SPEC",
"BriefDescription": "Barrier speculatively executed, DSB"
- },
- {
+ },
+ {
"PublicDescription": "Barrier speculatively executed, DMB",
"EventCode": "0x7e",
"EventName": "DMB_SPEC",
"BriefDescription": "Barrier speculatively executed, DMB"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Other synchronous",
"EventCode": "0x81",
"EventName": "EXC_UNDEF",
"BriefDescription": "Exception taken, Other synchronous"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Supervisor Call",
"EventCode": "0x82",
"EventName": "EXC_SVC",
"BriefDescription": "Exception taken, Supervisor Call"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Instruction Abort",
"EventCode": "0x83",
"EventName": "EXC_PABORT",
"BriefDescription": "Exception taken, Instruction Abort"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Data Abort and SError",
"EventCode": "0x84",
"EventName": "EXC_DABORT",
"BriefDescription": "Exception taken, Data Abort and SError"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, IRQ",
"EventCode": "0x86",
"EventName": "EXC_IRQ",
"BriefDescription": "Exception taken, IRQ"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, FIQ",
"EventCode": "0x87",
"EventName": "EXC_FIQ",
"BriefDescription": "Exception taken, FIQ"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Secure Monitor Call",
"EventCode": "0x88",
"EventName": "EXC_SMC",
"BriefDescription": "Exception taken, Secure Monitor Call"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Hypervisor Call",
"EventCode": "0x8a",
"EventName": "EXC_HVC",
"BriefDescription": "Exception taken, Hypervisor Call"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Instruction Abort not taken locally",
"EventCode": "0x8b",
"EventName": "EXC_TRAP_PABORT",
"BriefDescription": "Exception taken, Instruction Abort not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Data Abort or SError not taken locally",
"EventCode": "0x8c",
"EventName": "EXC_TRAP_DABORT",
"BriefDescription": "Exception taken, Data Abort or SError not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Other traps not taken locally",
"EventCode": "0x8d",
"EventName": "EXC_TRAP_OTHER",
"BriefDescription": "Exception taken, Other traps not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, IRQ not taken locally",
"EventCode": "0x8e",
"EventName": "EXC_TRAP_IRQ",
"BriefDescription": "Exception taken, IRQ not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, FIQ not taken locally",
"EventCode": "0x8f",
"EventName": "EXC_TRAP_FIQ",
"BriefDescription": "Exception taken, FIQ not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Release consistency operation speculatively executed, Load-Acquire",
"EventCode": "0x90",
"EventName": "RC_LD_SPEC",
"BriefDescription": "Release consistency operation speculatively executed, Load-Acquire"
- },
- {
+ },
+ {
"PublicDescription": "Release consistency operation speculatively executed, Store-Release",
"EventCode": "0x91",
"EventName": "RC_ST_SPEC",
"BriefDescription": "Release consistency operation speculatively executed, Store-Release"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache access, read",
"EventCode": "0xa0",
"EventName": "L3D_CACHE_RD",
"BriefDescription": "Attributable Level 3 data or unified cache access, read"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache access, write",
"EventCode": "0xa1",
"EventName": "L3D_CACHE_WR",
"BriefDescription": "Attributable Level 3 data or unified cache access, write"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache refill, read",
"EventCode": "0xa2",
"EventName": "L3D_CACHE_REFILL_RD",
"BriefDescription": "Attributable Level 3 data or unified cache refill, read"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache refill, write",
"EventCode": "0xa3",
"EventName": "L3D_CACHE_REFILL_WR",
"BriefDescription": "Attributable Level 3 data or unified cache refill, write"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim",
"EventCode": "0xa6",
"EventName": "L3D_CACHE_WB_VICTIM",
"BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean",
"EventCode": "0xa7",
"EventName": "L3D_CACHE_WB_CLEAN",
"BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache access, invalidate",
"EventCode": "0xa8",
"EventName": "L3D_CACHE_INVAL",
"BriefDescription": "Attributable Level 3 data or unified cache access, invalidate"
- }
+ }
]
diff --git a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
index 73089c682f80..41bac1c6a008 100644
--- a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
+++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
@@ -19,6 +19,22 @@
"EdgeDetect": "0"
},
{
+ "Unit": "CBO",
+ "EventCode": "0xE0",
+ "UMask": "0x00",
+ "EventName": "event-hyphen",
+ "BriefDescription": "UNC_CBO_HYPHEN",
+ "PublicDescription": "UNC_CBO_HYPHEN"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0xC0",
+ "UMask": "0x00",
+ "EventName": "event-two-hyph",
+ "BriefDescription": "UNC_CBO_TWO_HYPH",
+ "PublicDescription": "UNC_CBO_TWO_HYPH"
+ },
+ {
"EventCode": "0x7",
"EventName": "uncore_hisi_l3c.rd_hit_cpipe",
"BriefDescription": "Total read hits",
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 2e7c4153875b..1a57c3f81dd4 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -672,8 +672,6 @@ static int json_events(const char *fn,
addfield(map, &je.metric_constraint, "", "", val);
} else if (json_streq(map, field, "MetricExpr")) {
addfield(map, &je.metric_expr, "", "", val);
- for (s = je.metric_expr; *s; s++)
- *s = tolower(*s);
} else if (json_streq(map, field, "ArchStdEvent")) {
addfield(map, &arch_std, "", "", val);
for (s = arch_std; *s; s++)
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 803ca426f8e6..af2b37ef7c70 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -65,6 +65,7 @@ perf-y += pe-file-parsing.o
perf-y += expand-cgroup.o
perf-y += perf-time-to-tsc.o
perf-y += dlfilter-test.o
+perf-y += sigtrap.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 0f73e300f207..56fba08a3037 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -65,7 +65,7 @@ do { \
#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)
-static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
+static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags)
{
FILE *file;
@@ -93,7 +93,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
/* syscall arguments */
__WRITE_ASS(fd, "d", fd);
__WRITE_ASS(group_fd, "d", group_fd);
- __WRITE_ASS(cpu, "d", cpu);
+ __WRITE_ASS(cpu, "d", cpu.cpu);
__WRITE_ASS(pid, "d", pid);
__WRITE_ASS(flags, "lu", flags);
@@ -144,7 +144,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
return 0;
}
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags)
{
int errno_saved = errno;
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 384856347236..4965dd666956 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -17,8 +17,8 @@ static unsigned long *get_bitmap(const char *str, int nbits)
bm = bitmap_zalloc(nbits);
if (map && bm) {
- for (i = 0; i < map->nr; i++)
- set_bit(map->map[i], bm);
+ for (i = 0; i < perf_cpu_map__nr(map); i++)
+ set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
}
if (map)
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 8cb5a1c3489e..fac3717d9ba1 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -107,6 +107,7 @@ static struct test_suite *generic_tests[] = {
&suite__expand_cgroup_events,
&suite__perf_time_to_tsc,
&suite__dlfilter,
+ &suite__sigtrap,
NULL,
};
@@ -420,7 +421,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
continue;
st.file = ent->d_name;
- pr_info("%2d: %-*s:", i, width, test_suite.desc);
+ pr_info("%3d: %-*s:", i, width, test_suite.desc);
if (intlist__find(skiplist, i)) {
color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
@@ -470,7 +471,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
continue;
}
- pr_info("%2d: %-*s:", i, width, test_description(t, -1));
+ pr_info("%3d: %-*s:", i, width, test_description(t, -1));
if (intlist__find(skiplist, i)) {
color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
@@ -510,7 +511,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
curr, argc, argv))
continue;
- pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
+ pr_info("%3d.%1d: %-*s:", i, subi + 1, subw,
test_description(t, subi));
test_and_print(t, subi);
}
@@ -545,7 +546,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
if (!perf_test__matches(t.desc, curr, argc, argv))
continue;
- pr_info("%2d: %s\n", i, t.desc);
+ pr_info("%3d: %s\n", i, t.desc);
}
@@ -567,14 +568,14 @@ static int perf_test__list(int argc, const char **argv)
if (!perf_test__matches(test_description(t, -1), curr, argc, argv))
continue;
- pr_info("%2d: %s\n", i, test_description(t, -1));
+ pr_info("%3d: %s\n", i, test_description(t, -1));
if (has_subtests(t)) {
int subn = num_subtests(t);
int subi;
for (subi = 0; subi < subn; subi++)
- pr_info("%2d:%1d: %s\n", i, subi + 1,
+ pr_info("%3d:%1d: %s\n", i, subi + 1,
test_description(t, subi));
}
}
@@ -606,6 +607,9 @@ int cmd_test(int argc, const char **argv)
if (ret < 0)
return ret;
+ /* Unbuffered output */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
if (argc >= 1 && !strcmp(argv[0], "list"))
return perf_test__list(argc - 1, argv + 1);
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 89a155092f85..84e87e31f119 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -38,7 +38,7 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused,
TEST_ASSERT_VAL("wrong nr", map->nr == 20);
for (i = 0; i < 20; i++) {
- TEST_ASSERT_VAL("wrong cpu", map->map[i] == i);
+ TEST_ASSERT_VAL("wrong cpu", map->map[i].cpu == i);
}
perf_cpu_map__put(map);
@@ -67,8 +67,8 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
map = cpu_map__new_data(data);
TEST_ASSERT_VAL("wrong nr", map->nr == 2);
- TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1);
- TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256);
+ TEST_ASSERT_VAL("wrong cpu", map->map[0].cpu == 1);
+ TEST_ASSERT_VAL("wrong cpu", map->map[1].cpu == 256);
TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
perf_cpu_map__put(map);
return 0;
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index d01532d40acb..78db4d704e76 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -75,10 +75,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
TEST_ASSERT_VAL("wrong id", ev->id == 123);
TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS);
- TEST_ASSERT_VAL("wrong cpus", map->nr == 3);
- TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1);
- TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2);
- TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__nr(map) == 3);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 0).cpu == 1);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 1).cpu == 2);
+ TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 2).cpu == 3);
perf_cpu_map__put(map);
return 0;
}
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index b17b86391383..4c96829510c9 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -25,14 +25,15 @@ static unsigned long *get_bitmap(const char *str, int nbits)
{
struct perf_cpu_map *map = perf_cpu_map__new(str);
unsigned long *bm = NULL;
- int i;
bm = bitmap_zalloc(nbits);
if (map && bm) {
- for (i = 0; i < map->nr; i++) {
- set_bit(map->map[i], bm);
- }
+ struct perf_cpu cpu;
+ int i;
+
+ perf_cpu_map__for_each_cpu(cpu, i, map)
+ set_bit(cpu.cpu, bm);
}
if (map)
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 90b2feda31ac..c3c17600f29c 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -59,11 +59,12 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
}
CPU_ZERO(&cpu_set);
- CPU_SET(cpus->map[0], &cpu_set);
+ CPU_SET(perf_cpu_map__cpu(cpus, 0).cpu, &cpu_set);
sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
pr_debug("sched_setaffinity() failed on CPU %d: %s ",
- cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf)));
+ perf_cpu_map__cpu(cpus, 0).cpu,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_free_cpus;
}
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index cd3dd463783f..1ab362323d25 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -22,7 +22,8 @@
static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
- int err = -1, fd, cpu;
+ int err = -1, fd, idx;
+ struct perf_cpu cpu;
struct perf_cpu_map *cpus;
struct evsel *evsel;
unsigned int nr_openat_calls = 111, i;
@@ -58,23 +59,23 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb
goto out_evsel_delete;
}
- for (cpu = 0; cpu < cpus->nr; ++cpu) {
- unsigned int ncalls = nr_openat_calls + cpu;
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ unsigned int ncalls = nr_openat_calls + idx;
/*
* XXX eventually lift this restriction in a way that
* keeps perf building on older glibc installations
* without CPU_ALLOC. 1024 cpus in 2010 still seems
* a reasonable upper limit tho :-)
*/
- if (cpus->map[cpu] >= CPU_SETSIZE) {
- pr_debug("Ignoring CPU %d\n", cpus->map[cpu]);
+ if (cpu.cpu >= CPU_SETSIZE) {
+ pr_debug("Ignoring CPU %d\n", cpu.cpu);
continue;
}
- CPU_SET(cpus->map[cpu], &cpu_set);
+ CPU_SET(cpu.cpu, &cpu_set);
if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
pr_debug("sched_setaffinity() failed on CPU %d: %s ",
- cpus->map[cpu],
+ cpu.cpu,
str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_close_fd;
}
@@ -82,37 +83,29 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb
fd = openat(0, "/etc/passwd", O_RDONLY);
close(fd);
}
- CPU_CLR(cpus->map[cpu], &cpu_set);
+ CPU_CLR(cpu.cpu, &cpu_set);
}
- /*
- * Here we need to explicitly preallocate the counts, as if
- * we use the auto allocation it will allocate just for 1 cpu,
- * as we start by cpu 0.
- */
- if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
- pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
- goto out_close_fd;
- }
+ evsel->core.cpus = perf_cpu_map__get(cpus);
err = 0;
- for (cpu = 0; cpu < cpus->nr; ++cpu) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
unsigned int expected;
- if (cpus->map[cpu] >= CPU_SETSIZE)
+ if (cpu.cpu >= CPU_SETSIZE)
continue;
- if (evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+ if (evsel__read_on_cpu(evsel, idx, 0) < 0) {
pr_debug("evsel__read_on_cpu\n");
err = -1;
break;
}
- expected = nr_openat_calls + cpu;
- if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
+ expected = nr_openat_calls + idx;
+ if (perf_counts(evsel->counts, idx, 0)->val != expected) {
pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
- expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
+ expected, cpu.cpu, perf_counts(evsel->counts, idx, 0)->val);
err = -1;
}
}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index a508f1dbcb2a..e71efadb24f5 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -2069,6 +2069,31 @@ static int test_event(struct evlist_test *e)
return ret;
}
+static int test_event_fake_pmu(const char *str)
+{
+ struct parse_events_error err;
+ struct evlist *evlist;
+ int ret;
+
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+
+ parse_events_error__init(&err);
+ perf_pmu__test_parse_init();
+ ret = __parse_events(evlist, str, &err, &perf_pmu__fake);
+ if (ret) {
+ pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+ str, ret, err.str);
+ parse_events_error__print(&err, str);
+ }
+
+ parse_events_error__exit(&err);
+ evlist__delete(evlist);
+
+ return ret;
+}
+
static int test_events(struct evlist_test *events, unsigned cnt)
{
int ret1, ret2 = 0;
@@ -2276,6 +2301,26 @@ static int test_pmu_events_alias(char *event, char *alias)
return test_event(&e);
}
+static int test_pmu_events_alias2(void)
+{
+ static const char events[][30] = {
+ "event-hyphen",
+ "event-two-hyph",
+ };
+ unsigned long i;
+ int ret = 0;
+
+ for (i = 0; i < ARRAY_SIZE(events); i++) {
+ ret = test_event_fake_pmu(&events[i][0]);
+ if (ret) {
+ pr_err("check_parse_fake %s failed\n", &events[i][0]);
+ break;
+ }
+ }
+
+ return ret;
+}
+
static int test__parse_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
int ret1, ret2 = 0;
@@ -2313,6 +2358,10 @@ do { \
return ret;
}
+ ret1 = test_pmu_events_alias2();
+ if (!ret2)
+ ret2 = ret1;
+
ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms));
if (!ret2)
ret2 = ret1;
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index df1c9a3cc05b..1c695fb5a79c 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -143,6 +143,34 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = {
.matching_pmu = "uncore_cbox_0",
};
+static const struct perf_pmu_test_event uncore_hyphen = {
+ .event = {
+ .name = "event-hyphen",
+ .event = "umask=0x00,event=0xe0",
+ .desc = "Unit: uncore_cbox UNC_CBO_HYPHEN",
+ .topic = "uncore",
+ .long_desc = "UNC_CBO_HYPHEN",
+ .pmu = "uncore_cbox",
+ },
+ .alias_str = "umask=0,event=0xe0",
+ .alias_long_desc = "UNC_CBO_HYPHEN",
+ .matching_pmu = "uncore_cbox_0",
+};
+
+static const struct perf_pmu_test_event uncore_two_hyph = {
+ .event = {
+ .name = "event-two-hyph",
+ .event = "umask=0x00,event=0xc0",
+ .desc = "Unit: uncore_cbox UNC_CBO_TWO_HYPH",
+ .topic = "uncore",
+ .long_desc = "UNC_CBO_TWO_HYPH",
+ .pmu = "uncore_cbox",
+ },
+ .alias_str = "umask=0,event=0xc0",
+ .alias_long_desc = "UNC_CBO_TWO_HYPH",
+ .matching_pmu = "uncore_cbox_0",
+};
+
static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = {
.event = {
.name = "uncore_hisi_l3c.rd_hit_cpipe",
@@ -188,6 +216,8 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = {
static const struct perf_pmu_test_event *uncore_events[] = {
&uncore_hisi_ddrc_flux_wcmd,
&unc_cbo_xsnp_response_miss_eviction,
+ &uncore_hyphen,
+ &uncore_two_hyph,
&uncore_hisi_l3c_rd_hit_cpipe,
&uncore_imc_free_running_cache_miss,
&uncore_imc_cache_hits,
@@ -654,6 +684,8 @@ static struct perf_pmu_test_pmu test_pmus[] = {
},
.aliases = {
&unc_cbo_xsnp_response_miss_eviction,
+ &uncore_hyphen,
+ &uncore_two_hyph,
},
},
{
diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh
index de24d374ce24..cb35e488809a 100755
--- a/tools/perf/tests/shell/stat_all_metricgroups.sh
+++ b/tools/perf/tests/shell/stat_all_metricgroups.sh
@@ -6,7 +6,7 @@ set -e
for m in $(perf list --raw-dump metricgroups); do
echo "Testing $m"
- perf stat -M "$m" true
+ perf stat -M "$m" -a true
done
exit 0
diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c
new file mode 100644
index 000000000000..1f147fe6595f
--- /dev/null
+++ b/tools/perf/tests/sigtrap.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic test for sigtrap support.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/string.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "cloexec.h"
+#include "debug.h"
+#include "event.h"
+#include "tests.h"
+#include "../perf-sys.h"
+
+/*
+ * PowerPC and S390 do not support creation of instruction breakpoints using the
+ * perf_event interface.
+ *
+ * Just disable the test for these architectures until these issues are
+ * resolved.
+ */
+#if defined(__powerpc__) || defined(__s390x__)
+#define BP_ACCOUNT_IS_SUPPORTED 0
+#else
+#define BP_ACCOUNT_IS_SUPPORTED 1
+#endif
+
+#define NUM_THREADS 5
+
+static struct {
+ int tids_want_signal; /* Which threads still want a signal. */
+ int signal_count; /* Sanity check number of signals received. */
+ volatile int iterate_on; /* Variable to set breakpoint on. */
+ siginfo_t first_siginfo; /* First observed siginfo_t. */
+} ctx;
+
+#define TEST_SIG_DATA (~(unsigned long)(&ctx.iterate_on))
+
+static struct perf_event_attr make_event_attr(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_BREAKPOINT,
+ .size = sizeof(attr),
+ .sample_period = 1,
+ .disabled = 1,
+ .bp_addr = (unsigned long)&ctx.iterate_on,
+ .bp_type = HW_BREAKPOINT_RW,
+ .bp_len = HW_BREAKPOINT_LEN_1,
+ .inherit = 1, /* Children inherit events ... */
+ .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */
+ .remove_on_exec = 1, /* Required by sigtrap. */
+ .sigtrap = 1, /* Request synchronous SIGTRAP on event. */
+ .sig_data = TEST_SIG_DATA,
+ .exclude_kernel = 1, /* To allow */
+ .exclude_hv = 1, /* running as !root */
+ };
+ return attr;
+}
+
+static void
+sigtrap_handler(int signum __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused)
+{
+ if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED))
+ ctx.first_siginfo = *info;
+ __atomic_fetch_sub(&ctx.tids_want_signal, syscall(SYS_gettid), __ATOMIC_RELAXED);
+}
+
+static void *test_thread(void *arg)
+{
+ pthread_barrier_t *barrier = (pthread_barrier_t *)arg;
+ pid_t tid = syscall(SYS_gettid);
+ int i;
+
+ pthread_barrier_wait(barrier);
+
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+ for (i = 0; i < ctx.iterate_on - 1; i++)
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+
+ return NULL;
+}
+
+static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier)
+{
+ int i;
+
+ pthread_barrier_wait(barrier);
+ for (i = 0; i < NUM_THREADS; i++)
+ TEST_ASSERT_EQUAL("pthread_join() failed", pthread_join(threads[i], NULL), 0);
+
+ return TEST_OK;
+}
+
+static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier)
+{
+ int ret;
+
+ ctx.iterate_on = 3000;
+
+ TEST_ASSERT_EQUAL("misfired signal?", ctx.signal_count, 0);
+ TEST_ASSERT_EQUAL("enable failed", ioctl(fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ ret = run_test_threads(threads, barrier);
+ TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0);
+
+ TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on);
+ TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0);
+ TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on);
+#if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */
+ TEST_ASSERT_EQUAL("unexpected si_perf_type", ctx.first_siginfo.si_perf_type,
+ PERF_TYPE_BREAKPOINT);
+ TEST_ASSERT_EQUAL("unexpected si_perf_data", ctx.first_siginfo.si_perf_data,
+ TEST_SIG_DATA);
+#endif
+
+ return ret;
+}
+
+static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_event_attr attr = make_event_attr();
+ struct sigaction action = {};
+ struct sigaction oldact;
+ pthread_t threads[NUM_THREADS];
+ pthread_barrier_t barrier;
+ char sbuf[STRERR_BUFSIZE];
+ int i, fd, ret = TEST_FAIL;
+
+ if (!BP_ACCOUNT_IS_SUPPORTED) {
+ pr_debug("Test not supported on this architecture");
+ return TEST_SKIP;
+ }
+
+ pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1);
+
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = sigtrap_handler;
+ sigemptyset(&action.sa_mask);
+ if (sigaction(SIGTRAP, &action, &oldact)) {
+ pr_debug("FAILED sigaction(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out;
+ }
+
+ fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ pr_debug("FAILED sys_perf_event_open(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_restore_sigaction;
+ }
+
+ for (i = 0; i < NUM_THREADS; i++) {
+ if (pthread_create(&threads[i], NULL, test_thread, &barrier)) {
+ pr_debug("FAILED pthread_create(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_close_perf_event;
+ }
+ }
+
+ ret = run_stress_test(fd, threads, &barrier);
+
+out_close_perf_event:
+ close(fd);
+out_restore_sigaction:
+ sigaction(SIGTRAP, &oldact, NULL);
+out:
+ pthread_barrier_destroy(&barrier);
+ return ret;
+}
+
+DEFINE_SUITE("Sigtrap", sigtrap);
diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c
index 2eb096b5e6da..500974040fe3 100644
--- a/tools/perf/tests/stat.c
+++ b/tools/perf/tests/stat.c
@@ -87,7 +87,8 @@ static int test__synthesize_stat(struct test_suite *test __maybe_unused, int sub
count.run = 300;
TEST_ASSERT_VAL("failed to synthesize stat_config",
- !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL));
+ !perf_event__synthesize_stat(NULL, (struct perf_cpu){.cpu = 1}, 2, 3,
+ &count, process_stat_event, NULL));
return 0;
}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 8f65098110fc..5bbb8f6a48fc 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -146,6 +146,7 @@ DECLARE_SUITE(pe_file_parsing);
DECLARE_SUITE(expand_cgroup_events);
DECLARE_SUITE(perf_time_to_tsc);
DECLARE_SUITE(dlfilter);
+DECLARE_SUITE(sigtrap);
/*
* PowerPC and S390 do not support creation of instruction breakpoints using the
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 869986139146..ee1e3dcbc0bd 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -112,62 +112,88 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
- if (!cpu_map__has(map, i))
+ struct perf_cpu cpu = { .cpu = i };
+
+ if (!perf_cpu_map__has(map, cpu))
continue;
pr_debug("CPU %d, core %d, socket %d\n", i,
session->header.env.cpu[i].core_id,
session->header.env.cpu[i].socket_id);
}
+ // Test that CPU ID contains socket, die, core and CPU
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL);
+ TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match",
+ perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu);
+
+ TEST_ASSERT_VAL("Cpu map - Core ID doesn't match",
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match",
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
+
+ TEST_ASSERT_VAL("Cpu map - Die ID doesn't match",
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1);
+ TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1);
+ }
+
// Test that core ID contains socket, die and core
- for (i = 0; i < map->nr; i++) {
- id = cpu_map__get_core(map, i, NULL);
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Core map - Core ID doesn't match",
- session->header.env.cpu[map->map[i]].core_id == id.core);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i]].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Core map - Die ID doesn't match",
- session->header.env.cpu[map->map[i]].die_id == id.die);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1);
}
// Test that die ID contains socket and die
- for (i = 0; i < map->nr; i++) {
- id = cpu_map__get_die(map, i, NULL);
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i]].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Die map - Die ID doesn't match",
- session->header.env.cpu[map->map[i]].die_id == id.die);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
+ TEST_ASSERT_VAL("Die map - CPU is set", id.cpu.cpu == -1);
TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1);
}
// Test that socket ID contains only socket
- for (i = 0; i < map->nr; i++) {
- id = cpu_map__get_socket(map, i, NULL);
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
- session->header.env.cpu[map->map[i]].socket_id == id.socket);
+ session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ id.socket);
TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1);
+ TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu.cpu == -1);
TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1);
}
// Test that node ID contains only node
- for (i = 0; i < map->nr; i++) {
- id = cpu_map__get_node(map, i, NULL);
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Node map - Node ID doesn't match",
- cpu__get_node(map->map[i]) == id.node);
+ cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node);
TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
+ TEST_ASSERT_VAL("Node map - CPU is set", id.cpu.cpu == -1);
TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1);
}
perf_session__delete(session);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e81c2493efdf..44ba900828f6 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -966,6 +966,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
.opts = opts,
};
int ret = -1, err;
+ int not_annotated = list_empty(&notes->src->source);
if (sym == NULL)
return -1;
@@ -973,13 +974,15 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
if (ms->map->dso->annotate_warned)
return -1;
- err = symbol__annotate2(ms, evsel, opts, &browser.arch);
- if (err) {
- char msg[BUFSIZ];
- ms->map->dso->annotate_warned = true;
- symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
- ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
- goto out_free_offsets;
+ if (not_annotated) {
+ err = symbol__annotate2(ms, evsel, opts, &browser.arch);
+ if (err) {
+ char msg[BUFSIZ];
+ ms->map->dso->annotate_warned = true;
+ symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+ ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+ goto out_free_offsets;
+ }
}
ui_helpline__push("Press ESC to exit");
@@ -994,9 +997,11 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
ret = annotate_browser__run(&browser, evsel, hbt);
- annotated_source__purge(notes->src);
+ if(not_annotated)
+ annotated_source__purge(notes->src);
out_free_offsets:
- zfree(&notes->offsets);
+ if(not_annotated)
+ zfree(&notes->offsets);
return ret;
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 2e5bfbb69960..2a403cefcaf2 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,3 +1,4 @@
+perf-y += arm64-frame-pointer-unwind-support.o
perf-y += annotate.o
perf-y += block-info.o
perf-y += block-range.o
@@ -144,6 +145,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index 7b12bd7a3080..4d216c0dc425 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -11,7 +11,7 @@
static int get_cpu_set_size(void)
{
- int sz = cpu__max_cpu() + 8 - 1;
+ int sz = cpu__max_cpu().cpu + 8 - 1;
/*
* sched_getaffinity doesn't like masks smaller than the kernel.
* Hopefully that's big enough.
@@ -62,7 +62,7 @@ void affinity__set(struct affinity *a, int cpu)
clear_bit(cpu, a->sched_cpus);
}
-void affinity__cleanup(struct affinity *a)
+static void __affinity__cleanup(struct affinity *a)
{
int cpu_set_size = get_cpu_set_size();
@@ -71,3 +71,9 @@ void affinity__cleanup(struct affinity *a)
zfree(&a->sched_cpus);
zfree(&a->orig_cpus);
}
+
+void affinity__cleanup(struct affinity *a)
+{
+ if (a != NULL)
+ __affinity__cleanup(a);
+}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 3fc528c9270c..5e390a1a79ab 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -179,6 +179,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.phys_addr = ip;
break;
case ARM_SPE_COUNTER:
+ if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
+ decoder->record.latency = payload;
break;
case ARM_SPE_CONTEXT:
decoder->record.context_id = payload;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 46a8556a9e95..69b31084d6be 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -33,6 +33,7 @@ struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
u32 op;
+ u32 latency;
u64 from_ip;
u64 to_ip;
u64 timestamp;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index fccac06b573a..d2b64e3f588b 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -58,6 +58,8 @@ struct arm_spe {
u8 sample_branch;
u8 sample_remote_access;
u8 sample_memory;
+ u8 sample_instructions;
+ u64 instructions_sample_period;
u64 l1d_miss_id;
u64 l1d_access_id;
@@ -68,6 +70,7 @@ struct arm_spe {
u64 branch_miss_id;
u64 remote_access_id;
u64 memory_id;
+ u64 instructions_id;
u64 kernel_start;
@@ -90,6 +93,7 @@ struct arm_spe_queue {
u64 time;
u64 timestamp;
struct thread *thread;
+ u64 period_instructions;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
speq->pid = -1;
speq->tid = -1;
speq->cpu = -1;
+ speq->period_instructions = 0;
/* params set */
params.get_trace = arm_spe_get_trace;
@@ -330,6 +335,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.addr = record->virt_addr;
sample.phys_addr = record->phys_addr;
sample.data_src = data_src;
+ sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@@ -347,6 +353,36 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
sample.id = spe_events_id;
sample.stream_id = spe_events_id;
sample.addr = record->to_ip;
+ sample.weight = record->latency;
+
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id, u64 data_src)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ /*
+ * Handles perf instruction sampling period.
+ */
+ speq->period_instructions++;
+ if (speq->period_instructions < spe->instructions_sample_period)
+ return 0;
+ speq->period_instructions = 0;
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+ sample.addr = record->virt_addr;
+ sample.phys_addr = record->phys_addr;
+ sample.data_src = data_src;
+ sample.period = spe->instructions_sample_period;
+ sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@@ -480,6 +516,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
return err;
}
+ if (spe->sample_instructions) {
+ err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -993,7 +1035,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.type = PERF_TYPE_HARDWARE;
attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
- PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_WEIGHT;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
@@ -1107,7 +1150,29 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
return err;
spe->memory_id = id;
arm_spe_set_event_name(evlist, id, "memory");
+ id += 1;
+ }
+
+ if (spe->synth_opts.instructions) {
+ if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
+ pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
+ goto synth_instructions_out;
+ }
+ if (spe->synth_opts.period > 1)
+ pr_warning("Arm SPE has a hardware-based sample period.\n"
+ "Additional instruction events will be discarded by --itrace\n");
+
+ spe->sample_instructions = true;
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ attr.sample_period = spe->synth_opts.period;
+ spe->instructions_sample_period = attr.sample_period;
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->instructions_id = id;
+ arm_spe_set_event_name(evlist, id, "instructions");
}
+synth_instructions_out:
return 0;
}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
new file mode 100644
index 000000000000..2242a885fbd7
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arm64-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "event.h"
+#include "perf_regs.h" // SMPL_REG_MASK
+#include "unwind.h"
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+struct entries {
+ u64 stack[2];
+ size_t length;
+};
+
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+ return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
+ && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
+}
+
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+ struct entries *entries = arg;
+
+ entries->stack[entries->length++] = entry->ip;
+ return 0;
+}
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx)
+{
+ int ret;
+ struct entries entries = {};
+ struct regs_dump old_regs = sample->user_regs;
+
+ if (!get_leaf_frame_caller_enabled(sample))
+ return 0;
+
+ /*
+ * If PC and SP are not recorded, get the value of PC from the stack
+ * and set its mask. SP is not used when doing the unwinding but it
+ * still needs to be set to prevent failures.
+ */
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+ }
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+ }
+
+ ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
+ sample->user_regs = old_regs;
+
+ if (ret || entries.length != 2)
+ return ret;
+
+ return callchain_param.order == ORDER_CALLER ? entries.stack[0] : entries.stack[1];
+}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
new file mode 100644
index 000000000000..32af9ce94398
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+#include "event.h"
+#include "thread.h"
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c679394b898d..825336304a37 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -123,7 +123,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
mm->prev = 0;
mm->idx = mp->idx;
mm->tid = mp->tid;
- mm->cpu = mp->cpu;
+ mm->cpu = mp->cpu.cpu;
if (!mp->len) {
mm->base = NULL;
@@ -174,13 +174,13 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
mp->idx = idx;
if (per_cpu) {
- mp->cpu = evlist->core.cpus->map[idx];
+ mp->cpu = perf_cpu_map__cpu(evlist->core.cpus, idx);
if (evlist->core.threads)
mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
else
mp->tid = -1;
} else {
- mp->cpu = -1;
+ mp->cpu.cpu = -1;
mp->tid = perf_thread_map__pid(evlist->core.threads, idx);
}
}
@@ -292,7 +292,7 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
if (!queue->set) {
queue->set = true;
queue->tid = buffer->tid;
- queue->cpu = buffer->cpu;
+ queue->cpu = buffer->cpu.cpu;
}
buffer->buffer_nr = queues->next_buffer_nr++;
@@ -339,11 +339,11 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
return 0;
}
-static bool filter_cpu(struct perf_session *session, int cpu)
+static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu)
{
unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
- return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+ return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap);
}
static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
@@ -399,7 +399,7 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
struct auxtrace_buffer buffer = {
.pid = -1,
.tid = event->auxtrace.tid,
- .cpu = event->auxtrace.cpu,
+ .cpu = { event->auxtrace.cpu },
.data_offset = data_offset,
.offset = event->auxtrace.offset,
.reference = event->auxtrace.reference,
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index bbf0d78c6401..19910b9011f3 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -15,6 +15,7 @@
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/types.h>
+#include <internal/cpumap.h>
#include <asm/bitsperlong.h>
#include <asm/barrier.h>
@@ -240,7 +241,7 @@ struct auxtrace_buffer {
size_t size;
pid_t pid;
pid_t tid;
- int cpu;
+ struct perf_cpu cpu;
void *data;
off_t data_offset;
void *mmap_addr;
@@ -350,7 +351,7 @@ struct auxtrace_mmap_params {
int prot;
int idx;
pid_t tid;
- int cpu;
+ struct perf_cpu cpu;
};
/**
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 528aeb0ab79d..7ecfaac7536a 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -424,7 +424,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
size_t prologue_cnt = 0;
int i, err;
- if (IS_ERR(priv) || !priv || priv->is_tp)
+ if (IS_ERR_OR_NULL(priv) || priv->is_tp)
goto errout;
pev = &priv->pev;
@@ -573,7 +573,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
bool need_prologue = false;
int err, i;
- if (IS_ERR(priv) || !priv) {
+ if (IS_ERR_OR_NULL(priv)) {
pr_debug("Internal error when hook preprocessor\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -645,8 +645,11 @@ int bpf__probe(struct bpf_object *obj)
goto out;
priv = bpf_program__priv(prog);
- if (IS_ERR(priv) || !priv) {
- err = PTR_ERR(priv);
+ if (IS_ERR_OR_NULL(priv)) {
+ if (!priv)
+ err = -BPF_LOADER_ERRNO__INTERNAL;
+ else
+ err = PTR_ERR(priv);
goto out;
}
@@ -696,7 +699,7 @@ int bpf__unprobe(struct bpf_object *obj)
struct bpf_prog_priv *priv = bpf_program__priv(prog);
int i;
- if (IS_ERR(priv) || !priv || priv->is_tp)
+ if (IS_ERR_OR_NULL(priv) || priv->is_tp)
continue;
for (i = 0; i < priv->pev.ntevs; i++) {
@@ -754,7 +757,7 @@ int bpf__foreach_event(struct bpf_object *obj,
struct perf_probe_event *pev;
int i, fd;
- if (IS_ERR(priv) || !priv) {
+ if (IS_ERR_OR_NULL(priv)) {
pr_debug("bpf: failed to get private field\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 5a97fd7d0a71..3ce8d03cb7ec 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -265,7 +265,7 @@ static int bpf_program_profiler__read(struct evsel *evsel)
return 0;
}
-static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
+static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx,
int fd)
{
struct bpf_prog_profiler_bpf *skel;
@@ -277,7 +277,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
assert(skel != NULL);
ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
- &cpu, &fd, BPF_ANY);
+ &cpu_map_idx, &fd, BPF_ANY);
if (ret)
return ret;
}
@@ -554,7 +554,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
filter_type == BPERF_FILTER_TGID)
key = evsel->core.threads->map[i].pid;
else if (filter_type == BPERF_FILTER_CPU)
- key = evsel->core.cpus->map[i];
+ key = evsel->core.cpus->map[i].cpu;
else
break;
@@ -580,12 +580,12 @@ out:
return err;
}
-static int bperf__install_pe(struct evsel *evsel, int cpu, int fd)
+static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
{
struct bperf_leader_bpf *skel = evsel->leader_skel;
return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
- &cpu, &fd, BPF_ANY);
+ &cpu_map_idx, &fd, BPF_ANY);
}
/*
@@ -598,7 +598,7 @@ static int bperf_sync_counters(struct evsel *evsel)
num_cpu = all_cpu_map->nr;
for (i = 0; i < num_cpu; i++) {
- cpu = all_cpu_map->map[i];
+ cpu = all_cpu_map->map[i].cpu;
bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
}
return 0;
@@ -619,15 +619,17 @@ static int bperf__disable(struct evsel *evsel)
static int bperf__read(struct evsel *evsel)
{
struct bperf_follower_bpf *skel = evsel->follower_skel;
- __u32 num_cpu_bpf = cpu__max_cpu();
+ __u32 num_cpu_bpf = cpu__max_cpu().cpu;
struct bpf_perf_event_value values[num_cpu_bpf];
int reading_map_fd, err = 0;
- __u32 i, j, num_cpu;
+ __u32 i;
+ int j;
bperf_sync_counters(evsel);
reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+ struct perf_cpu entry;
__u32 cpu;
err = bpf_map_lookup_elem(reading_map_fd, &i, values);
@@ -637,16 +639,15 @@ static int bperf__read(struct evsel *evsel)
case BPERF_FILTER_GLOBAL:
assert(i == 0);
- num_cpu = all_cpu_map->nr;
- for (j = 0; j < num_cpu; j++) {
- cpu = all_cpu_map->map[j];
+ perf_cpu_map__for_each_cpu(entry, j, all_cpu_map) {
+ cpu = entry.cpu;
perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
}
break;
case BPERF_FILTER_CPU:
- cpu = evsel->core.cpus->map[i];
+ cpu = evsel->core.cpus->map[i].cpu;
perf_counts(evsel->counts, i, 0)->val = values[cpu].counter;
perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled;
perf_counts(evsel->counts, i, 0)->run = values[cpu].running;
@@ -771,11 +772,11 @@ static inline bool bpf_counter_skip(struct evsel *evsel)
evsel->follower_skel == NULL;
}
-int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
{
if (bpf_counter_skip(evsel))
return 0;
- return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
+ return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd);
}
int bpf_counter__load(struct evsel *evsel, struct target *target)
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
index 65ebaa6694fb..4dbf26408b69 100644
--- a/tools/perf/util/bpf_counter.h
+++ b/tools/perf/util/bpf_counter.h
@@ -16,7 +16,7 @@ typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
struct target *target);
typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
- int cpu,
+ int cpu_map_idx,
int fd);
struct bpf_counter_ops {
@@ -40,7 +40,7 @@ int bpf_counter__enable(struct evsel *evsel);
int bpf_counter__disable(struct evsel *evsel);
int bpf_counter__read(struct evsel *evsel);
void bpf_counter__destroy(struct evsel *evsel);
-int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd);
#else /* HAVE_BPF_SKEL */
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index cbc6c2bca488..631e34a0b66f 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -48,7 +48,7 @@ static int bperf_load_program(struct evlist *evlist)
struct cgroup *cgrp, *leader_cgrp;
__u32 i, cpu;
__u32 nr_cpus = evlist->core.all_cpus->nr;
- int total_cpus = cpu__max_cpu();
+ int total_cpus = cpu__max_cpu().cpu;
int map_size, map_fd;
int prog_fd, err;
@@ -125,7 +125,7 @@ static int bperf_load_program(struct evlist *evlist)
for (cpu = 0; cpu < nr_cpus; cpu++) {
int fd = FD(evsel, cpu);
__u32 idx = evsel->core.idx * total_cpus +
- evlist->core.all_cpus->map[cpu];
+ evlist->core.all_cpus->map[cpu].cpu;
err = bpf_map_update_elem(map_fd, &idx, &fd,
BPF_ANY);
@@ -212,7 +212,7 @@ static int bperf_cgrp__sync_counters(struct evlist *evlist)
int prog_fd = bpf_program__fd(skel->progs.trigger_read);
for (i = 0; i < nr_cpus; i++) {
- cpu = evlist->core.all_cpus->map[i];
+ cpu = evlist->core.all_cpus->map[i].cpu;
bperf_trigger_reading(prog_fd, cpu);
}
@@ -245,7 +245,7 @@ static int bperf_cgrp__read(struct evsel *evsel)
{
struct evlist *evlist = evsel->evlist;
int i, cpu, nr_cpus = evlist->core.all_cpus->nr;
- int total_cpus = cpu__max_cpu();
+ int total_cpus = cpu__max_cpu().cpu;
struct perf_counts_values *counts;
struct bpf_perf_event_value *values;
int reading_map_fd, err = 0;
@@ -272,7 +272,7 @@ static int bperf_cgrp__read(struct evsel *evsel)
}
for (i = 0; i < nr_cpus; i++) {
- cpu = evlist->core.all_cpus->map[i];
+ cpu = evlist->core.all_cpus->map[i].cpu;
counts = perf_counts(evsel->counts, i, 0);
counts->val = values[cpu].counter;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
new file mode 100644
index 000000000000..d756cc66eef3
--- /dev/null
+++ b/tools/perf/util/bpf_ftrace.c
@@ -0,0 +1,152 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <linux/err.h>
+
+#include "util/ftrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/bpf_counter.h"
+
+#include "util/bpf_skel/func_latency.skel.h"
+
+static struct func_latency_bpf *skel;
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
+{
+ int fd, err;
+ int i, ncpus = 1, ntasks = 1;
+ struct filter_entry *func;
+
+ if (!list_is_singular(&ftrace->filters)) {
+ pr_err("ERROR: %s target function(s).\n",
+ list_empty(&ftrace->filters) ? "No" : "Too many");
+ return -1;
+ }
+
+ func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+
+ skel = func_latency_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open func latency skeleton\n");
+ return -1;
+ }
+
+ /* don't need to set cpu filter for system-wide mode */
+ if (ftrace->target.cpu_list) {
+ ncpus = perf_cpu_map__nr(ftrace->evlist->core.cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ }
+
+ set_max_rlimit();
+
+ err = func_latency_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load func latency skeleton\n");
+ goto out;
+ }
+
+ if (ftrace->target.cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = perf_cpu_map__cpu(ftrace->evlist->core.cpus, i).cpu;
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
+ skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+ false, func->name);
+ if (IS_ERR(skel->links.func_begin)) {
+ pr_err("Failed to attach fentry program\n");
+ err = PTR_ERR(skel->links.func_begin);
+ goto out;
+ }
+
+ skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+ true, func->name);
+ if (IS_ERR(skel->links.func_end)) {
+ pr_err("Failed to attach fexit program\n");
+ err = PTR_ERR(skel->links.func_end);
+ goto out;
+ }
+
+ /* XXX: we don't actually use this fd - just for poll() */
+ return open("/dev/null", O_RDONLY);
+
+out:
+ return err;
+}
+
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 1;
+ return 0;
+}
+
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 0;
+ return 0;
+}
+
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[])
+{
+ int i, fd, err;
+ u32 idx;
+ u64 *hist;
+ int ncpus = cpu__max_cpu().cpu;
+
+ fd = bpf_map__fd(skel->maps.latency);
+
+ hist = calloc(ncpus, sizeof(*hist));
+ if (hist == NULL)
+ return -ENOMEM;
+
+ for (idx = 0; idx < NUM_BUCKET; idx++) {
+ err = bpf_map_lookup_elem(fd, &idx, hist);
+ if (err) {
+ buckets[idx] = 0;
+ continue;
+ }
+
+ for (i = 0; i < ncpus; i++)
+ buckets[idx] += hist[i];
+ }
+
+ free(hist);
+ return 0;
+}
+
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ func_latency_bpf__destroy(skel);
+ return 0;
+}
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
new file mode 100644
index 000000000000..ea94187fe443
--- /dev/null
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+// This should be in sync with "util/ftrace.h"
+#define NUM_BUCKET 22
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 10000);
+} functime SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, NUM_BUCKET);
+} latency SEC(".maps");
+
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+ __u64 key, now;
+
+ if (!enabled)
+ return 0;
+
+ key = bpf_get_current_pid_tgid();
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u32 pid = key & 0xffffffff;
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
+ now = bpf_ktime_get_ns();
+
+ // overwrite timestamp for nested functions
+ bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+ return 0;
+}
+
+SEC("kretprobe/func")
+int BPF_PROG(func_end)
+{
+ __u64 tid;
+ __u64 *start;
+
+ if (!enabled)
+ return 0;
+
+ tid = bpf_get_current_pid_tgid();
+
+ start = bpf_map_lookup_elem(&functime, &tid);
+ if (start) {
+ __s64 delta = bpf_ktime_get_ns() - *start;
+ __u32 key;
+ __u64 *hist;
+
+ bpf_map_delete_elem(&functime, &tid);
+
+ if (delta < 0)
+ return 0;
+
+ // calculate index using delta in usec
+ for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ if (delta < ((1000UL) << key))
+ break;
+ }
+
+ hist = bpf_map_lookup_elem(&latency, &key);
+ if (!hist)
+ return 0;
+
+ *hist += 1;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8e2777133bd9..131207b91d15 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
map__zput(node->ms.map);
}
-void callchain_param_setup(u64 sample_type)
+void callchain_param_setup(u64 sample_type, const char *arch)
{
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -1612,6 +1612,18 @@ void callchain_param_setup(u64 sample_type)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ /*
+ * It's necessary to use libunwind to reliably determine the caller of
+ * a leaf function on aarch64, as otherwise we cannot know whether to
+ * start from the LR or FP.
+ *
+ * Always starting from the LR can result in duplicate or entirely
+ * erroneous entries. Always skipping the LR and starting from the FP
+ * can result in missing entries.
+ */
+ if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
+ dwarf_callchain_users = true;
}
static bool chain_match(struct callchain_list *base_chain,
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 5824134f983b..d95615daed73 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
}
#endif
+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
@@ -298,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
u64 *branch_count, u64 *predicted_count,
u64 *abort_count, u64 *cycles_count);
-void callchain_param_setup(u64 sample_type);
+void callchain_param_setup(u64 sample_type, const char *arch);
bool callchain_cnode_matched(struct callchain_node *base_cnode,
struct callchain_node *pair_cnode);
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index 582f3aeaf5e4..7a447d918458 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -4,6 +4,7 @@
#include <string.h>
#include "evsel.h"
#include "counts.h"
+#include <perf/threadmap.h>
#include <linux/zalloc.h>
struct perf_counts *perf_counts__new(int ncpus, int nthreads)
@@ -55,9 +56,12 @@ void evsel__reset_counts(struct evsel *evsel)
perf_counts__reset(evsel->counts);
}
-int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads)
+int evsel__alloc_counts(struct evsel *evsel)
{
- evsel->counts = perf_counts__new(ncpus, nthreads);
+ struct perf_cpu_map *cpus = evsel__cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+ evsel->counts = perf_counts__new(perf_cpu_map__nr(cpus), nthreads);
return evsel->counts != NULL ? 0 : -ENOMEM;
}
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 7ff36bf6d644..5de275194f2b 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -18,21 +18,21 @@ struct perf_counts {
static inline struct perf_counts_values*
-perf_counts(struct perf_counts *counts, int cpu, int thread)
+perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread)
{
- return xyarray__entry(counts->values, cpu, thread);
+ return xyarray__entry(counts->values, cpu_map_idx, thread);
}
static inline bool
-perf_counts__is_loaded(struct perf_counts *counts, int cpu, int thread)
+perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread)
{
- return *((bool *) xyarray__entry(counts->loaded, cpu, thread));
+ return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread));
}
static inline void
-perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool loaded)
+perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded)
{
- *((bool *) xyarray__entry(counts->loaded, cpu, thread)) = loaded;
+ *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded;
}
struct perf_counts *perf_counts__new(int ncpus, int nthreads);
@@ -40,7 +40,7 @@ void perf_counts__delete(struct perf_counts *counts);
void perf_counts__reset(struct perf_counts *counts);
void evsel__reset_counts(struct evsel *evsel);
-int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
+int evsel__alloc_counts(struct evsel *evsel);
void evsel__free_counts(struct evsel *evsel);
#endif /* __PERF_COUNTS_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 87d3eca9b872..12b2243222b0 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -13,9 +13,13 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
-static int max_cpu_num;
-static int max_present_cpu_num;
+static struct perf_cpu max_cpu_num;
+static struct perf_cpu max_present_cpu_num;
static int max_node_num;
+/**
+ * The numa node X as read from /sys/devices/system/node/nodeX indexed by the
+ * CPU number.
+ */
static int *cpunode_map;
static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
@@ -33,9 +37,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
* otherwise it would become 65535.
*/
if (cpus->cpu[i] == (u16) -1)
- map->map[i] = -1;
+ map->map[i].cpu = -1;
else
- map->map[i] = (int) cpus->cpu[i];
+ map->map[i].cpu = (int) cpus->cpu[i];
}
}
@@ -54,7 +58,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map
int cpu, i = 0;
for_each_set_bit(cpu, mask->mask, nbits)
- map->map[i++] = cpu;
+ map->map[i++].cpu = cpu;
}
return map;
@@ -87,7 +91,7 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
cpus->nr = nr;
for (i = 0; i < nr; i++)
- cpus->map[i] = -1;
+ cpus->map[i].cpu = -1;
refcount_set(&cpus->refcnt, 1);
}
@@ -104,7 +108,7 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
cpus->nr = nr;
for (i = 0; i < nr; i++)
- cpus->map[i] = cpu_map__empty_aggr_cpu_id();
+ cpus->map[i] = aggr_cpu_id__empty();
refcount_set(&cpus->refcnt, 1);
}
@@ -122,28 +126,21 @@ static int cpu__get_topology_int(int cpu, const char *name, int *value)
return sysfs__read_int(path, value);
}
-int cpu_map__get_socket_id(int cpu)
+int cpu__get_socket_id(struct perf_cpu cpu)
{
- int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value);
+ int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value);
return ret ?: value;
}
-struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
- void *data __maybe_unused)
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused)
{
- int cpu;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (idx > map->nr)
- return id;
-
- cpu = map->map[idx];
-
- id.socket = cpu_map__get_socket_id(cpu);
+ id.socket = cpu__get_socket_id(cpu);
return id;
}
-static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
+static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
{
struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
@@ -160,57 +157,64 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
return a->thread - b->thread;
}
-int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
- struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
- void *data)
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+ aggr_cpu_id_get_t get_id,
+ void *data)
{
- int nr = cpus->nr;
- struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr);
- int cpu, s2;
- struct aggr_cpu_id s1;
+ int idx;
+ struct perf_cpu cpu;
+ struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr);
if (!c)
- return -1;
+ return NULL;
/* Reset size as it may only be partially filled */
c->nr = 0;
- for (cpu = 0; cpu < nr; cpu++) {
- s1 = f(cpus, cpu, data);
- for (s2 = 0; s2 < c->nr; s2++) {
- if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ bool duplicate = false;
+ struct aggr_cpu_id cpu_id = get_id(cpu, data);
+
+ for (int j = 0; j < c->nr; j++) {
+ if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) {
+ duplicate = true;
break;
+ }
}
- if (s2 == c->nr) {
- c->map[c->nr] = s1;
+ if (!duplicate) {
+ c->map[c->nr] = cpu_id;
c->nr++;
}
}
+ /* Trim. */
+ if (c->nr != cpus->nr) {
+ struct cpu_aggr_map *trimmed_c =
+ realloc(c,
+ sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr);
+
+ if (trimmed_c)
+ c = trimmed_c;
+ }
/* ensure we process id in increasing order */
- qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
+ qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
+
+ return c;
- *res = c;
- return 0;
}
-int cpu_map__get_die_id(int cpu)
+int cpu__get_die_id(struct perf_cpu cpu)
{
- int value, ret = cpu__get_topology_int(cpu, "die_id", &value);
+ int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value);
return ret ?: value;
}
-struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
{
- int cpu, die;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id;
+ int die;
- if (idx > map->nr)
- return id;
-
- cpu = map->map[idx];
-
- die = cpu_map__get_die_id(cpu);
+ die = cpu__get_die_id(cpu);
/* There is no die_id on legacy system. */
if (die == -1)
die = 0;
@@ -220,79 +224,59 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat
* with the socket ID and then add die to
* make a unique ID.
*/
- id = cpu_map__get_socket(map, idx, data);
- if (cpu_map__aggr_cpu_id_is_empty(id))
+ id = aggr_cpu_id__socket(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
return id;
id.die = die;
return id;
}
-int cpu_map__get_core_id(int cpu)
+int cpu__get_core_id(struct perf_cpu cpu)
{
- int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
+ int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);
return ret ?: value;
}
-int cpu_map__get_node_id(int cpu)
-{
- return cpu__get_node(cpu);
-}
-
-struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
{
- int cpu;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
-
- if (idx > map->nr)
- return id;
+ struct aggr_cpu_id id;
+ int core = cpu__get_core_id(cpu);
- cpu = map->map[idx];
-
- cpu = cpu_map__get_core_id(cpu);
-
- /* cpu_map__get_die returns a struct with socket and die set*/
- id = cpu_map__get_die(map, idx, data);
- if (cpu_map__aggr_cpu_id_is_empty(id))
+ /* aggr_cpu_id__die returns a struct with socket and die set. */
+ id = aggr_cpu_id__die(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
return id;
/*
* core_id is relative to socket and die, we need a global id.
* So we combine the result from cpu_map__get_die with the core id
*/
- id.core = cpu;
+ id.core = core;
return id;
+
}
-struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data)
{
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id;
- if (idx < 0 || idx >= map->nr)
+ /* aggr_cpu_id__core returns a struct with socket, die and core set. */
+ id = aggr_cpu_id__core(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
return id;
- id.node = cpu_map__get_node_id(map->map[idx]);
+ id.cpu = cpu;
return id;
-}
-int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp)
-{
- return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
}
-int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep)
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused)
{
- return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL);
-}
-
-int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep)
-{
- return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
-}
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
-int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap)
-{
- return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL);
+ id.node = cpu__get_node(cpu);
+ return id;
}
/* setup simple routines to easily access node numbers given a cpu number */
@@ -335,8 +319,8 @@ static void set_max_cpu_num(void)
int ret = -1;
/* set up default */
- max_cpu_num = 4096;
- max_present_cpu_num = 4096;
+ max_cpu_num.cpu = 4096;
+ max_present_cpu_num.cpu = 4096;
mnt = sysfs__mountpoint();
if (!mnt)
@@ -349,7 +333,7 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_cpu_num);
+ ret = get_max_num(path, &max_cpu_num.cpu);
if (ret)
goto out;
@@ -360,11 +344,11 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_present_cpu_num);
+ ret = get_max_num(path, &max_present_cpu_num.cpu);
out:
if (ret)
- pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num);
+ pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
}
/* Determine highest possible node in the system for sparse allocation */
@@ -403,31 +387,31 @@ int cpu__max_node(void)
return max_node_num;
}
-int cpu__max_cpu(void)
+struct perf_cpu cpu__max_cpu(void)
{
- if (unlikely(!max_cpu_num))
+ if (unlikely(!max_cpu_num.cpu))
set_max_cpu_num();
return max_cpu_num;
}
-int cpu__max_present_cpu(void)
+struct perf_cpu cpu__max_present_cpu(void)
{
- if (unlikely(!max_present_cpu_num))
+ if (unlikely(!max_present_cpu_num.cpu))
set_max_cpu_num();
return max_present_cpu_num;
}
-int cpu__get_node(int cpu)
+int cpu__get_node(struct perf_cpu cpu)
{
if (unlikely(cpunode_map == NULL)) {
pr_debug("cpu_map not initialized\n");
return -1;
}
- return cpunode_map[cpu];
+ return cpunode_map[cpu.cpu];
}
static int init_cpunode_map(void)
@@ -437,13 +421,13 @@ static int init_cpunode_map(void)
set_max_cpu_num();
set_max_node_num();
- cpunode_map = calloc(max_cpu_num, sizeof(int));
+ cpunode_map = calloc(max_cpu_num.cpu, sizeof(int));
if (!cpunode_map) {
pr_err("%s: calloc failed\n", __func__);
return -1;
}
- for (i = 0; i < max_cpu_num; i++)
+ for (i = 0; i < max_cpu_num.cpu; i++)
cpunode_map[i] = -1;
return 0;
@@ -502,47 +486,39 @@ int cpu__setup_cpunode_map(void)
return 0;
}
-bool cpu_map__has(struct perf_cpu_map *cpus, int cpu)
-{
- return perf_cpu_map__idx(cpus, cpu) != -1;
-}
-
-int cpu_map__cpu(struct perf_cpu_map *cpus, int idx)
-{
- return cpus->map[idx];
-}
-
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
{
- int i, cpu, start = -1;
+ int i, start = -1;
bool first = true;
size_t ret = 0;
#define COMMA first ? "" : ","
for (i = 0; i < map->nr + 1; i++) {
+ struct perf_cpu cpu = { .cpu = INT_MAX };
bool last = i == map->nr;
- cpu = last ? INT_MAX : map->map[i];
+ if (!last)
+ cpu = map->map[i];
if (start == -1) {
start = i;
if (last) {
ret += snprintf(buf + ret, size - ret,
"%s%d", COMMA,
- map->map[i]);
+ map->map[i].cpu);
}
- } else if (((i - start) != (cpu - map->map[start])) || last) {
+ } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) {
int end = i - 1;
if (start == end) {
ret += snprintf(buf + ret, size - ret,
"%s%d", COMMA,
- map->map[start]);
+ map->map[start].cpu);
} else {
ret += snprintf(buf + ret, size - ret,
"%s%d-%d", COMMA,
- map->map[start], map->map[end]);
+ map->map[start].cpu, map->map[end].cpu);
}
first = false;
start = i;
@@ -569,23 +545,23 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
int i, cpu;
char *ptr = buf;
unsigned char *bitmap;
- int last_cpu = cpu_map__cpu(map, map->nr - 1);
+ struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1);
if (buf == NULL)
return 0;
- bitmap = zalloc(last_cpu / 8 + 1);
+ bitmap = zalloc(last_cpu.cpu / 8 + 1);
if (bitmap == NULL) {
buf[0] = '\0';
return 0;
}
for (i = 0; i < map->nr; i++) {
- cpu = cpu_map__cpu(map, i);
+ cpu = perf_cpu_map__cpu(map, i).cpu;
bitmap[cpu / 8] |= 1 << (cpu % 8);
}
- for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+ for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
unsigned char bits = bitmap[cpu / 8];
if (cpu % 8)
@@ -614,32 +590,35 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
return online;
}
-bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
{
- return a.thread == b.thread &&
- a.node == b.node &&
- a.socket == b.socket &&
- a.die == b.die &&
- a.core == b.core;
+ return a->thread == b->thread &&
+ a->node == b->node &&
+ a->socket == b->socket &&
+ a->die == b->die &&
+ a->core == b->core &&
+ a->cpu.cpu == b->cpu.cpu;
}
-bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
{
- return a.thread == -1 &&
- a.node == -1 &&
- a.socket == -1 &&
- a.die == -1 &&
- a.core == -1;
+ return a->thread == -1 &&
+ a->node == -1 &&
+ a->socket == -1 &&
+ a->die == -1 &&
+ a->core == -1 &&
+ a->cpu.cpu == -1;
}
-struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
+struct aggr_cpu_id aggr_cpu_id__empty(void)
{
struct aggr_cpu_id ret = {
.thread = -1,
.node = -1,
.socket = -1,
.die = -1,
- .core = -1
+ .core = -1,
+ .cpu = (struct perf_cpu){ .cpu = -1 },
};
return ret;
}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index a27eeaf086e8..703ae6d3386e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -2,71 +2,134 @@
#ifndef __PERF_CPUMAP_H
#define __PERF_CPUMAP_H
-#include <stdio.h>
#include <stdbool.h>
+#include <stdio.h>
#include <internal/cpumap.h>
#include <perf/cpumap.h>
+/** Identify where counts are aggregated, -1 implies not to aggregate. */
struct aggr_cpu_id {
+ /** A value in the range 0 to number of threads. */
int thread;
+ /** The numa node X as read from /sys/devices/system/node/nodeX. */
int node;
+ /**
+ * The socket number as read from
+ * /sys/devices/system/cpu/cpuX/topology/physical_package_id.
+ */
int socket;
+ /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */
int die;
+ /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */
int core;
+ /** CPU aggregation, note there is one CPU for each SMT thread. */
+ struct perf_cpu cpu;
};
+/** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */
struct cpu_aggr_map {
refcount_t refcnt;
+ /** Number of valid entries. */
int nr;
+ /** The entries. */
struct aggr_cpu_id map[];
};
struct perf_record_cpu_map_data;
struct perf_cpu_map *perf_cpu_map__empty_new(int nr);
-struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data);
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size);
size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size);
size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp);
-int cpu_map__get_socket_id(int cpu);
-struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__get_die_id(int cpu);
-struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__get_core_id(int cpu);
-struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__get_node_id(int cpu);
-struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp);
-int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep);
-int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep);
-int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep);
const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */
-static inline int cpu_map__socket(struct perf_cpu_map *sock, int s)
+int cpu__setup_cpunode_map(void);
+
+int cpu__max_node(void);
+struct perf_cpu cpu__max_cpu(void);
+struct perf_cpu cpu__max_present_cpu(void);
+
+/**
+ * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1.
+ */
+static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
{
- if (!sock || s > sock->nr || s < 0)
- return 0;
- return sock->map[s];
+ return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
}
-int cpu__setup_cpunode_map(void);
+/**
+ * cpu__get_node - Returns the numa node X as read from
+ * /sys/devices/system/node/nodeX for the given CPU.
+ */
+int cpu__get_node(struct perf_cpu cpu);
+/**
+ * cpu__get_socket_id - Returns the socket number as read from
+ * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU.
+ */
+int cpu__get_socket_id(struct perf_cpu cpu);
+/**
+ * cpu__get_die_id - Returns the die id as read from
+ * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU.
+ */
+int cpu__get_die_id(struct perf_cpu cpu);
+/**
+ * cpu__get_core_id - Returns the core id as read from
+ * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU.
+ */
+int cpu__get_core_id(struct perf_cpu cpu);
-int cpu__max_node(void);
-int cpu__max_cpu(void);
-int cpu__max_present_cpu(void);
-int cpu__get_node(int cpu);
+/**
+ * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry
+ * being empty.
+ */
+struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
+
+typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data);
+
+/**
+ * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in
+ * cpus. The aggr_cpu_id is created with 'get_id' that may have a data value
+ * passed to it. The cpu_aggr_map is sorted with duplicate values removed.
+ */
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+ aggr_cpu_id_get_t get_id,
+ void *data);
-int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
- struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
- void *data);
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b);
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a);
+struct aggr_cpu_id aggr_cpu_id__empty(void);
-int cpu_map__cpu(struct perf_cpu_map *cpus, int idx);
-bool cpu_map__has(struct perf_cpu_map *cpus, int cpu);
-bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b);
-bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a);
-struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void);
+/**
+ * aggr_cpu_id__socket - Create an aggr_cpu_id with the socket populated with
+ * the socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated
+ * with the die and socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket
+ * populated with the core, die and socket for cpu. The function signature is
+ * compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the cpu, core, die and socket
+ * populated with the cpu, core, die and socket for cpu. The function signature
+ * is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for
+ * cpu. The function signature is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data);
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index 51b429c86f98..d275d843c155 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -165,7 +165,8 @@ static bool has_die_topology(void)
if (uname(&uts) < 0)
return false;
- if (strncmp(uts.machine, "x86_64", 6))
+ if (strncmp(uts.machine, "x86_64", 6) &&
+ strncmp(uts.machine, "s390x", 5))
return false;
scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
@@ -187,7 +188,7 @@ struct cpu_topology *cpu_topology__new(void)
struct perf_cpu_map *map;
bool has_die = has_die_topology();
- ncpus = cpu__max_present_cpu();
+ ncpus = cpu__max_present_cpu().cpu;
/* build online CPU map */
map = perf_cpu_map__new(NULL);
@@ -218,7 +219,7 @@ struct cpu_topology *cpu_topology__new(void)
tp->core_cpus_list = addr;
for (i = 0; i < nr; i++) {
- if (!cpu_map__has(map, i))
+ if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i }))
continue;
ret = build_cpu_topology(tp, i);
@@ -324,7 +325,7 @@ struct numa_topology *numa_topology__new(void)
if (!node_map)
goto out;
- nr = (u32) node_map->nr;
+ nr = (u32) perf_cpu_map__nr(node_map);
tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr);
if (!tp)
@@ -333,7 +334,7 @@ struct numa_topology *numa_topology__new(void)
tp->nr = nr;
for (i = 0; i < nr; i++) {
- if (load_numa_node(&tp->nodes[i], node_map->map[i])) {
+ if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) {
numa_topology__delete(tp);
tp = NULL;
break;
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 8f7705bbc2da..9e0aee276df8 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -318,6 +318,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
offset = tmp_val;
len = offset >> 16;
offset &= 0xffff;
+ if (flags & TEP_FIELD_IS_RELATIVE)
+ offset += fmtf->offset + fmtf->size;
}
if (flags & TEP_FIELD_IS_ARRAY) {
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 2c06abf6dcd2..65e6c22f38e4 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -179,7 +179,7 @@ static int trace_event_printer(enum binary_printer_ops op,
break;
case BINARY_PRINT_CHAR_DATA:
printed += color_fprintf(fp, color, "%c",
- isprint(ch) ? ch : '.');
+ isprint(ch) && isascii(ch) ? ch : '.');
break;
case BINARY_PRINT_CHAR_PAD:
printed += color_fprintf(fp, color, " ");
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index b9904896eb97..579e44c59914 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -285,13 +285,13 @@ out_enomem:
int perf_env__read_cpu_topology_map(struct perf_env *env)
{
- int cpu, nr_cpus;
+ int idx, nr_cpus;
if (env->cpu != NULL)
return 0;
if (env->nr_cpus_avail == 0)
- env->nr_cpus_avail = cpu__max_present_cpu();
+ env->nr_cpus_avail = cpu__max_present_cpu().cpu;
nr_cpus = env->nr_cpus_avail;
if (nr_cpus == -1)
@@ -301,10 +301,12 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
if (env->cpu == NULL)
return -ENOMEM;
- for (cpu = 0; cpu < nr_cpus; ++cpu) {
- env->cpu[cpu].core_id = cpu_map__get_core_id(cpu);
- env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu);
- env->cpu[cpu].die_id = cpu_map__get_die_id(cpu);
+ for (idx = 0; idx < nr_cpus; ++idx) {
+ struct perf_cpu cpu = { .cpu = idx };
+
+ env->cpu[idx].core_id = cpu__get_core_id(cpu);
+ env->cpu[idx].socket_id = cpu__get_socket_id(cpu);
+ env->cpu[idx].die_id = cpu__get_die_id(cpu);
}
env->nr_cpus_avail = nr_cpus;
@@ -381,7 +383,7 @@ static int perf_env__read_arch(struct perf_env *env)
static int perf_env__read_nr_cpus_avail(struct perf_env *env)
{
if (env->nr_cpus_avail == 0)
- env->nr_cpus_avail = cpu__max_present_cpu();
+ env->nr_cpus_avail = cpu__max_present_cpu().cpu;
return env->nr_cpus_avail ? 0 : -ENOENT;
}
@@ -487,7 +489,7 @@ const char *perf_env__pmu_mappings(struct perf_env *env)
return env->pmu_mappings;
}
-int perf_env__numa_node(struct perf_env *env, int cpu)
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
{
if (!env->nr_numa_map) {
struct numa_node *nn;
@@ -495,7 +497,7 @@ int perf_env__numa_node(struct perf_env *env, int cpu)
for (i = 0; i < env->nr_numa_nodes; i++) {
nn = &env->numa_nodes[i];
- nr = max(nr, perf_cpu_map__max(nn->map));
+ nr = max(nr, perf_cpu_map__max(nn->map).cpu);
}
nr++;
@@ -514,13 +516,14 @@ int perf_env__numa_node(struct perf_env *env, int cpu)
env->nr_numa_map = nr;
for (i = 0; i < env->nr_numa_nodes; i++) {
- int tmp, j;
+ struct perf_cpu tmp;
+ int j;
nn = &env->numa_nodes[i];
- perf_cpu_map__for_each_cpu(j, tmp, nn->map)
- env->numa_map[j] = i;
+ perf_cpu_map__for_each_cpu(tmp, j, nn->map)
+ env->numa_map[tmp.cpu] = i;
}
}
- return cpu >= 0 && cpu < env->nr_numa_map ? env->numa_map[cpu] : -1;
+ return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 163e5ec503a2..a3541f98e1fc 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/rbtree.h>
+#include "cpumap.h"
#include "rwsem.h"
struct perf_cpu_map;
@@ -170,5 +171,5 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
-int perf_env__numa_node(struct perf_env *env, int cpu);
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
index 7c554234b43d..7f234215147d 100644
--- a/tools/perf/util/evlist-hybrid.c
+++ b/tools/perf/util/evlist-hybrid.c
@@ -124,22 +124,23 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
events_nr++;
- if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 ||
- matched_cpus->nr < cpus->nr ||
- matched_cpus->nr < pmu->cpus->nr)) {
+ if (perf_cpu_map__nr(matched_cpus) > 0 &&
+ (perf_cpu_map__nr(unmatched_cpus) > 0 ||
+ perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) ||
+ perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) {
perf_cpu_map__put(evsel->core.cpus);
perf_cpu_map__put(evsel->core.own_cpus);
evsel->core.cpus = perf_cpu_map__get(matched_cpus);
evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
- if (unmatched_cpus->nr > 0) {
+ if (perf_cpu_map__nr(unmatched_cpus) > 0) {
cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
buf1, pmu->name, evsel->name);
}
}
- if (matched_cpus->nr == 0) {
+ if (perf_cpu_map__nr(matched_cpus) == 0) {
evlist__remove(evlist, evsel);
evsel__delete(evsel);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 5f92319ce258..eaad04e1672a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -342,36 +342,65 @@ static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
return perf_thread_map__nr(evlist->core.threads);
}
-void evlist__cpu_iter_start(struct evlist *evlist)
-{
- struct evsel *pos;
-
- /*
- * Reset the per evsel cpu_iter. This is needed because
- * each evsel's cpumap may have a different index space,
- * and some operations need the index to modify
- * the FD xyarray (e.g. open, close)
- */
- evlist__for_each_entry(evlist, pos)
- pos->cpu_iter = 0;
-}
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
+{
+ struct evlist_cpu_iterator itr = {
+ .container = evlist,
+ .evsel = evlist__first(evlist),
+ .cpu_map_idx = 0,
+ .evlist_cpu_map_idx = 0,
+ .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
+ .cpu = (struct perf_cpu){ .cpu = -1},
+ .affinity = affinity,
+ };
-bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu)
-{
- if (ev->cpu_iter >= ev->core.cpus->nr)
- return true;
- if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu)
- return true;
- return false;
+ if (itr.affinity) {
+ itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
+ affinity__set(itr.affinity, itr.cpu.cpu);
+ itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
+ /*
+ * If this CPU isn't in the evsel's cpu map then advance through
+ * the list.
+ */
+ if (itr.cpu_map_idx == -1)
+ evlist_cpu_iterator__next(&itr);
+ }
+ return itr;
+}
+
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr)
+{
+ while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) {
+ evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel);
+ evlist_cpu_itr->cpu_map_idx =
+ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+ evlist_cpu_itr->cpu);
+ if (evlist_cpu_itr->cpu_map_idx != -1)
+ return;
+ }
+ evlist_cpu_itr->evlist_cpu_map_idx++;
+ if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) {
+ evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container);
+ evlist_cpu_itr->cpu =
+ perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus,
+ evlist_cpu_itr->evlist_cpu_map_idx);
+ if (evlist_cpu_itr->affinity)
+ affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu);
+ evlist_cpu_itr->cpu_map_idx =
+ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+ evlist_cpu_itr->cpu);
+ /*
+ * If this CPU isn't in the evsel's cpu map then advance through
+ * the list.
+ */
+ if (evlist_cpu_itr->cpu_map_idx == -1)
+ evlist_cpu_iterator__next(evlist_cpu_itr);
+ }
}
-bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr)
{
- if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) {
- ev->cpu_iter++;
- return false;
- }
- return true;
+ return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr;
}
static int evsel__strcmp(struct evsel *pos, char *evsel_name)
@@ -400,37 +429,36 @@ static int evlist__is_enabled(struct evlist *evlist)
static void __evlist__disable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
- struct affinity affinity;
- int cpu, i, imm = 0;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
bool has_imm = false;
- if (affinity__setup(&affinity) < 0)
- return;
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
/* Disable 'immediate' events last */
- for (imm = 0; imm <= 1; imm++) {
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
-
- evlist__for_each_entry(evlist, pos) {
- if (evsel__strcmp(pos, evsel_name))
- continue;
- if (evsel__cpu_iter_skip(pos, cpu))
- continue;
- if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
- continue;
- if (pos->immediate)
- has_imm = true;
- if (pos->immediate != imm)
- continue;
- evsel__disable_cpu(pos, pos->cpu_iter - 1);
- }
+ for (int imm = 0; imm <= 1; imm++) {
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+ pos = evlist_cpu_itr.evsel;
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ if (pos->immediate)
+ has_imm = true;
+ if (pos->immediate != imm)
+ continue;
+ evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
if (!has_imm)
break;
}
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
@@ -462,26 +490,25 @@ void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
static void __evlist__enable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
- struct affinity affinity;
- int cpu, i;
-
- if (affinity__setup(&affinity) < 0)
- return;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
- evlist__for_each_entry(evlist, pos) {
- if (evsel__strcmp(pos, evsel_name))
- continue;
- if (evsel__cpu_iter_skip(pos, cpu))
- continue;
- if (!evsel__is_group_leader(pos) || !pos->core.fd)
- continue;
- evsel__enable_cpu(pos, pos->cpu_iter - 1);
- }
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+ pos = evlist_cpu_itr.evsel;
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
@@ -800,7 +827,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
- int output, int cpu)
+ int output, struct perf_cpu cpu)
{
struct mmap *map = container_of(_map, struct mmap, core);
struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
@@ -1264,14 +1291,14 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
void evlist__close(struct evlist *evlist)
{
struct evsel *evsel;
+ struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity affinity;
- int cpu, i;
/*
* With perf record core.cpus is usually NULL.
* Use the old method to handle this for now.
*/
- if (!evlist->core.cpus) {
+ if (!evlist->core.cpus || cpu_map__is_dummy(evlist->core.cpus)) {
evlist__for_each_entry_reverse(evlist, evsel)
evsel__close(evsel);
return;
@@ -1279,15 +1306,12 @@ void evlist__close(struct evlist *evlist)
if (affinity__setup(&affinity) < 0)
return;
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
- evlist__for_each_entry_reverse(evlist, evsel) {
- if (evsel__cpu_iter_skip(evsel, cpu))
- continue;
- perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1);
- }
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
+ perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core,
+ evlist_cpu_itr.cpu_map_idx);
}
+
affinity__cleanup(&affinity);
evlist__for_each_entry_reverse(evlist, evsel) {
perf_evsel__free_fd(&evsel->core);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 97bfb8d0be4f..64cba56fbc74 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -64,6 +64,7 @@ struct evlist {
struct evsel *selected;
struct events_stats stats;
struct perf_env *env;
+ const char *hybrid_pmu_name;
void (*trace_event_sample_raw)(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample);
@@ -110,6 +111,7 @@ int __evlist__add_default_attrs(struct evlist *evlist,
__evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
int arch_evlist__add_default_attrs(struct evlist *evlist);
+struct evsel *arch_evlist__leader(struct list_head *list);
int evlist__add_dummy(struct evlist *evlist);
@@ -325,17 +327,53 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
__evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel)
-#define evlist__for_each_cpu(evlist, index, cpu) \
- evlist__cpu_iter_start(evlist); \
- perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus)
+/** Iterator state for evlist__for_each_cpu */
+struct evlist_cpu_iterator {
+ /** The list being iterated through. */
+ struct evlist *container;
+ /** The current evsel of the iterator. */
+ struct evsel *evsel;
+ /** The CPU map index corresponding to the evsel->core.cpus for the current CPU. */
+ int cpu_map_idx;
+ /**
+ * The CPU map index corresponding to evlist->core.all_cpus for the
+ * current CPU. Distinct from cpu_map_idx as the evsel's cpu map may
+ * contain fewer entries.
+ */
+ int evlist_cpu_map_idx;
+ /** The number of CPU map entries in evlist->core.all_cpus. */
+ int evlist_cpu_map_nr;
+ /** The current CPU of the iterator. */
+ struct perf_cpu cpu;
+ /** If present, used to set the affinity when switching between CPUs. */
+ struct affinity *affinity;
+};
+
+/**
+ * evlist__for_each_cpu - without affinity, iterate over the evlist. With
+ * affinity, iterate over all CPUs and then the evlist
+ * for each evsel on that CPU. When switching between
+ * CPUs the affinity is set to the CPU to avoid IPIs
+ * during syscalls.
+ * @evlist_cpu_itr: the iterator instance.
+ * @evlist: evlist instance to iterate.
+ * @affinity: NULL or used to set the affinity to the current CPU.
+ */
+#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \
+ for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \
+ !evlist_cpu_iterator__end(&evlist_cpu_itr); \
+ evlist_cpu_iterator__next(&evlist_cpu_itr))
+
+/** Returns an iterator set to the first CPU/evsel of evlist. */
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity);
+/** Move to next element in iterator, updating CPU, evsel and the affinity. */
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr);
+/** Returns true when iterator is at the end of the CPUs and evlist. */
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr);
struct evsel *evlist__get_tracking_event(struct evlist *evlist);
void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel);
-void evlist__cpu_iter_start(struct evlist *evlist);
-bool evsel__cpu_iter_skip(struct evsel *ev, int cpu);
-bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu);
-
struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ac0127be0459..22d3267ce294 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1064,6 +1064,17 @@ void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_un
{
}
+static void evsel__set_default_freq_period(struct record_opts *opts,
+ struct perf_event_attr *attr)
+{
+ if (opts->freq) {
+ attr->freq = 1;
+ attr->sample_freq = opts->freq;
+ } else {
+ attr->sample_period = opts->default_interval;
+ }
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
@@ -1130,14 +1141,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
* We default some events to have a default interval. But keep
* it a weak assumption overridable by the user.
*/
- if (!attr->sample_period) {
- if (opts->freq) {
- attr->freq = 1;
- attr->sample_freq = opts->freq;
- } else {
- attr->sample_period = opts->default_interval;
- }
- }
+ if ((evsel->is_libpfm_event && !attr->sample_period) ||
+ (!evsel->is_libpfm_event && (!attr->sample_period ||
+ opts->user_freq != UINT_MAX ||
+ opts->user_interval != ULLONG_MAX)))
+ evsel__set_default_freq_period(opts, attr);
+
/*
* If attr->freq was set (here or earlier), ask for period
* to be sampled.
@@ -1372,9 +1381,9 @@ int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
}
/* Caller has to clear disabled after going through all CPUs. */
-int evsel__enable_cpu(struct evsel *evsel, int cpu)
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__enable_cpu(&evsel->core, cpu);
+ return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
}
int evsel__enable(struct evsel *evsel)
@@ -1387,9 +1396,9 @@ int evsel__enable(struct evsel *evsel)
}
/* Caller has to set disabled after going through all CPUs. */
-int evsel__disable_cpu(struct evsel *evsel, int cpu)
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__disable_cpu(&evsel->core, cpu);
+ return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx);
}
int evsel__disable(struct evsel *evsel)
@@ -1455,7 +1464,7 @@ void evsel__delete(struct evsel *evsel)
free(evsel);
}
-void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values tmp;
@@ -1463,12 +1472,12 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
if (!evsel->prev_raw_counts)
return;
- if (cpu == -1) {
+ if (cpu_map_idx == -1) {
tmp = evsel->prev_raw_counts->aggr;
evsel->prev_raw_counts->aggr = *count;
} else {
- tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
- *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
+ tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+ *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
}
count->val = count->val - tmp.val;
@@ -1476,46 +1485,28 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
count->run = count->run - tmp.run;
}
-void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled)
+static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
{
- s8 scaled = 0;
+ struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread);
- if (scale) {
- if (count->run == 0) {
- scaled = -1;
- count->val = 0;
- } else if (count->run < count->ena) {
- scaled = 1;
- count->val = (u64)((double) count->val * count->ena / count->run);
- }
- }
-
- if (pscaled)
- *pscaled = scaled;
-}
-
-static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
-{
- struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
-
- return perf_evsel__read(&evsel->core, cpu, thread, count);
+ return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
}
-static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run)
+static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
+ u64 val, u64 ena, u64 run)
{
struct perf_counts_values *count;
- count = perf_counts(counter->counts, cpu, thread);
+ count = perf_counts(counter->counts, cpu_map_idx, thread);
count->val = val;
count->ena = ena;
count->run = run;
- perf_counts__set_loaded(counter->counts, cpu, thread, true);
+ perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
}
-static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data)
+static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
{
u64 read_format = leader->core.attr.read_format;
struct sample_read_value *v;
@@ -1534,7 +1525,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread,
v = (struct sample_read_value *) data;
- evsel__set_count(leader, cpu, thread, v[0].value, ena, run);
+ evsel__set_count(leader, cpu_map_idx, thread, v[0].value, ena, run);
for (i = 1; i < nr; i++) {
struct evsel *counter;
@@ -1543,13 +1534,13 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread,
if (!counter)
return -EINVAL;
- evsel__set_count(counter, cpu, thread, v[i].value, ena, run);
+ evsel__set_count(counter, cpu_map_idx, thread, v[i].value, ena, run);
}
return 0;
}
-static int evsel__read_group(struct evsel *leader, int cpu, int thread)
+static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
{
struct perf_stat_evsel *ps = leader->stats;
u64 read_format = leader->core.attr.read_format;
@@ -1570,67 +1561,67 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread)
ps->group_data = data;
}
- if (FD(leader, cpu, thread) < 0)
+ if (FD(leader, cpu_map_idx, thread) < 0)
return -EINVAL;
- if (readn(FD(leader, cpu, thread), data, size) <= 0)
+ if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0)
return -errno;
- return evsel__process_group_data(leader, cpu, thread, data);
+ return evsel__process_group_data(leader, cpu_map_idx, thread, data);
}
-int evsel__read_counter(struct evsel *evsel, int cpu, int thread)
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
{
u64 read_format = evsel->core.attr.read_format;
if (read_format & PERF_FORMAT_GROUP)
- return evsel__read_group(evsel, cpu, thread);
+ return evsel__read_group(evsel, cpu_map_idx, thread);
- return evsel__read_one(evsel, cpu, thread);
+ return evsel__read_one(evsel, cpu_map_idx, thread);
}
-int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale)
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale)
{
struct perf_counts_values count;
size_t nv = scale ? 3 : 1;
- if (FD(evsel, cpu, thread) < 0)
+ if (FD(evsel, cpu_map_idx, thread) < 0)
return -EINVAL;
- if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
+ if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0)
return -ENOMEM;
- if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
+ if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0)
return -errno;
- evsel__compute_deltas(evsel, cpu, thread, &count);
+ evsel__compute_deltas(evsel, cpu_map_idx, thread, &count);
perf_counts_values__scale(&count, scale, NULL);
- *perf_counts(evsel->counts, cpu, thread) = count;
+ *perf_counts(evsel->counts, cpu_map_idx, thread) = count;
return 0;
}
static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
- int cpu)
+ int cpu_map_idx)
{
- int cpuid;
+ struct perf_cpu cpu;
- cpuid = perf_cpu_map__cpu(evsel->core.cpus, cpu);
- return perf_cpu_map__idx(other->core.cpus, cpuid);
+ cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
+ return perf_cpu_map__idx(other->core.cpus, cpu);
}
-static int evsel__hybrid_group_cpu(struct evsel *evsel, int cpu)
+static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx)
{
struct evsel *leader = evsel__leader(evsel);
if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) ||
(!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) {
- return evsel__match_other_cpu(evsel, leader, cpu);
+ return evsel__match_other_cpu(evsel, leader, cpu_map_idx);
}
- return cpu;
+ return cpu_map_idx;
}
-static int get_group_fd(struct evsel *evsel, int cpu, int thread)
+static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
{
struct evsel *leader = evsel__leader(evsel);
int fd;
@@ -1644,11 +1635,11 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
*/
BUG_ON(!leader->core.fd);
- cpu = evsel__hybrid_group_cpu(evsel, cpu);
- if (cpu == -1)
+ cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx);
+ if (cpu_map_idx == -1)
return -1;
- fd = FD(leader, cpu, thread);
+ fd = FD(leader, cpu_map_idx, thread);
BUG_ON(fd == -1);
return fd;
@@ -1662,16 +1653,16 @@ static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int
}
static int update_fds(struct evsel *evsel,
- int nr_cpus, int cpu_idx,
+ int nr_cpus, int cpu_map_idx,
int nr_threads, int thread_idx)
{
struct evsel *pos;
- if (cpu_idx >= nr_cpus || thread_idx >= nr_threads)
+ if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads)
return -EINVAL;
evlist__for_each_entry(evsel->evlist, pos) {
- nr_cpus = pos != evsel ? nr_cpus : cpu_idx;
+ nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx;
evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
@@ -1685,10 +1676,10 @@ static int update_fds(struct evsel *evsel,
return 0;
}
-bool evsel__ignore_missing_thread(struct evsel *evsel,
- int nr_cpus, int cpu,
- struct perf_thread_map *threads,
- int thread, int err)
+static bool evsel__ignore_missing_thread(struct evsel *evsel,
+ int nr_cpus, int cpu_map_idx,
+ struct perf_thread_map *threads,
+ int thread, int err)
{
pid_t ignore_pid = perf_thread_map__pid(threads, thread);
@@ -1711,7 +1702,7 @@ bool evsel__ignore_missing_thread(struct evsel *evsel,
* We should remove fd for missing_thread first
* because thread_map__remove() will decrease threads->nr.
*/
- if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread))
+ if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread))
return false;
if (thread_map__remove(threads, thread))
@@ -1800,7 +1791,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
nthreads = threads->nr;
if (evsel->core.fd == NULL &&
- perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0)
+ perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
return -ENOMEM;
evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
@@ -1993,9 +1984,9 @@ bool evsel__increase_rlimit(enum rlimit_action *set_rlimit)
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads,
- int start_cpu, int end_cpu)
+ int start_cpu_map_idx, int end_cpu_map_idx)
{
- int cpu, thread, nthreads;
+ int idx, thread, nthreads;
int pid = -1, err, old_errno;
enum rlimit_action set_rlimit = NO_CHANGE;
@@ -2022,7 +2013,7 @@ fallback_missing_features:
display_attr(&evsel->core.attr);
- for (cpu = start_cpu; cpu < end_cpu; cpu++) {
+ for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
for (thread = 0; thread < nthreads; thread++) {
int fd, group_fd;
@@ -2033,17 +2024,18 @@ retry_open:
if (!evsel->cgrp && !evsel->core.system_wide)
pid = perf_thread_map__pid(threads, thread);
- group_fd = get_group_fd(evsel, cpu, thread);
+ group_fd = get_group_fd(evsel, idx, thread);
test_attr__ready();
pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
- pid, cpus->map[cpu], group_fd, evsel->open_flags);
+ pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu],
+ fd = sys_perf_event_open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx).cpu,
group_fd, evsel->open_flags);
- FD(evsel, cpu, thread) = fd;
+ FD(evsel, idx, thread) = fd;
if (fd < 0) {
err = -errno;
@@ -2053,10 +2045,11 @@ retry_open:
goto try_fallback;
}
- bpf_counter__install_pe(evsel, cpu, fd);
+ bpf_counter__install_pe(evsel, idx, fd);
if (unlikely(test_attr__enabled)) {
- test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
+ test_attr__open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx),
fd, group_fd, evsel->open_flags);
}
@@ -2097,7 +2090,8 @@ try_fallback:
if (evsel__precise_ip_fallback(evsel))
goto retry_open;
- if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
+ if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
+ idx, threads, thread, err)) {
/* We just removed 1 thread, so lower the upper nthreads limit. */
nthreads--;
@@ -2112,7 +2106,7 @@ try_fallback:
if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit))
goto retry_open;
- if (err != -EINVAL || cpu > 0 || thread > 0)
+ if (err != -EINVAL || idx > 0 || thread > 0)
goto out_close;
if (evsel__detect_missing_features(evsel))
@@ -2124,12 +2118,12 @@ out_close:
old_errno = errno;
do {
while (--thread >= 0) {
- if (FD(evsel, cpu, thread) >= 0)
- close(FD(evsel, cpu, thread));
- FD(evsel, cpu, thread) = -1;
+ if (FD(evsel, idx, thread) >= 0)
+ close(FD(evsel, idx, thread));
+ FD(evsel, idx, thread) = -1;
}
thread = nthreads;
- } while (--cpu >= 0);
+ } while (--idx >= 0);
errno = old_errno;
return err;
}
@@ -2137,7 +2131,7 @@ out_close:
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1);
+ return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
}
void evsel__close(struct evsel *evsel)
@@ -2146,13 +2140,12 @@ void evsel__close(struct evsel *evsel)
perf_evsel__free_id(&evsel->core);
}
-int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu)
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
{
- if (cpu == -1)
- return evsel__open_cpu(evsel, cpus, NULL, 0,
- cpus ? cpus->nr : 1);
+ if (cpu_map_idx == -1)
+ return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));
- return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
+ return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
}
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
@@ -2706,6 +2699,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(sample->raw_data + field->offset);
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
return sample->raw_data + offset;
@@ -2950,6 +2945,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size, "wrong clockid (%d).", clockid);
if (perf_missing_features.aux_output)
return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
+ if (!target__has_cpu(target))
+ return scnprintf(msg, size,
+ "Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
+ evsel__name(evsel));
break;
case ENODATA:
return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
@@ -2973,15 +2972,15 @@ struct perf_env *evsel__env(struct evsel *evsel)
static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
{
- int cpu, thread;
+ int cpu_map_idx, thread;
- for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++) {
+ for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
thread++) {
- int fd = FD(evsel, cpu, thread);
+ int fd = FD(evsel, cpu_map_idx, thread);
if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
- cpu, thread, fd) < 0)
+ cpu_map_idx, thread, fd) < 0)
return -1;
}
}
@@ -2994,7 +2993,7 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
struct perf_cpu_map *cpus = evsel->core.cpus;
struct perf_thread_map *threads = evsel->core.threads;
- if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr))
+ if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
return -ENOMEM;
return store_evsel_ids(evsel, evlist);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 29d49a8c1e92..041b42d33bf5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -11,6 +11,7 @@
#include <perf/evsel.h>
#include "symbol_conf.h"
#include <internal/cpumap.h>
+#include <perf/cpumap.h>
struct bpf_object;
struct cgroup;
@@ -121,7 +122,6 @@ struct evsel {
bool errored;
struct hashmap *per_pkg_mask;
int err;
- int cpu_iter;
struct {
evsel__sb_cb_t *cb;
void *data;
@@ -192,12 +192,9 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
static inline int evsel__nr_cpus(struct evsel *evsel)
{
- return evsel__cpus(evsel)->nr;
+ return perf_cpu_map__nr(evsel__cpus(evsel));
}
-void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled);
-
void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
@@ -288,12 +285,12 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);
int evsel__set_filter(struct evsel *evsel, const char *filter);
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
-int evsel__enable_cpu(struct evsel *evsel, int cpu);
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
-int evsel__disable_cpu(struct evsel *evsel, int cpu);
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx);
-int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu);
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx);
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
@@ -305,10 +302,6 @@ bool evsel__detect_missing_features(struct evsel *evsel);
enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX };
bool evsel__increase_rlimit(enum rlimit_action *set_rlimit);
-bool evsel__ignore_missing_thread(struct evsel *evsel,
- int nr_cpus, int cpu,
- struct perf_thread_map *threads,
- int thread, int err);
bool evsel__precise_ip_fallback(struct evsel *evsel);
struct perf_sample;
@@ -337,32 +330,32 @@ static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
(e1->core.attr.config == e2->core.attr.config);
}
-int evsel__read_counter(struct evsel *evsel, int cpu, int thread);
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread);
-int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale);
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale);
/**
* evsel__read_on_cpu - Read out the results on a CPU and thread
*
* @evsel - event selector to read value
- * @cpu - CPU of interest
+ * @cpu_map_idx - CPU of interest
* @thread - thread of interest
*/
-static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread)
+static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread)
{
- return __evsel__read_on_cpu(evsel, cpu, thread, false);
+ return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false);
}
/**
* evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
*
* @evsel - event selector to read value
- * @cpu - CPU of interest
+ * @cpu_map_idx - CPU of interest
* @thread - thread of interest
*/
-static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread)
+static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread)
{
- return __evsel__read_on_cpu(evsel, cpu, thread, true);
+ return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true);
}
int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 666b59baeb70..675f318ce7c1 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -405,12 +405,17 @@ double expr_id_data__source_count(const struct expr_id_data *data)
double expr__get_literal(const char *literal)
{
static struct cpu_topology *topology;
+ double result = NAN;
- if (!strcmp("#smt_on", literal))
- return smt_on() > 0 ? 1.0 : 0.0;
+ if (!strcasecmp("#smt_on", literal)) {
+ result = smt_on() > 0 ? 1.0 : 0.0;
+ goto out;
+ }
- if (!strcmp("#num_cpus", literal))
- return cpu__max_present_cpu();
+ if (!strcmp("#num_cpus", literal)) {
+ result = cpu__max_present_cpu().cpu;
+ goto out;
+ }
/*
* Assume that topology strings are consistent, such as CPUs "0-1"
@@ -422,16 +427,24 @@ double expr__get_literal(const char *literal)
topology = cpu_topology__new();
if (!topology) {
pr_err("Error creating CPU topology");
- return NAN;
+ goto out;
}
}
- if (!strcmp("#num_packages", literal))
- return topology->package_cpus_lists;
- if (!strcmp("#num_dies", literal))
- return topology->die_cpus_lists;
- if (!strcmp("#num_cores", literal))
- return topology->core_cpus_lists;
+ if (!strcmp("#num_packages", literal)) {
+ result = topology->package_cpus_lists;
+ goto out;
+ }
+ if (!strcmp("#num_dies", literal)) {
+ result = topology->die_cpus_lists;
+ goto out;
+ }
+ if (!strcmp("#num_cores", literal)) {
+ result = topology->core_cpus_lists;
+ goto out;
+ }
pr_err("Unrecognized literal '%s'", literal);
- return NAN;
+out:
+ pr_debug2("literal: %s = %f\n", literal, result);
+ return result;
}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
new file mode 100644
index 000000000000..887f68a185f7
--- /dev/null
+++ b/tools/perf/util/ftrace.h
@@ -0,0 +1,81 @@
+#ifndef __PERF_FTRACE_H__
+#define __PERF_FTRACE_H__
+
+#include <linux/list.h>
+
+#include "target.h"
+
+struct evlist;
+
+struct perf_ftrace {
+ struct evlist *evlist;
+ struct target target;
+ const char *tracer;
+ struct list_head filters;
+ struct list_head notrace;
+ struct list_head graph_funcs;
+ struct list_head nograph_funcs;
+ unsigned long percpu_buffer_size;
+ bool inherit;
+ int graph_depth;
+ int func_stack_trace;
+ int func_irq_info;
+ int graph_nosleep_time;
+ int graph_noirqs;
+ int graph_verbose;
+ int graph_thresh;
+ unsigned int initial_delay;
+};
+
+struct filter_entry {
+ struct list_head list;
+ char name[];
+};
+
+#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */
+
+#ifdef HAVE_BPF_SKEL
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
+ int buckets[]);
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int
+perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[] __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* __PERF_FTRACE_H__ */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e3c1a532d059..6da12e522edc 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -472,7 +472,7 @@ static int write_nrcpus(struct feat_fd *ff,
u32 nrc, nra;
int ret;
- nrc = cpu__max_present_cpu();
+ nrc = cpu__max_present_cpu().cpu;
nr = sysconf(_SC_NPROCESSORS_ONLN);
if (nr < 0)
@@ -1163,7 +1163,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
u32 nr, cpu;
u16 level;
- nr = cpu__max_cpu();
+ nr = cpu__max_cpu().cpu;
for (cpu = 0; cpu < nr; cpu++) {
for (level = 0; level < MAX_CACHE_LVL; level++) {
@@ -1195,7 +1195,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
static int write_cache(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- u32 max_caches = cpu__max_cpu() * MAX_CACHE_LVL;
+ u32 max_caches = cpu__max_cpu().cpu * MAX_CACHE_LVL;
struct cpu_cache_level caches[max_caches];
u32 cnt = 0, i, version = 1;
int ret;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b776465e04ef..0a8033b09e28 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
- hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
+
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
else
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 621f35ae1efa..2a15e22fb89c 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -75,7 +75,8 @@ enum hist_column {
HISTC_MEM_BLOCKED,
HISTC_LOCAL_INS_LAT,
HISTC_GLOBAL_INS_LAT,
- HISTC_P_STAGE_CYC,
+ HISTC_LOCAL_P_STAGE_CYC,
+ HISTC_GLOBAL_P_STAGE_CYC,
HISTC_NR_COLS, /* Last entry */
};
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
index c397be0c2e32..15f60fd09424 100644
--- a/tools/perf/util/libunwind/arm64.c
+++ b/tools/perf/util/libunwind/arm64.c
@@ -23,7 +23,9 @@
#include "unwind.h"
#include "libunwind-aarch64.h"
+#define perf_event_arm_regs perf_event_arm64_regs
#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#undef perf_event_arm_regs
#include "../../arch/arm64/util/unwind-libunwind.c"
/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index fb8496df8432..f70ba56912d4 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -16,6 +16,7 @@
#include "map_symbol.h"
#include "branch.h"
#include "mem-events.h"
+#include "path.h"
#include "srcline.h"
#include "symbol.h"
#include "sort.h"
@@ -34,6 +35,7 @@
#include "bpf-event.h"
#include <internal/lib.h> // page_size
#include "cgroup.h"
+#include "arm64-frame-pointer-unwind-support.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -1415,7 +1417,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
struct stat st;
/*sshfs might return bad dent->d_type, so we have to stat*/
- snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
+ path__join(path, sizeof(path), dir_name, dent->d_name);
if (stat(path, &st))
continue;
@@ -2710,6 +2712,15 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}
+static u64 get_leaf_frame_caller(struct perf_sample *sample,
+ struct thread *thread, int usr_idx)
+{
+ if (machine__normalized_is(thread->maps->machine, "arm64"))
+ return get_leaf_frame_caller_aarch64(sample, thread, usr_idx);
+ else
+ return 0;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
@@ -2723,9 +2734,10 @@ static int thread__resolve_callchain_sample(struct thread *thread,
struct ip_callchain *chain = sample->callchain;
int chain_nr = 0;
u8 cpumode = PERF_RECORD_MISC_USER;
- int i, j, err, nr_entries;
+ int i, j, err, nr_entries, usr_idx;
int skip_idx = -1;
int first_call = 0;
+ u64 leaf_frame_caller;
if (chain)
chain_nr = chain->nr;
@@ -2850,6 +2862,34 @@ check_calls:
continue;
}
+ /*
+ * PERF_CONTEXT_USER allows us to locate where the user stack ends.
+ * Depending on callchain_param.order and the position of PERF_CONTEXT_USER,
+ * the index will be different in order to add the missing frame
+ * at the right place.
+ */
+
+ usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1;
+
+ if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) {
+
+ leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx);
+
+ /*
+ * check if leaf_frame_Caller != ip to not add the same
+ * value twice.
+ */
+
+ if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ }
+
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, NULL, 0);
@@ -3079,14 +3119,19 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
}
/*
- * Compares the raw arch string. N.B. see instead perf_env__arch() if a
- * normalized arch is needed.
+ * Compares the raw arch string. N.B. see instead perf_env__arch() or
+ * machine__normalized_is() if a normalized arch is needed.
*/
bool machine__is(struct machine *machine, const char *arch)
{
return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
}
+bool machine__normalized_is(struct machine *machine, const char *arch)
+{
+ return machine && !strcmp(perf_env__arch(machine->env), arch);
+}
+
int machine__nr_cpus_avail(struct machine *machine)
{
return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index a143087eeb47..c5a45dc8df4c 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -208,6 +208,7 @@ static inline bool machine__is_host(struct machine *machine)
}
bool machine__is(struct machine *machine, const char *arch);
+bool machine__normalized_is(struct machine *machine, const char *arch);
int machine__nr_cpus_avail(struct machine *machine);
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 3167b4628b6d..ed0ab838bcc5 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -309,6 +309,9 @@ static const char * const mem_hops[] = {
* to be set with mem_hops field.
*/
"core, same node",
+ "node, same socket",
+ "socket, same board",
+ "board",
};
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
@@ -316,7 +319,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
size_t i, l = 0;
u64 m = PERF_MEM_LVL_NA;
u64 hit, miss;
- int printed;
+ int printed = 0;
if (mem_info)
m = mem_info->data_src.mem_lvl;
@@ -335,18 +338,22 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
l += 7;
}
- if (mem_info && mem_info->data_src.mem_hops)
+ /*
+ * Incase mem_hops field is set, we can skip printing data source via
+ * PERF_MEM_LVL namespace.
+ */
+ if (mem_info && mem_info->data_src.mem_hops) {
l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
-
- printed = 0;
- for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
- if (!(m & 0x1))
- continue;
- if (printed++) {
- strcat(out, " or ");
- l += 4;
+ } else {
+ for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+ if (printed++) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
- l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
if (mem_info && mem_info->data_src.mem_lvl_num) {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index fffe02aae3ed..d8492e339521 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -209,8 +209,8 @@ static struct metric *metric__new(const struct pmu_event *pe,
m->metric_name = pe->metric_name;
m->modifier = modifier ? strdup(modifier) : NULL;
if (modifier && !m->modifier) {
- free(m);
expr__ctx_free(m->pctx);
+ free(m);
return NULL;
}
m->metric_expr = pe->metric_expr;
@@ -314,7 +314,7 @@ static int setup_metric_events(struct hashmap *ids,
*/
metric_id = evsel__metric_id(ev);
evlist__for_each_entry_continue(metric_evlist, ev) {
- if (!strcmp(evsel__metric_id(metric_events[i]), metric_id))
+ if (!strcmp(evsel__metric_id(ev), metric_id))
ev->metric_leader = metric_events[i];
}
}
@@ -1115,13 +1115,27 @@ out:
return ret;
}
+/**
+ * metric_list_cmp - list_sort comparator that sorts metrics with more events to
+ * the front. duration_time is excluded from the count.
+ */
static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
const struct list_head *r)
{
const struct metric *left = container_of(l, struct metric, nd);
const struct metric *right = container_of(r, struct metric, nd);
+ struct expr_id_data *data;
+ int left_count, right_count;
+
+ left_count = hashmap__size(left->pctx->ids);
+ if (!expr__get_id(left->pctx, "duration_time", &data))
+ left_count--;
+
+ right_count = hashmap__size(right->pctx->ids);
+ if (!expr__get_id(right->pctx, "duration_time", &data))
+ right_count--;
- return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids);
+ return right_count - left_count;
}
/**
@@ -1299,14 +1313,16 @@ err_out:
/**
* parse_ids - Build the event string for the ids and parse them creating an
* evlist. The encoded metric_ids are decoded.
+ * @metric_no_merge: is metric sharing explicitly disabled.
* @fake_pmu: used when testing metrics not supported by the current CPU.
* @ids: the event identifiers parsed from a metric.
* @modifier: any modifiers added to the events.
* @has_constraint: false if events should be placed in a weak group.
* @out_evlist: the created list of events.
*/
-static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
- const char *modifier, bool has_constraint, struct evlist **out_evlist)
+static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+ struct expr_parse_ctx *ids, const char *modifier,
+ bool has_constraint, struct evlist **out_evlist)
{
struct parse_events_error parse_error;
struct evlist *parsed_evlist;
@@ -1314,12 +1330,19 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
int ret;
*out_evlist = NULL;
- if (hashmap__size(ids->ids) == 0) {
+ if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
char *tmp;
/*
- * No ids/events in the expression parsing context. Events may
- * have been removed because of constant evaluation, e.g.:
- * event1 if #smt_on else 0
+ * We may fail to share events between metrics because
+ * duration_time isn't present in one metric. For example, a
+ * ratio of cache misses doesn't need duration_time but the same
+ * events may be used for a misses per second. Events without
+ * sharing implies multiplexing, that is best avoided, so place
+ * duration_time in every group.
+ *
+ * Also, there may be no ids/events in the expression parsing
+ * context because of constant evaluation, e.g.:
+ * event1 if #smt_on else 0
* Add a duration_time event to avoid a parse error on an empty
* string.
*/
@@ -1387,7 +1410,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
ret = build_combined_expr_ctx(&metric_list, &combined);
if (!ret && combined && hashmap__size(combined->ids)) {
- ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL,
+ ret = parse_ids(metric_no_merge, fake_pmu, combined,
+ /*modifier=*/NULL,
/*has_constraint=*/true,
&combined_evlist);
}
@@ -1435,7 +1459,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
}
}
if (!metric_evlist) {
- ret = parse_ids(fake_pmu, m->pctx, m->modifier,
+ ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
m->has_constraint, &m->evlist);
if (ret)
goto out;
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 23ecdba9e670..0e8ff8d1e206 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -94,7 +94,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx)
}
}
-static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
+static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, int affinity)
{
void *data;
size_t mmap_len;
@@ -138,7 +138,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx)
}
static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused,
- int cpu __maybe_unused, int affinity __maybe_unused)
+ struct perf_cpu cpu __maybe_unused, int affinity __maybe_unused)
{
return 0;
}
@@ -240,7 +240,8 @@ void mmap__munmap(struct mmap *map)
static void build_node_mask(int node, struct mmap_cpu_mask *mask)
{
- int c, cpu, nr_cpus;
+ int idx, nr_cpus;
+ struct perf_cpu cpu;
const struct perf_cpu_map *cpu_map = NULL;
cpu_map = cpu_map__online();
@@ -248,16 +249,16 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
return;
nr_cpus = perf_cpu_map__nr(cpu_map);
- for (c = 0; c < nr_cpus; c++) {
- cpu = cpu_map->map[c]; /* map c index to online cpu index */
+ for (idx = 0; idx < nr_cpus; idx++) {
+ cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */
if (cpu__get_node(cpu) == node)
- set_bit(cpu, mask->bits);
+ set_bit(cpu.cpu, mask->bits);
}
}
static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
{
- map->affinity_mask.nbits = cpu__max_cpu();
+ map->affinity_mask.nbits = cpu__max_cpu().cpu;
map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits);
if (!map->affinity_mask.bits)
return -1;
@@ -265,12 +266,12 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *
if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
else if (mp->affinity == PERF_AFFINITY_CPU)
- set_bit(map->core.cpu, map->affinity_mask.bits);
+ set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
return 0;
}
-int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu)
{
if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) {
pr_debug2("failed to mmap perf event ring buffer, error %d\n",
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 8e259b9610f8..83f6bd4d4082 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/ring_buffer.h>
#include <linux/bitops.h>
+#include <perf/cpumap.h>
#include <stdbool.h>
#include <pthread.h> // for cpu_set_t
#ifdef HAVE_AIO_SUPPORT
@@ -52,7 +53,7 @@ struct mmap_params {
struct auxtrace_mmap_params auxtrace_mp;
};
-int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu);
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu);
void mmap__munmap(struct mmap *map);
union perf_event *perf_mmap__read_forward(struct mmap *map);
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index 608b20c72a5c..48aa3217300b 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -60,17 +60,49 @@ void namespaces__free(struct namespaces *namespaces)
free(namespaces);
}
+static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path)
+{
+ FILE *f = NULL;
+ char *statln = NULL;
+ size_t linesz = 0;
+ char *nspid;
+
+ f = fopen(path, "r");
+ if (f == NULL)
+ return -1;
+
+ while (getline(&statln, &linesz, f) != -1) {
+ /* Use tgid if CONFIG_PID_NS is not defined. */
+ if (strstr(statln, "Tgid:") != NULL) {
+ nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
+ NULL, 10);
+ nsi->nstgid = nsi->tgid;
+ }
+
+ if (strstr(statln, "NStgid:") != NULL) {
+ nspid = strrchr(statln, '\t');
+ nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
+ /*
+ * If innermost tgid is not the first, process is in a different
+ * PID namespace.
+ */
+ nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
+ break;
+ }
+ }
+
+ fclose(f);
+ free(statln);
+ return 0;
+}
+
int nsinfo__init(struct nsinfo *nsi)
{
char oldns[PATH_MAX];
char spath[PATH_MAX];
char *newns = NULL;
- char *statln = NULL;
- char *nspid;
struct stat old_stat;
struct stat new_stat;
- FILE *f = NULL;
- size_t linesz = 0;
int rv = -1;
if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
@@ -100,34 +132,9 @@ int nsinfo__init(struct nsinfo *nsi)
if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX)
goto out;
- f = fopen(spath, "r");
- if (f == NULL)
- goto out;
-
- while (getline(&statln, &linesz, f) != -1) {
- /* Use tgid if CONFIG_PID_NS is not defined. */
- if (strstr(statln, "Tgid:") != NULL) {
- nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
- NULL, 10);
- nsi->nstgid = nsi->tgid;
- }
-
- if (strstr(statln, "NStgid:") != NULL) {
- nspid = strrchr(statln, '\t');
- nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
- /* If innermost tgid is not the first, process is in a different
- * PID namespace.
- */
- nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
- break;
- }
- }
- rv = 0;
+ rv = nsinfo__get_nspid(nsi, spath);
out:
- if (f != NULL)
- (void) fclose(f);
- free(statln);
free(newns);
return rv;
}
@@ -299,3 +306,12 @@ int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi)
return ret;
}
+
+bool nsinfo__is_in_root_namespace(void)
+{
+ struct nsinfo nsi;
+
+ memset(&nsi, 0x0, sizeof(nsi));
+ nsinfo__get_nspid(&nsi, "/proc/self/status");
+ return !nsi.in_pidns;
+}
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index ad9775db7b9c..9ceea9643507 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -59,6 +59,8 @@ void nsinfo__mountns_exit(struct nscookie *nc);
char *nsinfo__realpath(const char *path, struct nsinfo *nsi);
int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi);
+bool nsinfo__is_in_root_namespace(void);
+
static inline void __nsinfo__zput(struct nsinfo **nsip)
{
if (nsip) {
diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c
index 9fc86971027b..284f8eabd3b9 100644
--- a/tools/perf/util/parse-events-hybrid.c
+++ b/tools/perf/util/parse-events-hybrid.c
@@ -63,10 +63,13 @@ static int create_event_hybrid(__u32 config_type, int *idx,
static int pmu_cmp(struct parse_events_state *parse_state,
struct perf_pmu *pmu)
{
- if (!parse_state->hybrid_pmu_name)
- return 0;
+ if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name)
+ return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name);
+
+ if (parse_state->hybrid_pmu_name)
+ return strcmp(parse_state->hybrid_pmu_name, pmu->name);
- return strcmp(parse_state->hybrid_pmu_name, pmu->name);
+ return 0;
}
static int add_hw_hybrid(struct parse_events_state *parse_state,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index ba74fdf74af9..9739b05b999e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1697,6 +1697,15 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
}
}
}
+
+ if (parse_state->fake_pmu) {
+ if (!parse_events_add_pmu(parse_state, list, str, head,
+ true, true)) {
+ pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str);
+ ok++;
+ }
+ }
+
out_err:
if (ok)
*listp = list;
@@ -1824,6 +1833,11 @@ out:
return ret;
}
+__weak struct evsel *arch_evlist__leader(struct list_head *list)
+{
+ return list_first_entry(list, struct evsel, core.node);
+}
+
void parse_events__set_leader(char *name, struct list_head *list,
struct parse_events_state *parse_state)
{
@@ -1837,9 +1851,10 @@ void parse_events__set_leader(char *name, struct list_head *list,
if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
return;
- __perf_evlist__set_leader(list);
- leader = list_entry(list->next, struct evsel, core.node);
+ leader = arch_evlist__leader(list);
+ __perf_evlist__set_leader(list, &leader->core);
leader->group_name = name ? strdup(name) : NULL;
+ list_move(&leader->core.node, list);
}
/* list_event is assumed to point to malloc'ed memory */
@@ -2092,8 +2107,17 @@ static void perf_pmu__parse_init(void)
pmu = NULL;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
list_for_each_entry(alias, &pmu->aliases, list) {
- if (strchr(alias->name, '-'))
+ char *tmp = strchr(alias->name, '-');
+
+ if (tmp) {
+ char *tmp2 = NULL;
+
+ tmp2 = strchr(tmp + 1, '-');
len++;
+ if (tmp2)
+ len++;
+ }
+
len++;
}
}
@@ -2113,8 +2137,20 @@ static void perf_pmu__parse_init(void)
list_for_each_entry(alias, &pmu->aliases, list) {
struct perf_pmu_event_symbol *p = perf_pmu_events_list + len;
char *tmp = strchr(alias->name, '-');
+ char *tmp2 = NULL;
- if (tmp != NULL) {
+ if (tmp)
+ tmp2 = strchr(tmp + 1, '-');
+ if (tmp2) {
+ SET_SYMBOL(strndup(alias->name, tmp - alias->name),
+ PMU_EVENT_SYMBOL_PREFIX);
+ p++;
+ tmp++;
+ SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX);
+ p++;
+ SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2);
+ len += 3;
+ } else if (tmp) {
SET_SYMBOL(strndup(alias->name, tmp - alias->name),
PMU_EVENT_SYMBOL_PREFIX);
p++;
@@ -2141,23 +2177,38 @@ err:
*/
int perf_pmu__test_parse_init(void)
{
- struct perf_pmu_event_symbol *list;
+ struct perf_pmu_event_symbol *list, *tmp, symbols[] = {
+ {(char *)"read", PMU_EVENT_SYMBOL},
+ {(char *)"event", PMU_EVENT_SYMBOL_PREFIX},
+ {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX},
+ {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX},
+ {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2},
+ };
+ unsigned long i, j;
- list = malloc(sizeof(*list) * 1);
+ tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols));
if (!list)
return -ENOMEM;
- list->type = PMU_EVENT_SYMBOL;
- list->symbol = strdup("read");
-
- if (!list->symbol) {
- free(list);
- return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) {
+ tmp->type = symbols[i].type;
+ tmp->symbol = strdup(symbols[i].symbol);
+ if (!list->symbol)
+ goto err_free;
}
perf_pmu_events_list = list;
- perf_pmu_events_list_num = 1;
+ perf_pmu_events_list_num = ARRAY_SIZE(symbols);
+
+ qsort(perf_pmu_events_list, ARRAY_SIZE(symbols),
+ sizeof(struct perf_pmu_event_symbol), comp_pmu);
return 0;
+
+err_free:
+ for (j = 0, tmp = list; j < i; j++, tmp++)
+ free(tmp->symbol);
+ free(list);
+ return -ENOMEM;
}
enum perf_pmu_event_symbol_type
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index c7fc93f54577..a38b8b160e80 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -53,6 +53,7 @@ enum perf_pmu_event_symbol_type {
PMU_EVENT_SYMBOL, /* normal style PMU event */
PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */
PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */
+ PMU_EVENT_SYMBOL_SUFFIX2, /* suffix of pre-suf2 style event */
};
struct perf_pmu_event_symbol {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4efe9872c667..5b6e4b5249cf 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -149,6 +149,8 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat
return PE_PMU_EVENT_PRE;
case PMU_EVENT_SYMBOL_SUFFIX:
return PE_PMU_EVENT_SUF;
+ case PMU_EVENT_SYMBOL_SUFFIX2:
+ return PE_PMU_EVENT_SUF2;
case PMU_EVENT_SYMBOL:
return parse_state->fake_pmu
? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT;
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 174158982fae..be8c51770051 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list,
%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
-%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%token PE_ARRAY_ALL PE_ARRAY_RANGE
%token PE_DRV_CFG_TERM
%type <num> PE_VALUE
@@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_MODIFIER_EVENT
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
-%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
%type <str> event_pmu_name
%destructor { free ($$); } <str>
@@ -372,6 +372,19 @@ PE_KERNEL_PMU_EVENT opt_pmu_config
$$ = list;
}
|
+PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc
+{
+ struct list_head *list;
+ char pmu_name[128];
+ snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5);
+ free($1);
+ free($3);
+ free($5);
+ if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0)
+ YYABORT;
+ $$ = list;
+}
+|
PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
{
struct list_head *list;
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 020411682a3c..c28dd50bd571 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -11,7 +11,7 @@
typedef void (*setup_probe_fn_t)(struct evsel *evsel);
-static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const char *str)
{
struct evlist *evlist;
struct evsel *evsel;
@@ -29,7 +29,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
evsel = evlist__first(evlist);
while (1) {
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
if (fd < 0) {
if (pid == -1 && errno == EACCES) {
pid = 0;
@@ -43,7 +43,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
fn(evsel);
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
if (fd < 0) {
if (errno == EINVAL)
err = -EINVAL;
@@ -61,12 +61,13 @@ static bool perf_probe_api(setup_probe_fn_t fn)
{
const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
struct perf_cpu_map *cpus;
- int cpu, ret, i = 0;
+ struct perf_cpu cpu;
+ int ret, i = 0;
cpus = perf_cpu_map__new(NULL);
if (!cpus)
return false;
- cpu = cpus->map[0];
+ cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
do {
@@ -136,15 +137,17 @@ bool perf_can_record_cpu_wide(void)
.exclude_kernel = 1,
};
struct perf_cpu_map *cpus;
- int cpu, fd;
+ struct perf_cpu cpu;
+ int fd;
cpus = perf_cpu_map__new(NULL);
if (!cpus)
return false;
- cpu = cpus->map[0];
+
+ cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
- fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0);
if (fd < 0)
return false;
close(fd);
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 06a7461ba864..a982e40ee5a9 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
+#include <string.h>
#include "perf_regs.h"
#include "event.h"
@@ -20,6 +21,671 @@ uint64_t __weak arch__user_reg_mask(void)
}
#ifdef HAVE_PERF_REGS_SUPPORT
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+#include "../../arch/arm/include/uapi/asm/perf_regs.h"
+#include "../../arch/csky/include/uapi/asm/perf_regs.h"
+#include "../../arch/mips/include/uapi/asm/perf_regs.h"
+#include "../../arch/powerpc/include/uapi/asm/perf_regs.h"
+#include "../../arch/riscv/include/uapi/asm/perf_regs.h"
+#include "../../arch/s390/include/uapi/asm/perf_regs.h"
+#include "../../arch/x86/include/uapi/asm/perf_regs.h"
+
+static const char *__perf_reg_name_arm64(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_arm(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM_R0:
+ return "r0";
+ case PERF_REG_ARM_R1:
+ return "r1";
+ case PERF_REG_ARM_R2:
+ return "r2";
+ case PERF_REG_ARM_R3:
+ return "r3";
+ case PERF_REG_ARM_R4:
+ return "r4";
+ case PERF_REG_ARM_R5:
+ return "r5";
+ case PERF_REG_ARM_R6:
+ return "r6";
+ case PERF_REG_ARM_R7:
+ return "r7";
+ case PERF_REG_ARM_R8:
+ return "r8";
+ case PERF_REG_ARM_R9:
+ return "r9";
+ case PERF_REG_ARM_R10:
+ return "r10";
+ case PERF_REG_ARM_FP:
+ return "fp";
+ case PERF_REG_ARM_IP:
+ return "ip";
+ case PERF_REG_ARM_SP:
+ return "sp";
+ case PERF_REG_ARM_LR:
+ return "lr";
+ case PERF_REG_ARM_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_csky(int id)
+{
+ switch (id) {
+ case PERF_REG_CSKY_A0:
+ return "a0";
+ case PERF_REG_CSKY_A1:
+ return "a1";
+ case PERF_REG_CSKY_A2:
+ return "a2";
+ case PERF_REG_CSKY_A3:
+ return "a3";
+ case PERF_REG_CSKY_REGS0:
+ return "regs0";
+ case PERF_REG_CSKY_REGS1:
+ return "regs1";
+ case PERF_REG_CSKY_REGS2:
+ return "regs2";
+ case PERF_REG_CSKY_REGS3:
+ return "regs3";
+ case PERF_REG_CSKY_REGS4:
+ return "regs4";
+ case PERF_REG_CSKY_REGS5:
+ return "regs5";
+ case PERF_REG_CSKY_REGS6:
+ return "regs6";
+ case PERF_REG_CSKY_REGS7:
+ return "regs7";
+ case PERF_REG_CSKY_REGS8:
+ return "regs8";
+ case PERF_REG_CSKY_REGS9:
+ return "regs9";
+ case PERF_REG_CSKY_SP:
+ return "sp";
+ case PERF_REG_CSKY_LR:
+ return "lr";
+ case PERF_REG_CSKY_PC:
+ return "pc";
+#if defined(__CSKYABIV2__)
+ case PERF_REG_CSKY_EXREGS0:
+ return "exregs0";
+ case PERF_REG_CSKY_EXREGS1:
+ return "exregs1";
+ case PERF_REG_CSKY_EXREGS2:
+ return "exregs2";
+ case PERF_REG_CSKY_EXREGS3:
+ return "exregs3";
+ case PERF_REG_CSKY_EXREGS4:
+ return "exregs4";
+ case PERF_REG_CSKY_EXREGS5:
+ return "exregs5";
+ case PERF_REG_CSKY_EXREGS6:
+ return "exregs6";
+ case PERF_REG_CSKY_EXREGS7:
+ return "exregs7";
+ case PERF_REG_CSKY_EXREGS8:
+ return "exregs8";
+ case PERF_REG_CSKY_EXREGS9:
+ return "exregs9";
+ case PERF_REG_CSKY_EXREGS10:
+ return "exregs10";
+ case PERF_REG_CSKY_EXREGS11:
+ return "exregs11";
+ case PERF_REG_CSKY_EXREGS12:
+ return "exregs12";
+ case PERF_REG_CSKY_EXREGS13:
+ return "exregs13";
+ case PERF_REG_CSKY_EXREGS14:
+ return "exregs14";
+ case PERF_REG_CSKY_TLS:
+ return "tls";
+ case PERF_REG_CSKY_HI:
+ return "hi";
+ case PERF_REG_CSKY_LO:
+ return "lo";
+#endif
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_mips(int id)
+{
+ switch (id) {
+ case PERF_REG_MIPS_PC:
+ return "PC";
+ case PERF_REG_MIPS_R1:
+ return "$1";
+ case PERF_REG_MIPS_R2:
+ return "$2";
+ case PERF_REG_MIPS_R3:
+ return "$3";
+ case PERF_REG_MIPS_R4:
+ return "$4";
+ case PERF_REG_MIPS_R5:
+ return "$5";
+ case PERF_REG_MIPS_R6:
+ return "$6";
+ case PERF_REG_MIPS_R7:
+ return "$7";
+ case PERF_REG_MIPS_R8:
+ return "$8";
+ case PERF_REG_MIPS_R9:
+ return "$9";
+ case PERF_REG_MIPS_R10:
+ return "$10";
+ case PERF_REG_MIPS_R11:
+ return "$11";
+ case PERF_REG_MIPS_R12:
+ return "$12";
+ case PERF_REG_MIPS_R13:
+ return "$13";
+ case PERF_REG_MIPS_R14:
+ return "$14";
+ case PERF_REG_MIPS_R15:
+ return "$15";
+ case PERF_REG_MIPS_R16:
+ return "$16";
+ case PERF_REG_MIPS_R17:
+ return "$17";
+ case PERF_REG_MIPS_R18:
+ return "$18";
+ case PERF_REG_MIPS_R19:
+ return "$19";
+ case PERF_REG_MIPS_R20:
+ return "$20";
+ case PERF_REG_MIPS_R21:
+ return "$21";
+ case PERF_REG_MIPS_R22:
+ return "$22";
+ case PERF_REG_MIPS_R23:
+ return "$23";
+ case PERF_REG_MIPS_R24:
+ return "$24";
+ case PERF_REG_MIPS_R25:
+ return "$25";
+ case PERF_REG_MIPS_R28:
+ return "$28";
+ case PERF_REG_MIPS_R29:
+ return "$29";
+ case PERF_REG_MIPS_R30:
+ return "$30";
+ case PERF_REG_MIPS_R31:
+ return "$31";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static const char *__perf_reg_name_powerpc(int id)
+{
+ switch (id) {
+ case PERF_REG_POWERPC_R0:
+ return "r0";
+ case PERF_REG_POWERPC_R1:
+ return "r1";
+ case PERF_REG_POWERPC_R2:
+ return "r2";
+ case PERF_REG_POWERPC_R3:
+ return "r3";
+ case PERF_REG_POWERPC_R4:
+ return "r4";
+ case PERF_REG_POWERPC_R5:
+ return "r5";
+ case PERF_REG_POWERPC_R6:
+ return "r6";
+ case PERF_REG_POWERPC_R7:
+ return "r7";
+ case PERF_REG_POWERPC_R8:
+ return "r8";
+ case PERF_REG_POWERPC_R9:
+ return "r9";
+ case PERF_REG_POWERPC_R10:
+ return "r10";
+ case PERF_REG_POWERPC_R11:
+ return "r11";
+ case PERF_REG_POWERPC_R12:
+ return "r12";
+ case PERF_REG_POWERPC_R13:
+ return "r13";
+ case PERF_REG_POWERPC_R14:
+ return "r14";
+ case PERF_REG_POWERPC_R15:
+ return "r15";
+ case PERF_REG_POWERPC_R16:
+ return "r16";
+ case PERF_REG_POWERPC_R17:
+ return "r17";
+ case PERF_REG_POWERPC_R18:
+ return "r18";
+ case PERF_REG_POWERPC_R19:
+ return "r19";
+ case PERF_REG_POWERPC_R20:
+ return "r20";
+ case PERF_REG_POWERPC_R21:
+ return "r21";
+ case PERF_REG_POWERPC_R22:
+ return "r22";
+ case PERF_REG_POWERPC_R23:
+ return "r23";
+ case PERF_REG_POWERPC_R24:
+ return "r24";
+ case PERF_REG_POWERPC_R25:
+ return "r25";
+ case PERF_REG_POWERPC_R26:
+ return "r26";
+ case PERF_REG_POWERPC_R27:
+ return "r27";
+ case PERF_REG_POWERPC_R28:
+ return "r28";
+ case PERF_REG_POWERPC_R29:
+ return "r29";
+ case PERF_REG_POWERPC_R30:
+ return "r30";
+ case PERF_REG_POWERPC_R31:
+ return "r31";
+ case PERF_REG_POWERPC_NIP:
+ return "nip";
+ case PERF_REG_POWERPC_MSR:
+ return "msr";
+ case PERF_REG_POWERPC_ORIG_R3:
+ return "orig_r3";
+ case PERF_REG_POWERPC_CTR:
+ return "ctr";
+ case PERF_REG_POWERPC_LINK:
+ return "link";
+ case PERF_REG_POWERPC_XER:
+ return "xer";
+ case PERF_REG_POWERPC_CCR:
+ return "ccr";
+ case PERF_REG_POWERPC_SOFTE:
+ return "softe";
+ case PERF_REG_POWERPC_TRAP:
+ return "trap";
+ case PERF_REG_POWERPC_DAR:
+ return "dar";
+ case PERF_REG_POWERPC_DSISR:
+ return "dsisr";
+ case PERF_REG_POWERPC_SIER:
+ return "sier";
+ case PERF_REG_POWERPC_MMCRA:
+ return "mmcra";
+ case PERF_REG_POWERPC_MMCR0:
+ return "mmcr0";
+ case PERF_REG_POWERPC_MMCR1:
+ return "mmcr1";
+ case PERF_REG_POWERPC_MMCR2:
+ return "mmcr2";
+ case PERF_REG_POWERPC_MMCR3:
+ return "mmcr3";
+ case PERF_REG_POWERPC_SIER2:
+ return "sier2";
+ case PERF_REG_POWERPC_SIER3:
+ return "sier3";
+ case PERF_REG_POWERPC_PMC1:
+ return "pmc1";
+ case PERF_REG_POWERPC_PMC2:
+ return "pmc2";
+ case PERF_REG_POWERPC_PMC3:
+ return "pmc3";
+ case PERF_REG_POWERPC_PMC4:
+ return "pmc4";
+ case PERF_REG_POWERPC_PMC5:
+ return "pmc5";
+ case PERF_REG_POWERPC_PMC6:
+ return "pmc6";
+ case PERF_REG_POWERPC_SDAR:
+ return "sdar";
+ case PERF_REG_POWERPC_SIAR:
+ return "siar";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static const char *__perf_reg_name_riscv(int id)
+{
+ switch (id) {
+ case PERF_REG_RISCV_PC:
+ return "pc";
+ case PERF_REG_RISCV_RA:
+ return "ra";
+ case PERF_REG_RISCV_SP:
+ return "sp";
+ case PERF_REG_RISCV_GP:
+ return "gp";
+ case PERF_REG_RISCV_TP:
+ return "tp";
+ case PERF_REG_RISCV_T0:
+ return "t0";
+ case PERF_REG_RISCV_T1:
+ return "t1";
+ case PERF_REG_RISCV_T2:
+ return "t2";
+ case PERF_REG_RISCV_S0:
+ return "s0";
+ case PERF_REG_RISCV_S1:
+ return "s1";
+ case PERF_REG_RISCV_A0:
+ return "a0";
+ case PERF_REG_RISCV_A1:
+ return "a1";
+ case PERF_REG_RISCV_A2:
+ return "a2";
+ case PERF_REG_RISCV_A3:
+ return "a3";
+ case PERF_REG_RISCV_A4:
+ return "a4";
+ case PERF_REG_RISCV_A5:
+ return "a5";
+ case PERF_REG_RISCV_A6:
+ return "a6";
+ case PERF_REG_RISCV_A7:
+ return "a7";
+ case PERF_REG_RISCV_S2:
+ return "s2";
+ case PERF_REG_RISCV_S3:
+ return "s3";
+ case PERF_REG_RISCV_S4:
+ return "s4";
+ case PERF_REG_RISCV_S5:
+ return "s5";
+ case PERF_REG_RISCV_S6:
+ return "s6";
+ case PERF_REG_RISCV_S7:
+ return "s7";
+ case PERF_REG_RISCV_S8:
+ return "s8";
+ case PERF_REG_RISCV_S9:
+ return "s9";
+ case PERF_REG_RISCV_S10:
+ return "s10";
+ case PERF_REG_RISCV_S11:
+ return "s11";
+ case PERF_REG_RISCV_T3:
+ return "t3";
+ case PERF_REG_RISCV_T4:
+ return "t4";
+ case PERF_REG_RISCV_T5:
+ return "t5";
+ case PERF_REG_RISCV_T6:
+ return "t6";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_s390(int id)
+{
+ switch (id) {
+ case PERF_REG_S390_R0:
+ return "R0";
+ case PERF_REG_S390_R1:
+ return "R1";
+ case PERF_REG_S390_R2:
+ return "R2";
+ case PERF_REG_S390_R3:
+ return "R3";
+ case PERF_REG_S390_R4:
+ return "R4";
+ case PERF_REG_S390_R5:
+ return "R5";
+ case PERF_REG_S390_R6:
+ return "R6";
+ case PERF_REG_S390_R7:
+ return "R7";
+ case PERF_REG_S390_R8:
+ return "R8";
+ case PERF_REG_S390_R9:
+ return "R9";
+ case PERF_REG_S390_R10:
+ return "R10";
+ case PERF_REG_S390_R11:
+ return "R11";
+ case PERF_REG_S390_R12:
+ return "R12";
+ case PERF_REG_S390_R13:
+ return "R13";
+ case PERF_REG_S390_R14:
+ return "R14";
+ case PERF_REG_S390_R15:
+ return "R15";
+ case PERF_REG_S390_FP0:
+ return "FP0";
+ case PERF_REG_S390_FP1:
+ return "FP1";
+ case PERF_REG_S390_FP2:
+ return "FP2";
+ case PERF_REG_S390_FP3:
+ return "FP3";
+ case PERF_REG_S390_FP4:
+ return "FP4";
+ case PERF_REG_S390_FP5:
+ return "FP5";
+ case PERF_REG_S390_FP6:
+ return "FP6";
+ case PERF_REG_S390_FP7:
+ return "FP7";
+ case PERF_REG_S390_FP8:
+ return "FP8";
+ case PERF_REG_S390_FP9:
+ return "FP9";
+ case PERF_REG_S390_FP10:
+ return "FP10";
+ case PERF_REG_S390_FP11:
+ return "FP11";
+ case PERF_REG_S390_FP12:
+ return "FP12";
+ case PERF_REG_S390_FP13:
+ return "FP13";
+ case PERF_REG_S390_FP14:
+ return "FP14";
+ case PERF_REG_S390_FP15:
+ return "FP15";
+ case PERF_REG_S390_MASK:
+ return "MASK";
+ case PERF_REG_S390_PC:
+ return "PC";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_x86(int id)
+{
+ switch (id) {
+ case PERF_REG_X86_AX:
+ return "AX";
+ case PERF_REG_X86_BX:
+ return "BX";
+ case PERF_REG_X86_CX:
+ return "CX";
+ case PERF_REG_X86_DX:
+ return "DX";
+ case PERF_REG_X86_SI:
+ return "SI";
+ case PERF_REG_X86_DI:
+ return "DI";
+ case PERF_REG_X86_BP:
+ return "BP";
+ case PERF_REG_X86_SP:
+ return "SP";
+ case PERF_REG_X86_IP:
+ return "IP";
+ case PERF_REG_X86_FLAGS:
+ return "FLAGS";
+ case PERF_REG_X86_CS:
+ return "CS";
+ case PERF_REG_X86_SS:
+ return "SS";
+ case PERF_REG_X86_DS:
+ return "DS";
+ case PERF_REG_X86_ES:
+ return "ES";
+ case PERF_REG_X86_FS:
+ return "FS";
+ case PERF_REG_X86_GS:
+ return "GS";
+ case PERF_REG_X86_R8:
+ return "R8";
+ case PERF_REG_X86_R9:
+ return "R9";
+ case PERF_REG_X86_R10:
+ return "R10";
+ case PERF_REG_X86_R11:
+ return "R11";
+ case PERF_REG_X86_R12:
+ return "R12";
+ case PERF_REG_X86_R13:
+ return "R13";
+ case PERF_REG_X86_R14:
+ return "R14";
+ case PERF_REG_X86_R15:
+ return "R15";
+
+#define XMM(x) \
+ case PERF_REG_X86_XMM ## x: \
+ case PERF_REG_X86_XMM ## x + 1: \
+ return "XMM" #x;
+ XMM(0)
+ XMM(1)
+ XMM(2)
+ XMM(3)
+ XMM(4)
+ XMM(5)
+ XMM(6)
+ XMM(7)
+ XMM(8)
+ XMM(9)
+ XMM(10)
+ XMM(11)
+ XMM(12)
+ XMM(13)
+ XMM(14)
+ XMM(15)
+#undef XMM
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+const char *perf_reg_name(int id, const char *arch)
+{
+ const char *reg_name = NULL;
+
+ if (!strcmp(arch, "csky"))
+ reg_name = __perf_reg_name_csky(id);
+ else if (!strcmp(arch, "mips"))
+ reg_name = __perf_reg_name_mips(id);
+ else if (!strcmp(arch, "powerpc"))
+ reg_name = __perf_reg_name_powerpc(id);
+ else if (!strcmp(arch, "riscv"))
+ reg_name = __perf_reg_name_riscv(id);
+ else if (!strcmp(arch, "s390"))
+ reg_name = __perf_reg_name_s390(id);
+ else if (!strcmp(arch, "x86"))
+ reg_name = __perf_reg_name_x86(id);
+ else if (!strcmp(arch, "arm"))
+ reg_name = __perf_reg_name_arm(id);
+ else if (!strcmp(arch, "arm64"))
+ reg_name = __perf_reg_name_arm64(id);
+
+ return reg_name ?: "unknown";
+}
+
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
{
int i, idx = 0;
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index eeac181ebccf..ce1127af05e4 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -11,8 +11,11 @@ struct sample_reg {
const char *name;
uint64_t mask;
};
-#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
-#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) }
+
+#define SMPL_REG_MASK(b) (1ULL << (b))
+#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) }
+#define SMPL_REG2_MASK(b) (3ULL << (b))
+#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) }
#define SMPL_REG_END { .name = NULL }
enum {
@@ -31,22 +34,16 @@ extern const struct sample_reg sample_reg_masks[];
#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
+const char *perf_reg_name(int id, const char *arch);
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
-static inline const char *perf_reg_name(int id)
-{
- const char *reg_name = __perf_reg_name(id);
-
- return reg_name ?: "unknown";
-}
-
#else
#define PERF_REGS_MASK 0
#define PERF_REGS_MAX 0
#define DWARF_MINIMAL_REGS PERF_REGS_MASK
-static inline const char *perf_reg_name(int id __maybe_unused)
+static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused)
{
return "unknown";
}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index b2a02c9ab8ea..a834918a0a0d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -3083,6 +3083,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
for (j = 0; j < num_matched_functions; j++) {
sym = syms[j];
+ if (sym->type != STT_FUNC)
+ continue;
+
/* There can be duplicated symbols in the map */
for (i = 0; i < j; i++)
if (sym->start == syms[i]->start) {
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 7f782a31bda3..52d8995cfd73 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -428,6 +428,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
@@ -636,17 +638,17 @@ static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
{
struct pyrf_cpu_map *pcpus = (void *)obj;
- return pcpus->cpus->nr;
+ return perf_cpu_map__nr(pcpus->cpus);
}
static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
{
struct pyrf_cpu_map *pcpus = (void *)obj;
- if (i >= pcpus->cpus->nr)
+ if (i >= perf_cpu_map__nr(pcpus->cpus))
return NULL;
- return Py_BuildValue("i", pcpus->cpus->map[i]);
+ return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu);
}
static PySequenceMethods pyrf_cpu_map__sequence_methods = {
@@ -1057,7 +1059,7 @@ static struct mmap *get_md(struct evlist *evlist, int cpu)
for (i = 0; i < evlist->core.nr_mmaps; i++) {
struct mmap *md = &evlist->mmap[i];
- if (md->core.cpu == cpu)
+ if (md->core.cpu.cpu == cpu)
return md;
}
@@ -1443,7 +1445,7 @@ error:
* Dummy, to avoid dragging all the test_attr infrastructure in the python
* binding.
*/
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags)
{
}
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index bff669b615ee..007a64681416 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -106,7 +106,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call
if (opts->group)
evlist__set_leader(evlist);
- if (evlist->core.cpus->map[0] < 0)
+ if (perf_cpu_map__cpu(evlist->core.cpus, 0).cpu < 0)
opts->no_inherit = true;
use_comm_exec = perf_can_comm_exec();
@@ -229,7 +229,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
{
struct evlist *temp_evlist;
struct evsel *evsel;
- int err, fd, cpu;
+ int err, fd;
+ struct perf_cpu cpu = { .cpu = 0 };
bool ret = false;
pid_t pid = -1;
@@ -246,14 +247,16 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
- cpu = cpus ? cpus->map[0] : 0;
+ if (cpus)
+ cpu = perf_cpu_map__cpu(cpus, 0);
+
perf_cpu_map__put(cpus);
} else {
- cpu = evlist->core.cpus->map[0];
+ cpu = perf_cpu_map__cpu(evlist->core.cpus, 0);
}
while (1) {
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1,
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1,
perf_event_open_cloexec_flag());
if (fd < 0) {
if (pid == -1 && errno == EACCES) {
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 32a721b3e9a5..a5d945415bbc 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -392,6 +392,8 @@ static void perl_process_tracepoint(struct perf_sample *sample,
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(data + field->offset);
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
} else
offset = field->offset;
XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index c0c010350bc2..e752e1f4a5f0 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -36,6 +36,7 @@
#include "../debug.h"
#include "../dso.h"
#include "../callchain.h"
+#include "../env.h"
#include "../evsel.h"
#include "../event.h"
#include "../thread.h"
@@ -687,7 +688,7 @@ static void set_sample_datasrc_in_dict(PyObject *dict,
_PyUnicode_FromString(decode));
}
-static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
+static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size)
{
unsigned int i = 0, r;
int printed = 0;
@@ -702,7 +703,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
printed += scnprintf(bf + printed, size - printed,
"%5s:0x%" PRIx64 " ",
- perf_reg_name(r), val);
+ perf_reg_name(r, arch), val);
}
}
@@ -711,6 +712,7 @@ static void set_regs_in_dict(PyObject *dict,
struct evsel *evsel)
{
struct perf_event_attr *attr = &evsel->core.attr;
+ const char *arch = perf_env__arch(evsel__env(evsel));
/*
* Here value 28 is a constant size which can be used to print
@@ -722,12 +724,12 @@ static void set_regs_in_dict(PyObject *dict,
int size = __sw_hweight64(attr->sample_regs_intr) * 28;
char bf[size];
- regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf));
+ regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf));
pydict_set_item_string_decref(dict, "iregs",
_PyUnicode_FromString(bf));
- regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf));
+ regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf));
pydict_set_item_string_decref(dict, "uregs",
_PyUnicode_FromString(bf));
@@ -942,6 +944,8 @@ static void python_process_tracepoint(struct perf_sample *sample,
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
@@ -1553,7 +1557,7 @@ static void get_handler_name(char *str, size_t size,
}
static void
-process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp,
+process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp,
struct perf_counts_values *count)
{
PyObject *handler, *t;
@@ -1573,7 +1577,7 @@ process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp,
return;
}
- PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu.cpu));
PyTuple_SetItem(t, n++, _PyLong_FromLong(thread));
tuple_set_u64(t, n++, tstamp);
@@ -1597,14 +1601,14 @@ static void python_process_stat(struct perf_stat_config *config,
int cpu, thread;
if (config->aggr_mode == AGGR_GLOBAL) {
- process_stat(counter, -1, -1, tstamp,
+ process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp,
&counter->counts->aggr);
return;
}
for (thread = 0; thread < threads->nr; thread++) {
- for (cpu = 0; cpu < cpus->nr; cpu++) {
- process_stat(counter, cpus->map[cpu],
+ for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
perf_thread_map__pid(threads, thread), tstamp,
perf_counts(counter->counts, cpu, thread));
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d8857d1b6d7c..2c0d30f08e78 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -15,6 +15,7 @@
#include "map_symbol.h"
#include "branch.h"
#include "debug.h"
+#include "env.h"
#include "evlist.h"
#include "evsel.h"
#include "memswap.h"
@@ -1168,7 +1169,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
}
}
-static void regs_dump__printf(u64 mask, u64 *regs)
+static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
{
unsigned rid, i = 0;
@@ -1176,7 +1177,7 @@ static void regs_dump__printf(u64 mask, u64 *regs)
u64 val = regs[i++];
printf(".... %-5s 0x%016" PRIx64 "\n",
- perf_reg_name(rid), val);
+ perf_reg_name(rid, arch), val);
}
}
@@ -1194,7 +1195,7 @@ static inline const char *regs_dump_abi(struct regs_dump *d)
return regs_abi[d->abi];
}
-static void regs__printf(const char *type, struct regs_dump *regs)
+static void regs__printf(const char *type, struct regs_dump *regs, const char *arch)
{
u64 mask = regs->mask;
@@ -1203,23 +1204,23 @@ static void regs__printf(const char *type, struct regs_dump *regs)
mask,
regs_dump_abi(regs));
- regs_dump__printf(mask, regs->regs);
+ regs_dump__printf(mask, regs->regs, arch);
}
-static void regs_user__printf(struct perf_sample *sample)
+static void regs_user__printf(struct perf_sample *sample, const char *arch)
{
struct regs_dump *user_regs = &sample->user_regs;
if (user_regs->regs)
- regs__printf("user", user_regs);
+ regs__printf("user", user_regs, arch);
}
-static void regs_intr__printf(struct perf_sample *sample)
+static void regs_intr__printf(struct perf_sample *sample, const char *arch)
{
struct regs_dump *intr_regs = &sample->intr_regs;
if (intr_regs->regs)
- regs__printf("intr", intr_regs);
+ regs__printf("intr", intr_regs, arch);
}
static void stack_user__printf(struct stack_dump *dump)
@@ -1304,7 +1305,7 @@ char *get_page_size_name(u64 size, char *str)
}
static void dump_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *sample)
+ struct perf_sample *sample, const char *arch)
{
u64 sample_type;
char str[PAGE_SIZE_NAME_LEN];
@@ -1325,10 +1326,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
if (sample_type & PERF_SAMPLE_REGS_USER)
- regs_user__printf(sample);
+ regs_user__printf(sample, arch);
if (sample_type & PERF_SAMPLE_REGS_INTR)
- regs_intr__printf(sample);
+ regs_intr__printf(sample, arch);
if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack);
@@ -1502,7 +1503,7 @@ static int machines__deliver_event(struct machines *machines,
++evlist->stats.nr_unknown_id;
return 0;
}
- dump_sample(evsel, event, sample);
+ dump_sample(evsel, event, sample, perf_env__arch(machine->env));
if (machine == NULL) {
++evlist->stats.nr_unprocessable_samples;
return 0;
@@ -2536,16 +2537,16 @@ int perf_session__cpu_bitmap(struct perf_session *session,
return -1;
}
- for (i = 0; i < map->nr; i++) {
- int cpu = map->map[i];
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
- if (cpu >= nr_cpus) {
+ if (cpu.cpu >= nr_cpus) {
pr_err("Requested CPU %d too large. "
- "Consider raising MAX_NR_CPUS\n", cpu);
+ "Consider raising MAX_NR_CPUS\n", cpu.cpu);
goto out_delete_map;
}
- set_bit(cpu, cpu_bitmap);
+ set_bit(cpu.cpu, cpu_bitmap);
}
err = 0;
@@ -2597,7 +2598,7 @@ int perf_event__process_id_index(struct perf_session *session,
if (!sid)
return -ENOENT;
sid->idx = e->idx;
- sid->cpu = e->cpu;
+ sid->cpu.cpu = e->cpu;
sid->tid = e->tid;
}
return 0;
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 34f1b1b1176c..2b0a36ebf27a 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -5,6 +5,56 @@
#include "api/fs/fs.h"
#include "smt.h"
+/**
+ * hweight_str - Returns the number of bits set in str. Stops at first non-hex
+ * or ',' character.
+ */
+static int hweight_str(char *str)
+{
+ int result = 0;
+
+ while (*str) {
+ switch (*str++) {
+ case '0':
+ case ',':
+ break;
+ case '1':
+ case '2':
+ case '4':
+ case '8':
+ result++;
+ break;
+ case '3':
+ case '5':
+ case '6':
+ case '9':
+ case 'a':
+ case 'A':
+ case 'c':
+ case 'C':
+ result += 2;
+ break;
+ case '7':
+ case 'b':
+ case 'B':
+ case 'd':
+ case 'D':
+ case 'e':
+ case 'E':
+ result += 3;
+ break;
+ case 'f':
+ case 'F':
+ result += 4;
+ break;
+ default:
+ goto done;
+ }
+ }
+done:
+ return result;
+}
+
int smt_on(void)
{
static bool cached;
@@ -15,9 +65,12 @@ int smt_on(void)
if (cached)
return cached_result;
- if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0)
- goto done;
+ if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) {
+ cached = true;
+ return cached_result;
+ }
+ cached_result = 0;
ncpu = sysconf(_SC_NPROCESSORS_CONF);
for (cpu = 0; cpu < ncpu; cpu++) {
unsigned long long siblings;
@@ -26,27 +79,21 @@ int smt_on(void)
char fn[256];
snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
+ "devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
if (sysfs__read_str(fn, &str, &strlen) < 0) {
snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/thread_siblings",
- cpu);
+ "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
if (sysfs__read_str(fn, &str, &strlen) < 0)
continue;
}
/* Entry is hex, but does not have 0x, so need custom parser */
- siblings = strtoull(str, NULL, 16);
+ siblings = hweight_str(str);
free(str);
- if (hweight64(siblings) > 1) {
+ if (siblings > 1) {
cached_result = 1;
- cached = true;
break;
}
}
- if (!cached) {
- cached_result = 0;
-done:
- cached = true;
- }
+ cached = true;
return cached_result;
}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index a111065b484e..cfba8c337783 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -37,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
const char *default_sort_order = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
+const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
const char default_tracepoint_sort_order[] = "trace";
@@ -46,8 +46,8 @@ const char *field_order;
regex_t ignore_callees_regex;
int have_ignore_callees = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;
-const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
-const char *arch_specific_sort_keys[] = {"p_stage_cyc"};
+static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
+static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
/*
* Replaces all occurrences of a char used with the:
@@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
};
static int64_t
-sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
{
return left->p_stage_cyc - right->p_stage_cyc;
}
+static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width,
+ he->p_stage_cyc * he->stat.nr_events);
+}
+
+
static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
{
return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
}
-struct sort_entry sort_p_stage_cyc = {
- .se_header = "Pipeline Stage Cycle",
- .se_cmp = sort__global_p_stage_cyc_cmp,
+struct sort_entry sort_local_p_stage_cyc = {
+ .se_header = "Local Pipeline Stage Cycle",
+ .se_cmp = sort__p_stage_cyc_cmp,
.se_snprintf = hist_entry__p_stage_cyc_snprintf,
- .se_width_idx = HISTC_P_STAGE_CYC,
+ .se_width_idx = HISTC_LOCAL_P_STAGE_CYC,
+};
+
+struct sort_entry sort_global_p_stage_cyc = {
+ .se_header = "Pipeline Stage Cycle",
+ .se_cmp = sort__p_stage_cyc_cmp,
+ .se_snprintf = hist_entry__global_p_stage_cyc_snprintf,
+ .se_width_idx = HISTC_GLOBAL_P_STAGE_CYC,
};
struct sort_entry sort_mem_daddr_sym = {
@@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
- DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
+ DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
+ DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
};
#undef DIM
@@ -2365,6 +2381,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
tep_read_number_field(field, a->raw_data, &dyn);
offset = dyn & 0xffff;
size = (dyn >> 16) & 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
/* record max width for output */
if (size > hde->dynamic_len)
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7b7145501933..f994261888e1 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -235,7 +235,8 @@ enum sort_type {
SORT_CODE_PAGE_SIZE,
SORT_LOCAL_INS_LAT,
SORT_GLOBAL_INS_LAT,
- SORT_PIPELINE_STAGE_CYC,
+ SORT_LOCAL_PIPELINE_STAGE_CYC,
+ SORT_GLOBAL_PIPELINE_STAGE_CYC,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 588601000f3f..5db83e51ceef 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -4,6 +4,7 @@
#include <linux/string.h>
#include <linux/time64.h>
#include <math.h>
+#include <perf/cpumap.h>
#include "color.h"
#include "counts.h"
#include "evlist.h"
@@ -120,11 +121,10 @@ static void aggr_printout(struct perf_stat_config *config,
id.die,
config->csv_output ? 0 : -3,
id.core, config->csv_sep);
- } else if (id.core > -1) {
+ } else if (id.cpu.cpu > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
- evsel__cpus(evsel)->map[id.core],
- config->csv_sep);
+ id.cpu.cpu, config->csv_sep);
}
break;
case AGGR_THREAD:
@@ -327,26 +327,24 @@ static void print_metric_header(struct perf_stat_config *config,
fprintf(os->fh, "%*s ", config->metric_only_len, unit);
}
-static int first_shadow_cpu(struct perf_stat_config *config,
- struct evsel *evsel, struct aggr_cpu_id id)
+static int first_shadow_cpu_map_idx(struct perf_stat_config *config,
+ struct evsel *evsel, const struct aggr_cpu_id *id)
{
- struct evlist *evlist = evsel->evlist;
- int i;
+ struct perf_cpu_map *cpus = evsel__cpus(evsel);
+ struct perf_cpu cpu;
+ int idx;
if (config->aggr_mode == AGGR_NONE)
- return id.core;
+ return perf_cpu_map__idx(cpus, id->cpu);
if (!config->aggr_get_id)
return 0;
- for (i = 0; i < evsel__nr_cpus(evsel); i++) {
- int cpu2 = evsel__cpus(evsel)->map[i];
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu);
- if (cpu_map__compare_aggr_cpu_id(
- config->aggr_get_id(config, evlist->core.cpus, cpu2),
- id)) {
- return cpu2;
- }
+ if (aggr_cpu_id__equal(&cpu_id, id))
+ return idx;
}
return 0;
}
@@ -505,7 +503,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
}
perf_stat__print_shadow_stats(config, counter, uval,
- first_shadow_cpu(config, counter, id),
+ first_shadow_cpu_map_idx(config, counter, &id),
&out, &config->metric_events, st);
if (!config->csv_output && !config->metric_only) {
print_noise(config, counter, noise);
@@ -516,23 +514,26 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
static void aggr_update_shadow(struct perf_stat_config *config,
struct evlist *evlist)
{
- int cpu, s;
+ int idx, s;
+ struct perf_cpu cpu;
struct aggr_cpu_id s2, id;
u64 val;
struct evsel *counter;
+ struct perf_cpu_map *cpus;
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
evlist__for_each_entry(evlist, counter) {
+ cpus = evsel__cpus(counter);
val = 0;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
- s2 = config->aggr_get_id(config, evlist->core.cpus, cpu);
- if (!cpu_map__compare_aggr_cpu_id(s2, id))
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ s2 = config->aggr_get_id(config, cpu);
+ if (!aggr_cpu_id__equal(&s2, &id))
continue;
- val += perf_counts(counter->counts, cpu, 0)->val;
+ val += perf_counts(counter->counts, idx, 0)->val;
}
perf_stat__update_shadow_stats(counter, val,
- first_shadow_cpu(config, counter, id),
+ first_shadow_cpu_map_idx(config, counter, &id),
&rt_stat);
}
}
@@ -627,25 +628,28 @@ struct aggr_data {
u64 ena, run, val;
struct aggr_cpu_id id;
int nr;
- int cpu;
+ int cpu_map_idx;
};
static void aggr_cb(struct perf_stat_config *config,
struct evsel *counter, void *data, bool first)
{
struct aggr_data *ad = data;
- int cpu;
+ int idx;
+ struct perf_cpu cpu;
+ struct perf_cpu_map *cpus;
struct aggr_cpu_id s2;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
+ cpus = evsel__cpus(counter);
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
struct perf_counts_values *counts;
- s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
- if (!cpu_map__compare_aggr_cpu_id(s2, ad->id))
+ s2 = config->aggr_get_id(config, cpu);
+ if (!aggr_cpu_id__equal(&s2, &ad->id))
continue;
if (first)
ad->nr++;
- counts = perf_counts(counter->counts, cpu, 0);
+ counts = perf_counts(counter->counts, idx, 0);
/*
* When any result is bad, make them all to give
* consistent output in interval mode.
@@ -665,7 +669,7 @@ static void aggr_cb(struct perf_stat_config *config,
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
char *prefix, bool metric_only,
- bool *first, int cpu)
+ bool *first, struct perf_cpu cpu)
{
struct aggr_data ad;
FILE *output = config->output;
@@ -695,10 +699,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- if (cpu != -1) {
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
- }
+ if (cpu.cpu != -1)
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+
printout(config, id, nr, counter, uval,
prefix, run, ena, 1.0, &rt_stat);
if (!metric_only)
@@ -731,8 +734,8 @@ static void print_aggr(struct perf_stat_config *config,
first = true;
evlist__for_each_entry(evlist, counter) {
print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, -1);
+ prefix, metric_only,
+ &first, (struct perf_cpu){ .cpu = -1 });
}
if (metric_only)
fputc('\n', output);
@@ -778,7 +781,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread(
continue;
buf[i].counter = counter;
- buf[i].id = cpu_map__empty_aggr_cpu_id();
+ buf[i].id = aggr_cpu_id__empty();
buf[i].id.thread = thread;
buf[i].uval = uval;
buf[i].val = val;
@@ -866,7 +869,7 @@ static void print_counter_aggr(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = cd.avg * counter->scale;
- printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running,
+ printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running,
cd.avg_enabled, cd.avg, &rt_stat);
if (!metric_only)
fprintf(output, "\n");
@@ -878,9 +881,9 @@ static void counter_cb(struct perf_stat_config *config __maybe_unused,
{
struct aggr_data *ad = data;
- ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
- ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
- ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
+ ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val;
+ ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena;
+ ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run;
}
/*
@@ -893,11 +896,12 @@ static void print_counter(struct perf_stat_config *config,
FILE *output = config->output;
u64 ena, run, val;
double uval;
- int cpu;
+ int idx;
+ struct perf_cpu cpu;
struct aggr_cpu_id id;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
- struct aggr_data ad = { .cpu = cpu };
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
+ struct aggr_data ad = { .cpu_map_idx = idx };
if (!collect_data(config, counter, counter_cb, &ad))
return;
@@ -909,8 +913,7 @@ static void print_counter(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
printout(config, id, 0, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
@@ -922,29 +925,32 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
struct evlist *evlist,
char *prefix)
{
- int cpu;
- int nrcpus = 0;
- struct evsel *counter;
- u64 ena, run, val;
- double uval;
- struct aggr_cpu_id id;
+ int all_idx;
+ struct perf_cpu cpu;
- nrcpus = evlist->core.cpus->nr;
- for (cpu = 0; cpu < nrcpus; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.cpus) {
+ struct evsel *counter;
bool first = true;
if (prefix)
fputs(prefix, config->output);
evlist__for_each_entry(evlist, counter) {
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
+ u64 ena, run, val;
+ double uval;
+ struct aggr_cpu_id id;
+ int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+
+ if (counter_idx < 0)
+ continue;
+
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
if (first) {
aggr_printout(config, counter, id, 0);
first = false;
}
- val = perf_counts(counter->counts, cpu, 0)->val;
- ena = perf_counts(counter->counts, cpu, 0)->ena;
- run = perf_counts(counter->counts, cpu, 0)->run;
+ val = perf_counts(counter->counts, counter_idx, 0)->val;
+ ena = perf_counts(counter->counts, counter_idx, 0)->ena;
+ run = perf_counts(counter->counts, counter_idx, 0)->run;
uval = val * counter->scale;
printout(config, id, 0, counter, uval, prefix,
@@ -1208,19 +1214,23 @@ static void print_percore_thread(struct perf_stat_config *config,
{
int s;
struct aggr_cpu_id s2, id;
+ struct perf_cpu_map *cpus;
bool first = true;
+ int idx;
+ struct perf_cpu cpu;
- for (int i = 0; i < evsel__nr_cpus(counter); i++) {
- s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
+ cpus = evsel__cpus(counter);
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ s2 = config->aggr_get_id(config, cpu);
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
- if (cpu_map__compare_aggr_cpu_id(s2, id))
+ if (aggr_cpu_id__equal(&s2, &id))
break;
}
print_counter_aggrdata(config, counter, s,
prefix, false,
- &first, i);
+ &first, cpu);
}
}
@@ -1243,8 +1253,8 @@ static void print_percore(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, -1);
+ prefix, metric_only,
+ &first, (struct perf_cpu){ .cpu = -1 });
}
if (metric_only)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 5c7308efa768..10af7804e482 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -32,7 +32,7 @@ struct saved_value {
struct evsel *evsel;
enum stat_type type;
int ctx;
- int cpu;
+ int cpu_map_idx;
struct cgroup *cgrp;
struct runtime_stat *stat;
struct stats stats;
@@ -47,8 +47,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
rb_node);
const struct saved_value *b = entry;
- if (a->cpu != b->cpu)
- return a->cpu - b->cpu;
+ if (a->cpu_map_idx != b->cpu_map_idx)
+ return a->cpu_map_idx - b->cpu_map_idx;
/*
* Previously the rbtree was used to link generic metrics.
@@ -105,7 +105,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused,
}
static struct saved_value *saved_value_lookup(struct evsel *evsel,
- int cpu,
+ int cpu_map_idx,
bool create,
enum stat_type type,
int ctx,
@@ -115,7 +115,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
struct rblist *rblist;
struct rb_node *nd;
struct saved_value dm = {
- .cpu = cpu,
+ .cpu_map_idx = cpu_map_idx,
.evsel = evsel,
.type = type,
.ctx = ctx,
@@ -213,10 +213,10 @@ struct runtime_stat_data {
static void update_runtime_stat(struct runtime_stat *st,
enum stat_type type,
- int cpu, u64 count,
+ int cpu_map_idx, u64 count,
struct runtime_stat_data *rsd)
{
- struct saved_value *v = saved_value_lookup(NULL, cpu, true, type,
+ struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type,
rsd->ctx, st, rsd->cgrp);
if (v)
@@ -229,7 +229,7 @@ static void update_runtime_stat(struct runtime_stat *st,
* instruction rates, etc:
*/
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu, struct runtime_stat *st)
+ int cpu_map_idx, struct runtime_stat *st)
{
u64 count_ns = count;
struct saved_value *v;
@@ -241,88 +241,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
count *= counter->scale;
if (evsel__is_clock(counter))
- update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd);
+ update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
- update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
- update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TRANSACTION_START))
- update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, ELISION_START))
- update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
- update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
- update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
- update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
- update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
- update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
- update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
- update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, SMI_NUM))
- update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, APERF))
- update_runtime_stat(st, STAT_APERF, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd);
if (counter->collect_stat) {
- v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st,
+ v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st,
rsd.cgrp);
update_stats(&v->stats, count);
if (counter->metric_leader)
v->metric_total += count;
} else if (counter->metric_leader) {
v = saved_value_lookup(counter->metric_leader,
- cpu, true, STAT_NONE, 0, st, rsd.cgrp);
+ cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
v->metric_total += count;
v->metric_other++;
}
@@ -464,12 +464,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
}
static double runtime_stat_avg(struct runtime_stat *st,
- enum stat_type type, int cpu,
+ enum stat_type type, int cpu_map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -477,12 +477,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
}
static double runtime_stat_n(struct runtime_stat *st,
- enum stat_type type, int cpu,
+ enum stat_type type, int cpu_map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -490,7 +490,7 @@ static double runtime_stat_n(struct runtime_stat *st,
}
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -498,7 +498,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -513,7 +513,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -521,7 +521,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -532,7 +532,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
}
static void print_branch_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -540,7 +540,7 @@ static void print_branch_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -551,7 +551,7 @@ static void print_branch_misses(struct perf_stat_config *config,
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -559,7 +559,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -570,7 +570,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
}
static void print_l1_icache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -578,7 +578,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -588,7 +588,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -596,7 +596,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -606,7 +606,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -614,7 +614,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -624,7 +624,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
}
static void print_ll_cache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -632,7 +632,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -690,61 +690,61 @@ static double sanitize_val(double x)
return x;
}
-static double td_total_slots(int cpu, struct runtime_stat *st,
+static double td_total_slots(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd);
+ return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd);
}
-static double td_bad_spec(int cpu, struct runtime_stat *st,
+static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double bad_spec = 0;
double total_slots;
double total;
- total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) -
- runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) +
- runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) -
+ runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) +
+ runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd);
- total_slots = td_total_slots(cpu, st, rsd);
+ total_slots = td_total_slots(cpu_map_idx, st, rsd);
if (total_slots)
bad_spec = total / total_slots;
return sanitize_val(bad_spec);
}
-static double td_retiring(int cpu, struct runtime_stat *st,
+static double td_retiring(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double retiring = 0;
- double total_slots = td_total_slots(cpu, st, rsd);
+ double total_slots = td_total_slots(cpu_map_idx, st, rsd);
double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu, rsd);
+ cpu_map_idx, rsd);
if (total_slots)
retiring = ret_slots / total_slots;
return retiring;
}
-static double td_fe_bound(int cpu, struct runtime_stat *st,
+static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double fe_bound = 0;
- double total_slots = td_total_slots(cpu, st, rsd);
+ double total_slots = td_total_slots(cpu_map_idx, st, rsd);
double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu, rsd);
+ cpu_map_idx, rsd);
if (total_slots)
fe_bound = fetch_bub / total_slots;
return fe_bound;
}
-static double td_be_bound(int cpu, struct runtime_stat *st,
+static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- double sum = (td_fe_bound(cpu, st, rsd) +
- td_bad_spec(cpu, st, rsd) +
- td_retiring(cpu, st, rsd));
+ double sum = (td_fe_bound(cpu_map_idx, st, rsd) +
+ td_bad_spec(cpu_map_idx, st, rsd) +
+ td_retiring(cpu_map_idx, st, rsd));
if (sum == 0)
return 0;
return sanitize_val(1.0 - sum);
@@ -755,15 +755,15 @@ static double td_be_bound(int cpu, struct runtime_stat *st,
* the ratios we need to recreate the sum.
*/
-static double td_metric_ratio(int cpu, enum stat_type type,
+static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
- double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd);
- double d = runtime_stat_avg(stat, type, cpu, rsd);
+ double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd);
+ double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd);
if (sum)
return d / sum;
@@ -775,23 +775,23 @@ static double td_metric_ratio(int cpu, enum stat_type type,
* We allow two missing.
*/
-static bool full_td(int cpu, struct runtime_stat *stat,
+static bool full_td(int cpu_map_idx, struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
int c = 0;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0)
c++;
return c >= 2;
}
-static void print_smi_cost(struct perf_stat_config *config, int cpu,
+static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -799,9 +799,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu,
double smi_num, aperf, cycles, cost = 0.0;
const char *color = NULL;
- smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd);
- aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd);
- cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
+ smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd);
+ aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd);
+ cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
if ((cycles == 0) || (aperf == 0))
return;
@@ -818,7 +818,7 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu,
static int prepare_metric(struct evsel **metric_events,
struct metric_ref *metric_refs,
struct expr_parse_ctx *pctx,
- int cpu,
+ int cpu_map_idx,
struct runtime_stat *st)
{
double scale;
@@ -836,7 +836,7 @@ static int prepare_metric(struct evsel **metric_events,
scale = 1e-9;
source_count = 1;
} else {
- v = saved_value_lookup(metric_events[i], cpu, false,
+ v = saved_value_lookup(metric_events[i], cpu_map_idx, false,
STAT_NONE, 0, st,
metric_events[i]->cgrp);
if (!v)
@@ -874,7 +874,7 @@ static void generic_metric(struct perf_stat_config *config,
const char *metric_name,
const char *metric_unit,
int runtime,
- int cpu,
+ int cpu_map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
{
@@ -889,7 +889,7 @@ static void generic_metric(struct perf_stat_config *config,
return;
pctx->runtime = runtime;
- i = prepare_metric(metric_events, metric_refs, pctx, cpu, st);
+ i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st);
if (i < 0) {
expr__ctx_free(pctx);
return;
@@ -934,7 +934,7 @@ static void generic_metric(struct perf_stat_config *config,
expr__ctx_free(pctx);
}
-double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
+double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st)
{
struct expr_parse_ctx *pctx;
double ratio = 0.0;
@@ -943,7 +943,7 @@ double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_sta
if (!pctx)
return NAN;
- if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0)
+ if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0)
goto out;
if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -956,7 +956,7 @@ out:
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
- double avg, int cpu,
+ double avg, int cpu_map_idx,
struct perf_stat_output_ctx *out,
struct rblist *metric_events,
struct runtime_stat *st)
@@ -975,7 +975,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (config->iostat_run) {
iostat_print_metric(config, evsel, out);
} else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -985,11 +985,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
}
- total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd);
total = max(total, runtime_stat_avg(st,
STAT_STALLED_CYCLES_BACK,
- cpu, &rsd));
+ cpu_map_idx, &rsd));
if (total && avg) {
out->new_line(config, ctxp);
@@ -999,8 +999,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio);
}
} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
- if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0)
- print_branch_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0)
+ print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
} else if (
@@ -1009,8 +1009,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0)
- print_l1_dcache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0)
+ print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
} else if (
@@ -1019,8 +1019,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0)
- print_l1_icache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0)
+ print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
} else if (
@@ -1029,8 +1029,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0)
- print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0)
+ print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
} else if (
@@ -1039,8 +1039,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0)
- print_itlb_cache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0)
+ print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
} else if (
@@ -1049,27 +1049,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0)
- print_ll_cache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0)
+ print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
- total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd);
if (total)
ratio = avg * 100 / total;
- if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.3f %%",
"of all cache refs", ratio);
else
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
- print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd);
+ print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
- print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd);
+ print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
- total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -1078,7 +1078,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
}
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
if (total)
print_metric(config, ctxp, NULL,
@@ -1088,8 +1088,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "transactional cycles",
0);
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
- total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
+ total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
if (total2 < avg)
total2 = avg;
@@ -1099,19 +1099,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
if (avg)
ratio = total / avg;
- if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.0f",
"cycles / transaction", ratio);
else
print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
0);
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
if (avg)
ratio = total / avg;
@@ -1124,28 +1124,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
- double fe_bound = td_fe_bound(cpu, st, &rsd);
+ double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd);
if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
- double retiring = td_retiring(cpu, st, &rsd);
+ double retiring = td_retiring(cpu_map_idx, st, &rsd);
if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
- double bad_spec = td_bad_spec(cpu, st, &rsd);
+ double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd);
if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
- double be_bound = td_be_bound(cpu, st, &rsd);
+ double be_bound = td_be_bound(cpu_map_idx, st, &rsd);
const char *name = "backend bound";
static int have_recovery_bubbles = -1;
@@ -1158,14 +1158,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (be_bound > 0.2)
color = PERF_COLOR_RED;
- if (td_total_slots(cpu, st, &rsd) > 0)
+ if (td_total_slots(cpu_map_idx, st, &rsd) > 0)
print_metric(config, ctxp, color, "%8.1f%%", name,
be_bound * 100.);
else
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
- full_td(cpu, st, &rsd)) {
- double retiring = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double retiring = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
if (retiring > 0.7)
@@ -1173,8 +1173,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
- full_td(cpu, st, &rsd)) {
- double fe_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double fe_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
if (fe_bound > 0.2)
@@ -1182,8 +1182,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
- full_td(cpu, st, &rsd)) {
- double be_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double be_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
if (be_bound > 0.2)
@@ -1191,8 +1191,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
be_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
- full_td(cpu, st, &rsd)) {
- double bad_spec = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double bad_spec = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
if (bad_spec > 0.1)
@@ -1200,11 +1200,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double retiring = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double retiring = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
- double heavy_ops = td_metric_ratio(cpu,
+ double heavy_ops = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_HEAVY_OPS, st,
&rsd);
double light_ops = retiring - heavy_ops;
@@ -1220,11 +1220,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "light operations",
light_ops * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double bad_spec = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double bad_spec = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
- double br_mis = td_metric_ratio(cpu,
+ double br_mis = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BR_MISPREDICT, st,
&rsd);
double m_clears = bad_spec - br_mis;
@@ -1240,11 +1240,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "machine clears",
m_clears * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double fe_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double fe_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
- double fetch_lat = td_metric_ratio(cpu,
+ double fetch_lat = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_FETCH_LAT, st,
&rsd);
double fetch_bw = fe_bound - fetch_lat;
@@ -1260,11 +1260,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth",
fetch_bw * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double be_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double be_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
- double mem_bound = td_metric_ratio(cpu,
+ double mem_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_MEM_BOUND, st,
&rsd);
double core_bound = be_bound - mem_bound;
@@ -1281,12 +1281,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
core_bound * 100.);
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
- evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
- } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
+ evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st);
+ } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) {
char unit = ' ';
char unit_buf[10] = "/sec";
- total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
if (total)
ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
@@ -1294,7 +1294,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
- print_smi_cost(config, cpu, out, st, &rsd);
+ print_smi_cost(config, cpu_map_idx, out, st, &rsd);
} else {
num = 0;
}
@@ -1307,7 +1307,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
mexp->metric_refs, evsel->name, mexp->metric_name,
- mexp->metric_unit, mexp->runtime, cpu, out, st);
+ mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st);
}
}
if (num == 0)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 09ea334586f2..ee6f03481215 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -152,11 +152,13 @@ static void evsel__free_stat_priv(struct evsel *evsel)
zfree(&evsel->stats);
}
-static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel)
{
+ int cpu_map_nr = evsel__nr_cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
struct perf_counts *counts;
- counts = perf_counts__new(ncpus, nthreads);
+ counts = perf_counts__new(cpu_map_nr, nthreads);
if (counts)
evsel->prev_raw_counts = counts;
@@ -177,12 +179,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel)
static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
- int ncpus = evsel__nr_cpus(evsel);
- int nthreads = perf_thread_map__nr(evsel->core.threads);
-
if (evsel__alloc_stat_priv(evsel) < 0 ||
- evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
- (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+ evsel__alloc_counts(evsel) < 0 ||
+ (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0))
return -ENOMEM;
return 0;
@@ -293,11 +292,12 @@ static bool pkg_id_equal(const void *__key1, const void *__key2,
return *key1 == *key2;
}
-static int check_per_pkg(struct evsel *counter,
- struct perf_counts_values *vals, int cpu, bool *skip)
+static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
+ int cpu_map_idx, bool *skip)
{
struct hashmap *mask = counter->per_pkg_mask;
struct perf_cpu_map *cpus = evsel__cpus(counter);
+ struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx);
int s, d, ret = 0;
uint64_t *key;
@@ -328,7 +328,7 @@ static int check_per_pkg(struct evsel *counter,
if (!(vals->run && vals->ena))
return 0;
- s = cpu_map__get_socket(cpus, cpu, NULL).socket;
+ s = cpu__get_socket_id(cpu);
if (s < 0)
return -1;
@@ -336,7 +336,7 @@ static int check_per_pkg(struct evsel *counter,
* On multi-die system, die_id > 0. On no-die system, die_id = 0.
* We use hashmap(socket, die) to check the used socket+die pair.
*/
- d = cpu_map__get_die(cpus, cpu, NULL).die;
+ d = cpu__get_die_id(cpu);
if (d < 0)
return -1;
@@ -345,9 +345,10 @@ static int check_per_pkg(struct evsel *counter,
return -ENOMEM;
*key = (uint64_t)d << 32 | s;
- if (hashmap__find(mask, (void *)key, NULL))
+ if (hashmap__find(mask, (void *)key, NULL)) {
*skip = true;
- else
+ free(key);
+ } else
ret = hashmap__add(mask, (void *)key, (void *)1);
return ret;
@@ -355,14 +356,14 @@ static int check_per_pkg(struct evsel *counter,
static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
- int cpu, int thread,
+ int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values *aggr = &evsel->counts->aggr;
static struct perf_counts_values zero;
bool skip = false;
- if (check_per_pkg(evsel, count, cpu, &skip)) {
+ if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) {
pr_err("failed to read per-pkg counter\n");
return -1;
}
@@ -378,11 +379,11 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
case AGGR_NODE:
case AGGR_NONE:
if (!evsel->snapshot)
- evsel__compute_deltas(evsel, cpu, thread, count);
+ evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
perf_counts_values__scale(count, config->scale, NULL);
if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
perf_stat__update_shadow_stats(evsel, count->val,
- cpu, &rt_stat);
+ cpu_map_idx, &rt_stat);
}
if (config->aggr_mode == AGGR_THREAD) {
@@ -411,15 +412,15 @@ static int process_counter_maps(struct perf_stat_config *config,
{
int nthreads = perf_thread_map__nr(counter->core.threads);
int ncpus = evsel__nr_cpus(counter);
- int cpu, thread;
+ int idx, thread;
if (counter->core.system_wide)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
- if (process_counter_values(config, counter, cpu, thread,
- perf_counts(counter->counts, cpu, thread)))
+ for (idx = 0; idx < ncpus; idx++) {
+ if (process_counter_values(config, counter, idx, thread,
+ perf_counts(counter->counts, idx, thread)))
return -1;
}
}
@@ -531,7 +532,7 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target,
- int cpu)
+ int cpu_map_idx)
{
struct perf_event_attr *attr = &evsel->core.attr;
struct evsel *leader = evsel__leader(evsel);
@@ -585,7 +586,7 @@ int create_perf_stat_counter(struct evsel *evsel,
}
if (target__has_cpu(target) && !target__has_per_thread(target))
- return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
+ return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx);
return evsel__open_per_thread(evsel, evsel->core.threads);
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 32c8527de347..335d19cc3063 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -108,8 +108,7 @@ struct runtime_stat {
struct rblist value_list;
};
-typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config,
- struct perf_cpu_map *m, int cpu);
+typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu);
struct perf_stat_config {
enum aggr_mode aggr_mode;
@@ -209,7 +208,7 @@ void perf_stat__init_shadow_stats(void);
void perf_stat__reset_shadow_stats(void);
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu, struct runtime_stat *st);
+ int cpu_map_idx, struct runtime_stat *st);
struct perf_stat_output_ctx {
void *ctx;
print_metric_t print_metric;
@@ -249,10 +248,10 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target,
- int cpu);
+ int cpu_map_idx);
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
struct target *_target, struct timespec *ts, int argc, const char **argv);
struct metric_expr;
-double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
+double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st);
#endif
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 96f941e01681..1e0c731fc539 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -728,20 +728,20 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
int i;
int ret = 0;
struct perf_cpu_map *m;
- int c;
+ struct perf_cpu c;
m = perf_cpu_map__new(s);
if (!m)
return -1;
- for (i = 0; i < m->nr; i++) {
- c = m->map[i];
- if (c >= nr_cpus) {
+ for (i = 0; i < perf_cpu_map__nr(m); i++) {
+ c = perf_cpu_map__cpu(m, i);
+ if (c.cpu >= nr_cpus) {
ret = -1;
break;
}
- set_bit(c, cpumask_bits(b));
+ set_bit(c.cpu, cpumask_bits(b));
}
perf_cpu_map__put(m);
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 198982109f0f..70f095624a0b 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1186,12 +1186,12 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool,
static void synthesize_cpus(struct cpu_map_entries *cpus,
struct perf_cpu_map *map)
{
- int i;
+ int i, map_nr = perf_cpu_map__nr(map);
- cpus->nr = map->nr;
+ cpus->nr = map_nr;
- for (i = 0; i < map->nr; i++)
- cpus->cpu[i] = map->map[i];
+ for (i = 0; i < map_nr; i++)
+ cpus->cpu[i] = perf_cpu_map__cpu(map, i).cpu;
}
static void synthesize_mask(struct perf_record_record_cpu_map *mask,
@@ -1202,13 +1202,13 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask,
mask->nr = BITS_TO_LONGS(max);
mask->long_size = sizeof(long);
- for (i = 0; i < map->nr; i++)
- set_bit(map->map[i], mask->mask);
+ for (i = 0; i < perf_cpu_map__nr(map); i++)
+ set_bit(perf_cpu_map__cpu(map, i).cpu, mask->mask);
}
static size_t cpus_size(struct perf_cpu_map *map)
{
- return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+ return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16);
}
static size_t mask_size(struct perf_cpu_map *map, int *max)
@@ -1217,9 +1217,9 @@ static size_t mask_size(struct perf_cpu_map *map, int *max)
*max = 0;
- for (i = 0; i < map->nr; i++) {
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
/* bit position of the cpu is + 1 */
- int bit = map->map[i] + 1;
+ int bit = perf_cpu_map__cpu(map, i).cpu + 1;
if (bit > *max)
*max = bit;
@@ -1354,7 +1354,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool,
}
int perf_event__synthesize_stat(struct perf_tool *tool,
- u32 cpu, u32 thread, u64 id,
+ struct perf_cpu cpu, u32 thread, u64 id,
struct perf_counts_values *count,
perf_event__handler_t process,
struct machine *machine)
@@ -1366,7 +1366,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool,
event.header.misc = 0;
event.id = id;
- event.cpu = cpu;
+ event.cpu = cpu.cpu;
event.thread = thread;
event.val = count->val;
event.ena = count->ena;
@@ -1763,7 +1763,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_
}
e->idx = sid->idx;
- e->cpu = sid->cpu;
+ e->cpu = sid->cpu.cpu;
e->tid = sid->tid;
}
}
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index c931433bacbf..78a0450db164 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -6,6 +6,7 @@
#include <sys/types.h> // pid_t
#include <linux/compiler.h>
#include <linux/types.h>
+#include <perf/cpumap.h>
struct auxtrace_record;
struct dso;
@@ -63,7 +64,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 27945eeb0cb5..c1ebfc5d2e0c 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -95,15 +95,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
if (target->cpu_list)
ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
- top->evlist->core.cpus->nr > 1 ? "s" : "",
+ perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : "",
target->cpu_list);
else {
if (target->tid)
ret += SNPRINTF(bf + ret, size - ret, ")");
else
ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
- top->evlist->core.cpus->nr,
- top->evlist->core.cpus->nr > 1 ? "s" : "");
+ perf_cpu_map__nr(top->evlist->core.cpus),
+ perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : "");
}
perf_top__reset_sample_counters(top);
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index df3c4671be72..fb4f6616b5fa 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -416,3 +416,18 @@ char *perf_exe(char *buf, int len)
}
return strcpy(buf, "perf");
}
+
+void perf_debuginfod_setup(struct perf_debuginfod *di)
+{
+ /*
+ * By default '!di->set' we clear DEBUGINFOD_URLS, so debuginfod
+ * processing is not triggered, otherwise we set it to 'di->urls'
+ * value. If 'di->urls' is "system" we keep DEBUGINFOD_URLS value.
+ */
+ if (!di->set)
+ setenv("DEBUGINFOD_URLS", "", 1);
+ else if (di->urls && strcmp(di->urls, "system"))
+ setenv("DEBUGINFOD_URLS", di->urls, 1);
+
+ pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS"));
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9f0d36ba77f2..7b625cbd2dd8 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -11,6 +11,9 @@
#include <stddef.h>
#include <linux/compiler.h>
#include <sys/types.h>
+#ifndef __cplusplus
+#include <internal/cpumap.h>
+#endif
/* General helper functions */
void usage(const char *err) __noreturn;
@@ -66,6 +69,12 @@ extern bool test_attr__enabled;
void test_attr__ready(void);
void test_attr__init(void);
struct perf_event_attr;
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags);
+
+struct perf_debuginfod {
+ const char *urls;
+ bool set;
+};
+void perf_debuginfod_setup(struct perf_debuginfod *di);
#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore
index 0b319fc8bb17..eada0297ef88 100644
--- a/tools/power/acpi/.gitignore
+++ b/tools/power/acpi/.gitignore
@@ -2,4 +2,5 @@
/acpidbg
/acpidump
/ec
+/pfrut
/include/
diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile
index a249c50ebf55..5ff1d9c864d0 100644
--- a/tools/power/acpi/Makefile
+++ b/tools/power/acpi/Makefile
@@ -9,18 +9,18 @@ include ../../scripts/Makefile.include
.NOTPARALLEL:
-all: acpidbg acpidump ec
-clean: acpidbg_clean acpidump_clean ec_clean
-install: acpidbg_install acpidump_install ec_install
-uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall
+all: acpidbg acpidump ec pfrut
+clean: acpidbg_clean acpidump_clean ec_clean pfrut_clean
+install: acpidbg_install acpidump_install ec_install pfrut_install
+uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall
-acpidbg acpidump ec: FORCE
+acpidbg acpidump ec pfrut: FORCE
$(call descend,tools/$@,all)
-acpidbg_clean acpidump_clean ec_clean:
+acpidbg_clean acpidump_clean ec_clean pfrut_clean:
$(call descend,tools/$(@:_clean=),clean)
-acpidbg_install acpidump_install ec_install:
+acpidbg_install acpidump_install ec_install pfrut_install:
$(call descend,tools/$(@:_install=),install)
-acpidbg_uninstall acpidump_uninstall ec_uninstall:
+acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall:
$(call descend,tools/$(@:_uninstall=),uninstall)
.PHONY: FORCE
diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules
index 1d7616f5d0ae..b71aada77688 100644
--- a/tools/power/acpi/Makefile.rules
+++ b/tools/power/acpi/Makefile.rules
@@ -9,7 +9,7 @@ objdir := $(OUTPUT)tools/$(TOOL)/
toolobjs := $(addprefix $(objdir),$(TOOL_OBJS))
$(OUTPUT)$(TOOL): $(toolobjs) FORCE
$(ECHO) " LD " $(subst $(OUTPUT),,$@)
- $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@
+ $(QUIET) $(LD) $(CFLAGS) $(toolobjs) $(LDFLAGS) -L$(OUTPUT) -o $@
$(ECHO) " STRIP " $(subst $(OUTPUT),,$@)
$(QUIET) $(STRIPCMD) $@
diff --git a/tools/power/acpi/man/pfrut.8 b/tools/power/acpi/man/pfrut.8
new file mode 100644
index 000000000000..3db574770e8d
--- /dev/null
+++ b/tools/power/acpi/man/pfrut.8
@@ -0,0 +1,137 @@
+.TH "PFRUT" "8" "October 2021" "pfrut 1.0" ""
+.hy
+.SH Name
+.PP
+pfrut \- Platform Firmware Runtime Update and Telemetry tool
+.SH SYNOPSIS
+.PP
+\f[B]pfrut\f[R] [\f[I]Options\f[R]]
+.SH DESCRIPTION
+.PP
+The PFRUT(Platform Firmware Runtime Update and Telemetry) kernel interface is designed
+to
+.PD 0
+.P
+.PD
+interact with the platform firmware interface defined in the
+.PD 0
+.P
+.PD
+Management Mode Firmware Runtime
+Update (https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf)
+.PD 0
+.P
+.PD
+\f[B]pfrut\f[R] is the tool to interact with the kernel interface.
+.PD 0
+.P
+.PD
+.SH OPTIONS
+.TP
+.B \f[B]\-h\f[R], \f[B]\-\-help\f[R]
+Display helper information.
+.TP
+.B \f[B]\-l\f[R], \f[B]\-\-load\f[R]
+Load the capsule file into the system.
+To be more specific, the capsule file will be copied to the
+communication buffer.
+.TP
+.B \f[B]\-s\f[R], \f[B]\-\-stage\f[R]
+Stage the capsule image from communication buffer into Management Mode
+and perform authentication.
+.TP
+.B \f[B]\-a\f[R], \f[B]\-\-activate\f[R]
+Activate a previous staged capsule image.
+.TP
+.B \f[B]\-u\f[R], \f[B]\-\-update\f[R]
+Perform both stage and activation actions.
+.TP
+.B \f[B]\-q\f[R], \f[B]\-\-query\f[R]
+Query the update capability.
+.TP
+.B \f[B]\-d\f[R], \f[B]\-\-setrev\f[R]
+Set the revision ID of code injection/driver update.
+.TP
+.B \f[B]\-D\f[R], \f[B]\-\-setrevlog\f[R]
+Set the revision ID of telemetry.
+.TP
+.B \f[B]\-G\f[R], \f[B]\-\-getloginfo\f[R]
+Get telemetry log information and print it out.
+.TP
+.B \f[B]\-T\f[R], \f[B]\-\-type\f[R]
+Set the telemetry log data type.
+.TP
+.B \f[B]\-L\f[R], \f[B]\-\-level\f[R]
+Set the telemetry log level.
+.TP
+.B \f[B]\-R\f[R], \f[B]\-\-read\f[R]
+Read all the telemetry data and print it out.
+.SH EXAMPLES
+.PP
+\f[B]pfrut \-G\f[R]
+.PP
+log_level:4
+.PD 0
+.P
+.PD
+log_type:0
+.PD 0
+.P
+.PD
+log_revid:2
+.PD 0
+.P
+.PD
+max_data_size:65536
+.PD 0
+.P
+.PD
+chunk1_size:0
+.PD 0
+.P
+.PD
+chunk2_size:1401
+.PD 0
+.P
+.PD
+rollover_cnt:0
+.PD 0
+.P
+.PD
+reset_cnt:4
+.PP
+\f[B]pfru \-q\f[R]
+.PP
+code injection image type:794bf8b2\-6e7b\-454e\-885f\-3fb9bb185402
+.PD 0
+.P
+.PD
+fw_version:0
+.PD 0
+.P
+.PD
+code_rt_version:1
+.PD 0
+.P
+.PD
+driver update image type:0e5f0b14\-f849\-7945\-ad81\-bc7b6d2bb245
+.PD 0
+.P
+.PD
+drv_rt_version:0
+.PD 0
+.P
+.PD
+drv_svn:0
+.PD 0
+.P
+.PD
+platform id:39214663\-b1a8\-4eaa\-9024\-f2bb53ea4723
+.PD 0
+.P
+.PD
+oem id:a36db54f\-ea2a\-e14e\-b7c4\-b5780e51ba3d
+.PP
+\f[B]pfrut \-l yours.cap \-u \-T 1 \-L 4\f[R]
+.SH AUTHORS
+Chen Yu.
diff --git a/tools/power/acpi/tools/pfrut/Makefile b/tools/power/acpi/tools/pfrut/Makefile
new file mode 100644
index 000000000000..61c1a96fd433
--- /dev/null
+++ b/tools/power/acpi/tools/pfrut/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+include ../../Makefile.config
+
+TOOL = pfrut
+EXTRA_INSTALL = install-man
+EXTRA_UNINSTALL = uninstall-man
+
+CFLAGS += -Wall -O2
+CFLAGS += -DPFRUT_HEADER='"../../../../../include/uapi/linux/pfrut.h"'
+LDFLAGS += -luuid
+
+TOOL_OBJS = \
+ pfrut.o
+
+include ../../Makefile.rules
+
+install-man: $(srctree)/man/pfrut.8
+ $(ECHO) " INST " pfrut.8
+ $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/pfrut.8
+uninstall-man:
+ $(ECHO) " UNINST " pfrut.8
+ $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/pfrut.8
diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c
new file mode 100644
index 000000000000..d79c335594b2
--- /dev/null
+++ b/tools/power/acpi/tools/pfrut/pfrut.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Platform Firmware Runtime Update tool to do Management
+ * Mode code injection/driver update and telemetry retrieval.
+ *
+ * This tool uses the interfaces provided by pfr_update and
+ * pfr_telemetry drivers. These interfaces are exposed via
+ * /dev/pfr_update and /dev/pfr_telemetry. Write operation
+ * on the /dev/pfr_update is to load the EFI capsule into
+ * kernel space. Mmap/read operations on /dev/pfr_telemetry
+ * could be used to read the telemetry data to user space.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <uuid/uuid.h>
+#include PFRUT_HEADER
+
+char *capsule_name;
+int action, query_cap, log_type, log_level, log_read, log_getinfo,
+ revid, log_revid;
+int set_log_level, set_log_type,
+ set_revid, set_log_revid;
+
+char *progname;
+
+#define LOG_ERR 0
+#define LOG_WARN 1
+#define LOG_INFO 2
+#define LOG_VERB 4
+#define LOG_EXEC_IDX 0
+#define LOG_HISTORY_IDX 1
+#define REVID_1 1
+#define REVID_2 2
+
+static int valid_log_level(int level)
+{
+ return level == LOG_ERR || level == LOG_WARN ||
+ level == LOG_INFO || level == LOG_VERB;
+}
+
+static int valid_log_type(int type)
+{
+ return type == LOG_EXEC_IDX || type == LOG_HISTORY_IDX;
+}
+
+static inline int valid_log_revid(int id)
+{
+ return id == REVID_1 || id == REVID_2;
+}
+
+static void help(void)
+{
+ fprintf(stderr,
+ "usage: %s [OPTIONS]\n"
+ " code injection:\n"
+ " -l, --load\n"
+ " -s, --stage\n"
+ " -a, --activate\n"
+ " -u, --update [stage and activate]\n"
+ " -q, --query\n"
+ " -d, --revid update\n"
+ " telemetry:\n"
+ " -G, --getloginfo\n"
+ " -T, --type(0:execution, 1:history)\n"
+ " -L, --level(0, 1, 2, 4)\n"
+ " -R, --read\n"
+ " -D, --revid log\n",
+ progname);
+}
+
+char *option_string = "l:sauqd:GT:L:RD:h";
+static struct option long_options[] = {
+ {"load", required_argument, 0, 'l'},
+ {"stage", no_argument, 0, 's'},
+ {"activate", no_argument, 0, 'a'},
+ {"update", no_argument, 0, 'u'},
+ {"query", no_argument, 0, 'q'},
+ {"getloginfo", no_argument, 0, 'G'},
+ {"type", required_argument, 0, 'T'},
+ {"level", required_argument, 0, 'L'},
+ {"read", no_argument, 0, 'R'},
+ {"setrev", required_argument, 0, 'd'},
+ {"setrevlog", required_argument, 0, 'D'},
+ {"help", no_argument, 0, 'h'},
+ {}
+};
+
+static void parse_options(int argc, char **argv)
+{
+ int option_index = 0;
+ char *pathname;
+ int opt;
+
+ pathname = strdup(argv[0]);
+ progname = basename(pathname);
+
+ while ((opt = getopt_long_only(argc, argv, option_string,
+ long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'l':
+ capsule_name = optarg;
+ break;
+ case 's':
+ action = 1;
+ break;
+ case 'a':
+ action = 2;
+ break;
+ case 'u':
+ action = 3;
+ break;
+ case 'q':
+ query_cap = 1;
+ break;
+ case 'G':
+ log_getinfo = 1;
+ break;
+ case 'T':
+ log_type = atoi(optarg);
+ set_log_type = 1;
+ break;
+ case 'L':
+ log_level = atoi(optarg);
+ set_log_level = 1;
+ break;
+ case 'R':
+ log_read = 1;
+ break;
+ case 'd':
+ revid = atoi(optarg);
+ set_revid = 1;
+ break;
+ case 'D':
+ log_revid = atoi(optarg);
+ set_log_revid = 1;
+ break;
+ case 'h':
+ help();
+ exit(0);
+ default:
+ break;
+ }
+ }
+}
+
+void print_cap(struct pfru_update_cap_info *cap)
+{
+ char *uuid;
+
+ uuid = malloc(37);
+ if (!uuid) {
+ perror("Can not allocate uuid buffer\n");
+ exit(1);
+ }
+
+ uuid_unparse(cap->code_type, uuid);
+ printf("code injection image type:%s\n", uuid);
+ printf("fw_version:%d\n", cap->fw_version);
+ printf("code_rt_version:%d\n", cap->code_rt_version);
+
+ uuid_unparse(cap->drv_type, uuid);
+ printf("driver update image type:%s\n", uuid);
+ printf("drv_rt_version:%d\n", cap->drv_rt_version);
+ printf("drv_svn:%d\n", cap->drv_svn);
+
+ uuid_unparse(cap->platform_id, uuid);
+ printf("platform id:%s\n", uuid);
+ uuid_unparse(cap->oem_id, uuid);
+ printf("oem id:%s\n", uuid);
+ printf("oem information length:%d\n", cap->oem_info_len);
+
+ free(uuid);
+}
+
+int main(int argc, char *argv[])
+{
+ int fd_update, fd_update_log, fd_capsule;
+ struct pfrt_log_data_info data_info;
+ struct pfrt_log_info info;
+ struct pfru_update_cap_info cap;
+ void *addr_map_capsule;
+ struct stat st;
+ char *log_buf;
+ int ret = 0;
+
+ if (getuid() != 0) {
+ printf("Please run the tool as root - Exiting.\n");
+ return 1;
+ }
+
+ parse_options(argc, argv);
+
+ fd_update = open("/dev/acpi_pfr_update0", O_RDWR);
+ if (fd_update < 0) {
+ printf("PFRU device not supported - Quit...\n");
+ return 1;
+ }
+
+ fd_update_log = open("/dev/acpi_pfr_telemetry0", O_RDWR);
+ if (fd_update_log < 0) {
+ printf("PFRT device not supported - Quit...\n");
+ return 1;
+ }
+
+ if (query_cap) {
+ ret = ioctl(fd_update, PFRU_IOC_QUERY_CAP, &cap);
+ if (ret)
+ perror("Query Update Capability info failed.");
+ else
+ print_cap(&cap);
+
+ close(fd_update);
+ close(fd_update_log);
+
+ return ret;
+ }
+
+ if (log_getinfo) {
+ ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info);
+ if (ret) {
+ perror("Get telemetry data info failed.");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_INFO, &info);
+ if (ret) {
+ perror("Get telemetry info failed.");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ printf("log_level:%d\n", info.log_level);
+ printf("log_type:%d\n", info.log_type);
+ printf("log_revid:%d\n", info.log_revid);
+ printf("max_data_size:%d\n", data_info.max_data_size);
+ printf("chunk1_size:%d\n", data_info.chunk1_size);
+ printf("chunk2_size:%d\n", data_info.chunk2_size);
+ printf("rollover_cnt:%d\n", data_info.rollover_cnt);
+ printf("reset_cnt:%d\n", data_info.reset_cnt);
+
+ return 0;
+ }
+
+ info.log_level = -1;
+ info.log_type = -1;
+ info.log_revid = -1;
+
+ if (set_log_level) {
+ if (!valid_log_level(log_level)) {
+ printf("Invalid log level %d\n",
+ log_level);
+ } else {
+ info.log_level = log_level;
+ }
+ }
+
+ if (set_log_type) {
+ if (!valid_log_type(log_type)) {
+ printf("Invalid log type %d\n",
+ log_type);
+ } else {
+ info.log_type = log_type;
+ }
+ }
+
+ if (set_log_revid) {
+ if (!valid_log_revid(log_revid)) {
+ printf("Invalid log revid %d, unchanged.\n",
+ log_revid);
+ } else {
+ info.log_revid = log_revid;
+ }
+ }
+
+ ret = ioctl(fd_update_log, PFRT_LOG_IOC_SET_INFO, &info);
+ if (ret) {
+ perror("Log information set failed.(log_level, log_type, log_revid)");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ if (set_revid) {
+ ret = ioctl(fd_update, PFRU_IOC_SET_REV, &revid);
+ if (ret) {
+ perror("pfru update revid set failed");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ printf("pfru update revid set to %d\n", revid);
+ }
+
+ if (capsule_name) {
+ fd_capsule = open(capsule_name, O_RDONLY);
+ if (fd_capsule < 0) {
+ perror("Can not open capsule file...");
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ if (fstat(fd_capsule, &st) < 0) {
+ perror("Can not fstat capsule file...");
+ close(fd_capsule);
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ addr_map_capsule = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED,
+ fd_capsule, 0);
+ if (addr_map_capsule == MAP_FAILED) {
+ perror("Failed to mmap capsule file.");
+ close(fd_capsule);
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ ret = write(fd_update, (char *)addr_map_capsule, st.st_size);
+ printf("Load %d bytes of capsule file into the system\n",
+ ret);
+
+ if (ret == -1) {
+ perror("Failed to load capsule file");
+ close(fd_capsule);
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ munmap(addr_map_capsule, st.st_size);
+ close(fd_capsule);
+ printf("Load done.\n");
+ }
+
+ if (action) {
+ if (action == 1) {
+ ret = ioctl(fd_update, PFRU_IOC_STAGE, NULL);
+ } else if (action == 2) {
+ ret = ioctl(fd_update, PFRU_IOC_ACTIVATE, NULL);
+ } else if (action == 3) {
+ ret = ioctl(fd_update, PFRU_IOC_STAGE_ACTIVATE, NULL);
+ } else {
+ close(fd_update);
+ close(fd_update_log);
+
+ return 1;
+ }
+ printf("Update finished, return %d\n", ret);
+ }
+
+ close(fd_update);
+
+ if (log_read) {
+ void *p_mmap;
+ int max_data_sz;
+
+ ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info);
+ if (ret) {
+ perror("Get telemetry data info failed.");
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ max_data_sz = data_info.max_data_size;
+ if (!max_data_sz) {
+ printf("No telemetry data available.\n");
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ log_buf = malloc(max_data_sz + 1);
+ if (!log_buf) {
+ perror("log_buf allocate failed.");
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ p_mmap = mmap(NULL, max_data_sz, PROT_READ, MAP_SHARED, fd_update_log, 0);
+ if (p_mmap == MAP_FAILED) {
+ perror("mmap error.");
+ close(fd_update_log);
+
+ return 1;
+ }
+
+ memcpy(log_buf, p_mmap, max_data_sz);
+ log_buf[max_data_sz] = '\0';
+ printf("%s\n", log_buf);
+ free(log_buf);
+
+ munmap(p_mmap, max_data_sz);
+ }
+
+ close(fd_update_log);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index 32fc5b3b5cf6..911345c526e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -10,6 +10,7 @@
#include "test_d_path.skel.h"
#include "test_d_path_check_rdonly_mem.skel.h"
+#include "test_d_path_check_types.skel.h"
static int duration;
@@ -167,6 +168,16 @@ static void test_d_path_check_rdonly_mem(void)
test_d_path_check_rdonly_mem__destroy(skel);
}
+static void test_d_path_check_types(void)
+{
+ struct test_d_path_check_types *skel;
+
+ skel = test_d_path_check_types__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type");
+
+ test_d_path_check_types__destroy(skel);
+}
+
void test_d_path(void)
{
if (test__start_subtest("basic"))
@@ -174,4 +185,7 @@ void test_d_path(void)
if (test__start_subtest("check_rdonly_mem"))
test_d_path_check_rdonly_mem();
+
+ if (test__start_subtest("check_alloc_mem"))
+ test_d_path_check_types();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 983ab0b47d30..b2b357f8c74c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -8,46 +8,47 @@
void serial_test_xdp_link(void)
{
- __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+ __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2;
struct bpf_link_info link_info;
struct bpf_prog_info prog_info;
struct bpf_link *link;
+ int err;
__u32 link_info_len = sizeof(link_info);
__u32 prog_info_len = sizeof(prog_info);
skel1 = test_xdp_link__open_and_load();
- if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n"))
+ if (!ASSERT_OK_PTR(skel1, "skel_load"))
goto cleanup;
prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
skel2 = test_xdp_link__open_and_load();
- if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n"))
+ if (!ASSERT_OK_PTR(skel2, "skel_load"))
goto cleanup;
prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
memset(&prog_info, 0, sizeof(prog_info));
err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
- if (CHECK(err, "fd_info1", "failed %d\n", -errno))
+ if (!ASSERT_OK(err, "fd_info1"))
goto cleanup;
id1 = prog_info.id;
memset(&prog_info, 0, sizeof(prog_info));
err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
- if (CHECK(err, "fd_info2", "failed %d\n", -errno))
+ if (!ASSERT_OK(err, "fd_info2"))
goto cleanup;
id2 = prog_info.id;
/* set initial prog attachment */
err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "fd_attach"))
goto cleanup;
/* validate prog ID */
err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- CHECK(err || id0 != id1, "id1_check",
- "loaded prog id %u != id1 %u, err %d", id0, id1, err);
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
+ goto cleanup;
/* BPF link is not allowed to replace prog attachment */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
@@ -62,7 +63,7 @@ void serial_test_xdp_link(void)
/* detach BPF program */
opts.old_fd = prog_fd1;
err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(err, "prog_detach", "failed %d\n", err))
+ if (!ASSERT_OK(err, "prog_detach"))
goto cleanup;
/* now BPF link should attach successfully */
@@ -73,24 +74,23 @@ void serial_test_xdp_link(void)
/* validate prog ID */
err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- if (CHECK(err || id0 != id1, "id1_check",
- "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
goto cleanup;
/* BPF prog attach is not allowed to replace BPF link */
opts.old_fd = prog_fd1;
err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
+ if (!ASSERT_ERR(err, "prog_attach_fail"))
goto cleanup;
/* Can't force-update when BPF link is active */
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
- if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
+ if (!ASSERT_ERR(err, "prog_update_fail"))
goto cleanup;
/* Can't force-detach when BPF link is active */
err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
- if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
+ if (!ASSERT_ERR(err, "prog_detach_fail"))
goto cleanup;
/* BPF link is not allowed to replace another BPF link */
@@ -110,40 +110,39 @@ void serial_test_xdp_link(void)
skel2->links.xdp_handler = link;
err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- if (CHECK(err || id0 != id2, "id2_check",
- "loaded prog id %u != id2 %u, err %d", id0, id1, err))
+ if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val"))
goto cleanup;
/* updating program under active BPF link works as expected */
err = bpf_link__update_program(link, skel1->progs.xdp_handler);
- if (CHECK(err, "link_upd", "failed: %d\n", err))
+ if (!ASSERT_OK(err, "link_upd"))
goto cleanup;
memset(&link_info, 0, sizeof(link_info));
err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
- if (CHECK(err, "link_info", "failed: %d\n", err))
+ if (!ASSERT_OK(err, "link_info"))
goto cleanup;
- CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
- "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
- CHECK(link_info.prog_id != id1, "link_prog_id",
- "got %u != exp %u\n", link_info.prog_id, id1);
- CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
- "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+ ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
+ ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex");
+
+ /* updating program under active BPF link with different type fails */
+ err = bpf_link__update_program(link, skel1->progs.tc_handler);
+ if (!ASSERT_ERR(err, "link_upd_invalid"))
+ goto cleanup;
err = bpf_link__detach(link);
- if (CHECK(err, "link_detach", "failed %d\n", err))
+ if (!ASSERT_OK(err, "link_detach"))
goto cleanup;
memset(&link_info, 0, sizeof(link_info));
err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
- if (CHECK(err, "link_info", "failed: %d\n", err))
- goto cleanup;
- CHECK(link_info.prog_id != id1, "link_prog_id",
- "got %u != exp %u\n", link_info.prog_id, id1);
+
+ ASSERT_OK(err, "link_info");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
/* ifindex should be zeroed out */
- CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
- "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+ ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex");
cleanup:
test_xdp_link__destroy(skel1);
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
new file mode 100644
index 000000000000..7e02b7361307
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ void *active;
+ u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* FAIL here! 'active' points to 'regular' memory. It
+ * cannot be submitted to ring buffer.
+ */
+ bpf_ringbuf_submit(active, 0);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index a8233e7f173b..728dbd39eff0 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#ifndef PERF_MAX_STACK_DEPTH
@@ -41,11 +41,11 @@ struct {
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c
index ce6974016f53..43bd7a20cc50 100644
--- a/tools/testing/selftests/bpf/progs/test_tracepoint.c
+++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c
@@ -1,17 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
index ee7d6ac0f615..64ff32eaae92 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_link.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -10,3 +10,9 @@ int xdp_handler(struct xdp_md *xdp)
{
return 0;
}
+
+SEC("tc")
+int tc_handler(struct __sk_buff *skb)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c
new file mode 100644
index 000000000000..b64d33e4833c
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/ringbuf.c
@@ -0,0 +1,95 @@
+{
+ "ringbuf: invalid reservation offset 1",
+ .insns = {
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* store a pointer to the reserved memory in R6 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ /* spill R6(mem) into the stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ /* fill it back in R7 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
+ /* should be able to access *(R7) = 0 */
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
+ /* submit the reserved ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* add invalid offset to reserved ringbuf memory */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xcafe),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .result = REJECT,
+ .errstr = "dereference of modified alloc_mem ptr R1",
+},
+{
+ "ringbuf: invalid reservation offset 2",
+ .insns = {
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* store a pointer to the reserved memory in R6 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ /* spill R6(mem) into the stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ /* fill it back in R7 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
+ /* add invalid offset to reserved ringbuf memory */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0xcafe),
+ /* should be able to access *(R7) = 0 */
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
+ /* submit the reserved ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .result = REJECT,
+ .errstr = "R7 min value is outside of the allowed memory range",
+},
+{
+ "ringbuf: check passing rb mem to helpers",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* reserve 8 byte ringbuf memory */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* check whether the reservation was successful */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ /* pass allocated ring buffer memory to fib lookup */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup),
+ /* submit the ringbuf memory */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 2 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 1a8eb9672bd1..8cfc5349d2a8 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -84,7 +84,7 @@
},
.fixup_map_ringbuf = { 1 },
.result = REJECT,
- .errstr = "R0 pointer arithmetic on mem_or_null prohibited",
+ .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited",
},
{
"check corrupted spill/fill",
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 3cb5ac5da087..dce7de7755e6 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -4,14 +4,16 @@
/aarch64/get-reg-list
/aarch64/psci_cpu_on_test
/aarch64/vgic_init
+/aarch64/vgic_irq
/s390x/memop
/s390x/resets
/s390x/sync_regs_test
+/x86_64/amx_test
+/x86_64/cpuid_test
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
/x86_64/emulator_error_test
-/x86_64/get_cpuid_test
/x86_64/get_msr_index_features
/x86_64/kvm_clock_test
/x86_64/kvm_pv_test
@@ -21,6 +23,7 @@
/x86_64/mmio_warning_test
/x86_64/mmu_role_test
/x86_64/platform_info_test
+/x86_64/pmu_event_filter_test
/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
/x86_64/sev_migrate_tests
@@ -35,6 +38,7 @@
/x86_64/vmx_apic_access_test
/x86_64/vmx_close_while_nested_test
/x86_64/vmx_dirty_log_test
+/x86_64/vmx_exception_with_invalid_guest_state
/x86_64/vmx_invalid_nested_guest_state
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 17342b575e85..81ebf99d6ff0 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -32,17 +32,22 @@ endif
ifeq ($(ARCH),s390)
UNAME_M := s390x
endif
+# Set UNAME_M riscv compile/install to work
+ifeq ($(ARCH),riscv)
+ UNAME_M := riscv
+endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
+LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
-TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
+TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
-TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
@@ -51,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
@@ -64,6 +70,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
@@ -77,6 +84,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
+TEST_GEN_PROGS_x86_64 += x86_64/amx_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
@@ -96,6 +104,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
+TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
@@ -119,6 +128,13 @@ TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv += demand_paging_test
+TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
+TEST_GEN_PROGS_riscv += kvm_page_table_test
+TEST_GEN_PROGS_riscv += set_memory_region_test
+TEST_GEN_PROGS_riscv += kvm_binary_stats_test
+
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
@@ -133,7 +149,7 @@ endif
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
- -I$(<D) -Iinclude/$(UNAME_M) -I..
+ -I$(<D) -Iinclude/$(UNAME_M) -I.. $(EXTRA_CFLAGS)
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
$(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index bf6a45b0b8dc..9ad38bd360a4 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -382,7 +382,7 @@ static struct kvm_vm *test_vm_create(void)
ucall_init(vm, NULL);
test_init_timer_irq(vm);
- vgic_v3_setup(vm, nr_vcpus, GICD_BASE_GPA, GICR_BASE_GPA);
+ vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index cc898181faab..f769fc6cd927 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -1014,6 +1014,22 @@ static __u64 sve_rejects_set[] = {
KVM_REG_ARM64_SVE_VLS,
};
+static __u64 pauth_addr_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */
+};
+
+static __u64 pauth_generic_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
+};
+
#define BASE_SUBLIST \
{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
#define VREGS_SUBLIST \
@@ -1025,6 +1041,21 @@ static __u64 sve_rejects_set[] = {
{ "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
.regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
.rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+#define PAUTH_SUBLIST \
+ { \
+ .name = "pauth_address", \
+ .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \
+ .regs = pauth_addr_regs, \
+ .regs_n = ARRAY_SIZE(pauth_addr_regs), \
+ }, \
+ { \
+ .name = "pauth_generic", \
+ .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \
+ .regs = pauth_generic_regs, \
+ .regs_n = ARRAY_SIZE(pauth_generic_regs), \
+ }
static struct vcpu_config vregs_config = {
.sublists = {
@@ -1056,11 +1087,30 @@ static struct vcpu_config sve_pmu_config = {
{0},
},
};
+static struct vcpu_config pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
static struct vcpu_config *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
&sve_config,
&sve_pmu_config,
+ &pauth_config,
+ &pauth_pmu_config,
};
static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
new file mode 100644
index 000000000000..e6c7d7f8fbd1
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -0,0 +1,853 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_irq.c - Test userspace injection of IRQs
+ *
+ * This test validates the injection of IRQs from userspace using various
+ * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
+ * host to inject a specific intid via a GUEST_SYNC call, and then checks that
+ * it received it.
+ */
+
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+#include <sys/eventfd.h>
+#include <linux/sizes.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "vgic.h"
+
+#define GICD_BASE_GPA 0x08000000ULL
+#define GICR_BASE_GPA 0x080A0000ULL
+#define VCPU_ID 0
+
+/*
+ * Stores the user specified args; it's passed to the guest and to every test
+ * function.
+ */
+struct test_args {
+ uint32_t nr_irqs; /* number of KVM supported IRQs. */
+ bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
+ bool level_sensitive; /* 1 is level, 0 is edge */
+ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
+ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+};
+
+/*
+ * KVM implements 32 priority levels:
+ * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
+ *
+ * Note that these macros will still be correct in the case that KVM implements
+ * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
+ */
+#define KVM_NUM_PRIOS 32
+#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */
+#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */
+#define LOWEST_PRIO (KVM_NUM_PRIOS - 1)
+#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
+#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1)
+#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
+
+static void *dist = (void *)GICD_BASE_GPA;
+static void *redist = (void *)GICR_BASE_GPA;
+
+/*
+ * The kvm_inject_* utilities are used by the guest to ask the host to inject
+ * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
+ */
+
+typedef enum {
+ KVM_INJECT_EDGE_IRQ_LINE = 1,
+ KVM_SET_IRQ_LINE,
+ KVM_SET_IRQ_LINE_HIGH,
+ KVM_SET_LEVEL_INFO_HIGH,
+ KVM_INJECT_IRQFD,
+ KVM_WRITE_ISPENDR,
+ KVM_WRITE_ISACTIVER,
+} kvm_inject_cmd;
+
+struct kvm_inject_args {
+ kvm_inject_cmd cmd;
+ uint32_t first_intid;
+ uint32_t num;
+ int level;
+ bool expect_failure;
+};
+
+/* Used on the guest side to perform the hypercall. */
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure);
+
+/* Used on the host side to get the hypercall info. */
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args);
+
+#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \
+ kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
+
+#define KVM_INJECT_MULTI(cmd, intid, num) \
+ _KVM_INJECT_MULTI(cmd, intid, num, false)
+
+#define _KVM_INJECT(cmd, intid, expect_failure) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
+
+#define KVM_INJECT(cmd, intid) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, false)
+
+#define KVM_ACTIVATE(cmd, intid) \
+ kvm_inject_call(cmd, intid, 1, 1, false);
+
+struct kvm_inject_desc {
+ kvm_inject_cmd cmd;
+ /* can inject PPIs, PPIs, and/or SPIs. */
+ bool sgi, ppi, spi;
+};
+
+static struct kvm_inject_desc inject_edge_fns[] = {
+ /* sgi ppi spi */
+ { KVM_INJECT_EDGE_IRQ_LINE, false, false, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, true, false, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc inject_level_fns[] = {
+ /* sgi ppi spi */
+ { KVM_SET_IRQ_LINE_HIGH, false, true, true },
+ { KVM_SET_LEVEL_INFO_HIGH, false, true, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, false, true, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc set_active_fns[] = {
+ /* sgi ppi spi */
+ { KVM_WRITE_ISACTIVER, true, true, true },
+ { 0, },
+};
+
+#define for_each_inject_fn(t, f) \
+ for ((f) = (t); (f)->cmd; (f)++)
+
+#define for_each_supported_inject_fn(args, t, f) \
+ for_each_inject_fn(t, f) \
+ if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
+
+#define for_each_supported_activate_fn(args, t, f) \
+ for_each_supported_inject_fn((args), (t), (f))
+
+/* Shared between the guest main thread and the IRQ handlers. */
+volatile uint64_t irq_handled;
+volatile uint32_t irqnr_received[MAX_SPI + 1];
+
+static void reset_stats(void)
+{
+ int i;
+
+ irq_handled = 0;
+ for (i = 0; i <= MAX_SPI; i++)
+ irqnr_received[i] = 0;
+}
+
+static uint64_t gic_read_ap1r0(void)
+{
+ uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1);
+
+ dsb(sy);
+ return reg;
+}
+
+static void gic_write_ap1r0(uint64_t val)
+{
+ write_sysreg_s(val, SYS_ICV_AP1R0_EL1);
+ isb();
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level);
+
+static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
+{
+ uint32_t intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(gic_irq_get_active(intid));
+
+ if (!level_sensitive)
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+
+ if (level_sensitive)
+ guest_set_irq_line(intid, 0);
+
+ GUEST_ASSERT(intid < MAX_SPI);
+ irqnr_received[intid] += 1;
+ irq_handled += 1;
+
+ gic_set_eoi(intid);
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ if (eoi_split)
+ gic_set_dir(intid);
+
+ GUEST_ASSERT(!gic_irq_get_active(intid));
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+}
+
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure)
+{
+ struct kvm_inject_args args = {
+ .cmd = cmd,
+ .first_intid = first_intid,
+ .num = num,
+ .level = level,
+ .expect_failure = expect_failure,
+ };
+ GUEST_SYNC(&args);
+}
+
+#define GUEST_ASSERT_IAR_EMPTY() \
+do { \
+ uint32_t _intid; \
+ _intid = gic_get_and_ack_irq(); \
+ GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+} while (0)
+
+#define CAT_HELPER(a, b) a ## b
+#define CAT(a, b) CAT_HELPER(a, b)
+#define PREFIX guest_irq_handler_
+#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
+#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \
+static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \
+{ \
+ guest_irq_generic_handler(split, lev); \
+}
+
+GENERATE_GUEST_IRQ_HANDLER(0, 0);
+GENERATE_GUEST_IRQ_HANDLER(0, 1);
+GENERATE_GUEST_IRQ_HANDLER(1, 0);
+GENERATE_GUEST_IRQ_HANDLER(1, 1);
+
+static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
+ {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
+ {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
+};
+
+static void reset_priorities(struct test_args *args)
+{
+ int i;
+
+ for (i = 0; i < args->nr_irqs; i++)
+ gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level)
+{
+ kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
+}
+
+static void test_inject_fail(struct test_args *args,
+ uint32_t intid, kvm_inject_cmd cmd)
+{
+ reset_stats();
+
+ _KVM_INJECT(cmd, intid, true);
+ /* no IRQ to handle on entry */
+
+ GUEST_ASSERT_EQ(irq_handled, 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+static void guest_inject(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t i;
+
+ reset_stats();
+
+ /* Cycle over all priorities to make things more interesting. */
+ for (i = first_intid; i < num + first_intid; i++)
+ gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
+
+ asm volatile("msr daifset, #2" : : : "memory");
+ KVM_INJECT_MULTI(cmd, first_intid, num);
+
+ while (irq_handled < num) {
+ asm volatile("wfi\n"
+ "msr daifclr, #2\n"
+ /* handle IRQ */
+ "msr daifset, #2\n"
+ : : : "memory");
+ }
+ asm volatile("msr daifclr, #2" : : : "memory");
+
+ GUEST_ASSERT_EQ(irq_handled, num);
+ for (i = first_intid; i < num + first_intid; i++)
+ GUEST_ASSERT_EQ(irqnr_received[i], 1);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+/*
+ * Restore the active state of multiple concurrent IRQs (given by
+ * concurrent_irqs). This does what a live-migration would do on the
+ * destination side assuming there are some active IRQs that were not
+ * deactivated yet.
+ */
+static void guest_restore_active(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t prio, intid, ap1r;
+ int i;
+
+ /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+ * in descending order, so intid+1 can preempt intid.
+ */
+ for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ /* In a real migration, KVM would restore all GIC state before running
+ * guest code.
+ */
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+ KVM_ACTIVATE(cmd, intid);
+ ap1r = gic_read_ap1r0();
+ ap1r |= 1U << i;
+ gic_write_ap1r0(ap1r);
+ }
+
+ /* This is where the "migration" would occur. */
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+/*
+ * Polls the IAR until it's not a spurious interrupt.
+ *
+ * This function should only be used in test_inject_preemption (with IRQs
+ * masked).
+ */
+static uint32_t wait_for_and_activate_irq(void)
+{
+ uint32_t intid;
+
+ do {
+ asm volatile("wfi" : : : "memory");
+ intid = gic_get_and_ack_irq();
+ } while (intid == IAR_SPURIOUS);
+
+ return intid;
+}
+
+/*
+ * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
+ * handle them without handling the actual exceptions. This is done by masking
+ * interrupts for the whole test.
+ */
+static void test_inject_preemption(struct test_args *args,
+ uint32_t first_intid, int num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t intid, prio, step = KVM_PRIO_STEPS;
+ int i;
+
+ /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+ * in descending order, so intid+1 can preempt intid.
+ */
+ for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ local_irq_disable();
+
+ for (i = 0; i < num; i++) {
+ uint32_t tmp;
+ intid = i + first_intid;
+ KVM_INJECT(cmd, intid);
+ /* Each successive IRQ will preempt the previous one. */
+ tmp = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(tmp, intid);
+ if (args->level_sensitive)
+ guest_set_irq_line(intid, 0);
+ }
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ local_irq_enable();
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
+{
+ uint32_t nr_irqs = args->nr_irqs;
+
+ if (f->sgi) {
+ guest_inject(args, MIN_SGI, 1, f->cmd);
+ guest_inject(args, 0, 16, f->cmd);
+ }
+
+ if (f->ppi)
+ guest_inject(args, MIN_PPI, 1, f->cmd);
+
+ if (f->spi) {
+ guest_inject(args, MIN_SPI, 1, f->cmd);
+ guest_inject(args, nr_irqs - 1, 1, f->cmd);
+ guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
+ }
+}
+
+static void test_injection_failure(struct test_args *args,
+ struct kvm_inject_desc *f)
+{
+ uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
+ test_inject_fail(args, bad_intid[i], f->cmd);
+}
+
+static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /*
+ * Test up to 4 levels of preemption. The reason is that KVM doesn't
+ * currently implement the ability to have more than the number-of-LRs
+ * number of concurrently active IRQs. The number of LRs implemented is
+ * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
+ */
+ if (f->sgi)
+ test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+}
+
+static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /* Test up to 4 active IRQs. Same reason as in test_preemption. */
+ if (f->sgi)
+ guest_restore_active(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ guest_restore_active(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ guest_restore_active(args, MIN_SPI, 4, f->cmd);
+}
+
+static void guest_code(struct test_args args)
+{
+ uint32_t i, nr_irqs = args.nr_irqs;
+ bool level_sensitive = args.level_sensitive;
+ struct kvm_inject_desc *f, *inject_fns;
+
+ gic_init(GIC_V3, 1, dist, redist);
+
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
+ for (i = MIN_SPI; i < nr_irqs; i++)
+ gic_irq_set_config(i, !args.level_sensitive);
+
+ gic_set_eoi_split(args.eoi_split);
+
+ reset_priorities(&args);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ inject_fns = level_sensitive ? inject_level_fns
+ : inject_edge_fns;
+
+ local_irq_enable();
+
+ /* Start the tests. */
+ for_each_supported_inject_fn(&args, inject_fns, f) {
+ test_injection(&args, f);
+ test_preemption(&args, f);
+ test_injection_failure(&args, f);
+ }
+
+ /* Restore the active state of IRQs. This would happen when live
+ * migrating IRQs in the middle of being handled.
+ */
+ for_each_supported_activate_fn(&args, set_active_fns, f)
+ test_restore_active(&args, f);
+
+ GUEST_DONE();
+}
+
+static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
+ struct test_args *test_args, bool expect_failure)
+{
+ int ret;
+
+ if (!expect_failure) {
+ kvm_arm_irq_line(vm, intid, level);
+ } else {
+ /* The interface doesn't allow larger intid's. */
+ if (intid > KVM_ARM_IRQ_NUM_MASK)
+ return;
+
+ ret = _kvm_arm_irq_line(vm, intid, level);
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause KVM_IRQ_LINE "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ }
+}
+
+void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
+ bool expect_failure)
+{
+ if (!expect_failure) {
+ kvm_irq_set_level_info(gic_fd, intid, level);
+ } else {
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+ /*
+ * The kernel silently fails for invalid SPIs and SGIs (which
+ * are not level-sensitive). It only checks for intid to not
+ * spill over 1U << 10 (the max reserved SPI). Also, callers
+ * are supposed to mask the intid with 0x3ff (1023).
+ */
+ if (intid > VGIC_MAX_RESERVED)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ struct kvm_irq_routing *routing;
+ int ret;
+ uint64_t i;
+
+ assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
+
+ routing = kvm_gsi_routing_create();
+ for (i = intid; i < (uint64_t)intid + num; i++)
+ kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
+
+ if (!expect_failure) {
+ kvm_gsi_routing_write(vm, routing);
+ } else {
+ ret = _kvm_gsi_routing_write(vm, routing);
+ /* The kernel only checks for KVM_IRQCHIP_NUM_PINS. */
+ if (intid >= KVM_IRQCHIP_NUM_PINS)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
+ uint32_t vcpu, bool expect_failure)
+{
+ /*
+ * Ignore this when expecting failure as invalid intids will lead to
+ * either trying to inject SGIs when we configured the test to be
+ * level_sensitive (or the reverse), or inject large intids which
+ * will lead to writing above the ISPENDR register space (and we
+ * don't want to do that either).
+ */
+ if (!expect_failure)
+ kvm_irq_write_ispendr(gic_fd, intid, vcpu);
+}
+
+static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ int fd[MAX_SPI];
+ uint64_t val;
+ int ret, f;
+ uint64_t i;
+
+ /*
+ * There is no way to try injecting an SGI or PPI as the interface
+ * starts counting from the first SPI (above the private ones), so just
+ * exit.
+ */
+ if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
+ return;
+
+ kvm_set_gsi_routing_irqchip_check(vm, intid, num,
+ kvm_max_routes, expect_failure);
+
+ /*
+ * If expect_failure, then just to inject anyway. These
+ * will silently fail. And in any case, the guest will check
+ * that no actual interrupt was injected for those cases.
+ */
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ fd[f] = eventfd(0, 0);
+ TEST_ASSERT(fd[f] != -1,
+ "eventfd failed, errno: %i\n", errno);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ struct kvm_irqfd irqfd = {
+ .fd = fd[f],
+ .gsi = i - MIN_SPI,
+ };
+ assert(i <= (uint64_t)UINT_MAX);
+ vm_ioctl(vm, KVM_IRQFD, &irqfd);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ val = 1;
+ ret = write(fd[f], &val, sizeof(uint64_t));
+ TEST_ASSERT(ret == sizeof(uint64_t),
+ "Write to KVM_IRQFD failed with ret: %d\n", ret);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ close(fd[f]);
+}
+
+/* handles the valid case: intid=0xffffffff num=1 */
+#define for_each_intid(first, num, tmp, i) \
+ for ((tmp) = (i) = (first); \
+ (tmp) < (uint64_t)(first) + (uint64_t)(num); \
+ (tmp)++, (i)++)
+
+static void run_guest_cmd(struct kvm_vm *vm, int gic_fd,
+ struct kvm_inject_args *inject_args,
+ struct test_args *test_args)
+{
+ kvm_inject_cmd cmd = inject_args->cmd;
+ uint32_t intid = inject_args->first_intid;
+ uint32_t num = inject_args->num;
+ int level = inject_args->level;
+ bool expect_failure = inject_args->expect_failure;
+ uint64_t tmp;
+ uint32_t i;
+
+ /* handles the valid case: intid=0xffffffff num=1 */
+ assert(intid < UINT_MAX - num || num == 1);
+
+ switch (cmd) {
+ case KVM_INJECT_EDGE_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 0, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, level, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_LEVEL_INFO_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_set_level_info_check(gic_fd, i, 1,
+ expect_failure);
+ break;
+ case KVM_INJECT_IRQFD:
+ kvm_routing_and_irqfd_check(vm, intid, num,
+ test_args->kvm_max_routes,
+ expect_failure);
+ break;
+ case KVM_WRITE_ISPENDR:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_ispendr_check(gic_fd, i,
+ VCPU_ID, expect_failure);
+ break;
+ case KVM_WRITE_ISACTIVER:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_isactiver(gic_fd, i, VCPU_ID);
+ break;
+ default:
+ break;
+ }
+}
+
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args)
+{
+ struct kvm_inject_args *kvm_args_hva;
+ vm_vaddr_t kvm_args_gva;
+
+ kvm_args_gva = uc->args[1];
+ kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
+ memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
+}
+
+static void print_args(struct test_args *args)
+{
+ printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
+ args->nr_irqs, args->level_sensitive,
+ args->eoi_split);
+}
+
+static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
+{
+ struct ucall uc;
+ int gic_fd;
+ struct kvm_vm *vm;
+ struct kvm_inject_args inject_args;
+
+ struct test_args args = {
+ .nr_irqs = nr_irqs,
+ .level_sensitive = level_sensitive,
+ .eoi_split = eoi_split,
+ .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
+ .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
+ };
+
+ print_args(&args);
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ /* Setup the guest args page (so it gets the args). */
+ vcpu_args_set(vm, 0, 1, args);
+
+ gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
+ GICD_BASE_GPA, GICR_BASE_GPA);
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handlers[args.eoi_split][args.level_sensitive]);
+
+ while (1) {
+ vcpu_run(vm, VCPU_ID);
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ kvm_inject_get_call(vm, &uc, &inject_args);
+ run_guest_cmd(vm, gic_fd, &inject_args, &args);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+ (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
+static void help(const char *name)
+{
+ printf(
+ "\n"
+ "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
+ printf(" -n: specify number of IRQs to setup the vgic with. "
+ "It has to be a multiple of 32 and between 64 and 1024.\n");
+ printf(" -e: if 1 then EOI is split into a write to DIR on top "
+ "of writing EOI.\n");
+ printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
+ puts("");
+ exit(1);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t nr_irqs = 64;
+ bool default_args = true;
+ bool level_sensitive = false;
+ int opt;
+ bool eoi_split = false;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
+ switch (opt) {
+ case 'n':
+ nr_irqs = atoi(optarg);
+ if (nr_irqs > 1024 || nr_irqs % 32)
+ help(argv[0]);
+ break;
+ case 'e':
+ eoi_split = (bool)atoi(optarg);
+ default_args = false;
+ break;
+ case 'l':
+ level_sensitive = (bool)atoi(optarg);
+ default_args = false;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ /* If the user just specified nr_irqs and/or gic_version, then run all
+ * combinations.
+ */
+ if (default_args) {
+ test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ } else {
+ test_vgic(nr_irqs, level_sensitive, eoi_split);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
index 85dd1e53048e..b217ea17cac5 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic.h
@@ -11,11 +11,37 @@ enum gic_type {
GIC_TYPE_MAX,
};
+#define MIN_SGI 0
+#define MIN_PPI 16
+#define MIN_SPI 32
+#define MAX_SPI 1019
+#define IAR_SPURIOUS 1023
+
+#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI)
+#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI)
+#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
+
void gic_init(enum gic_type type, unsigned int nr_cpus,
void *dist_base, void *redist_base);
void gic_irq_enable(unsigned int intid);
void gic_irq_disable(unsigned int intid);
unsigned int gic_get_and_ack_irq(void);
void gic_set_eoi(unsigned int intid);
+void gic_set_dir(unsigned int intid);
+
+/*
+ * Sets the EOI mode. When split is false, EOI just drops the priority. When
+ * split is true, EOI drops the priority and deactivates the interrupt.
+ */
+void gic_set_eoi_split(bool split);
+void gic_set_priority_mask(uint64_t mask);
+void gic_set_priority(uint32_t intid, uint32_t prio);
+void gic_irq_set_active(unsigned int intid);
+void gic_irq_clear_active(unsigned int intid);
+bool gic_irq_get_active(unsigned int intid);
+void gic_irq_set_pending(unsigned int intid);
+void gic_irq_clear_pending(unsigned int intid);
+bool gic_irq_get_pending(unsigned int intid);
+void gic_irq_set_config(unsigned int intid, bool is_edge);
#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
index b51536d469a6..ba0886e8a2bb 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
@@ -16,8 +16,12 @@
#define GICD_IGROUPR 0x0080
#define GICD_ISENABLER 0x0100
#define GICD_ICENABLER 0x0180
+#define GICD_ISPENDR 0x0200
+#define GICD_ICPENDR 0x0280
#define GICD_ICACTIVER 0x0380
+#define GICD_ISACTIVER 0x0300
#define GICD_IPRIORITYR 0x0400
+#define GICD_ICFGR 0x0C00
/*
* The assumption is that the guest runs in a non-secure mode.
@@ -49,16 +53,24 @@
#define GICR_IGROUPR0 GICD_IGROUPR
#define GICR_ISENABLER0 GICD_ISENABLER
#define GICR_ICENABLER0 GICD_ICENABLER
+#define GICR_ISPENDR0 GICD_ISPENDR
+#define GICR_ISACTIVER0 GICD_ISACTIVER
#define GICR_ICACTIVER0 GICD_ICACTIVER
+#define GICR_ICENABLER GICD_ICENABLER
+#define GICR_ICACTIVER GICD_ICACTIVER
#define GICR_IPRIORITYR0 GICD_IPRIORITYR
/* CPU interface registers */
#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
+#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0)
+
#define ICC_PMR_DEF_PRIO 0xf0
#define ICC_SRE_EL1_SRE (1U << 0)
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 27d8e1bb5b36..8f9f46979a00 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -113,6 +113,9 @@ enum {
#define ESR_EC_WP_CURRENT 0x35
#define ESR_EC_BRK_INS 0x3c
+void aarch64_get_supported_page_sizes(uint32_t ipa,
+ bool *ps4k, bool *ps16k, bool *ps64k);
+
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
index 0ecfb253893c..4442081221a0 100644
--- a/tools/testing/selftests/kvm/include/aarch64/vgic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h
@@ -14,7 +14,21 @@
((uint64_t)(flags) << 12) | \
index)
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
uint64_t gicd_base_gpa, uint64_t gicr_base_gpa);
-#endif /* SELFTEST_KVM_VGIC_H */
+#define VGIC_MAX_RESERVED 1023
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+
+/* The vcpu arg only applies to private interrupts. */
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu);
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu);
+
+#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
+
+#endif // SELFTEST_KVM_VGIC_H
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 2d62edc49d67..c9286811a4cb 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,412 +7,7 @@
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H
-#include "test_util.h"
-
-#include "asm/kvm.h"
-#include "linux/list.h"
-#include "linux/kvm.h"
-#include <sys/ioctl.h>
-
-#include "sparsebit.h"
-
-#define KVM_DEV_PATH "/dev/kvm"
-#define KVM_MAX_VCPUS 512
-
-#define NSEC_PER_SEC 1000000000L
-
-/*
- * Callers of kvm_util only have an incomplete/opaque description of the
- * structure kvm_util is using to maintain the state of a VM.
- */
-struct kvm_vm;
-
-typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
-typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
-
-/* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
-#define DEFAULT_GUEST_PHY_PAGES 512
-#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
-#define DEFAULT_STACK_PGS 5
-
-enum vm_guest_mode {
- VM_MODE_P52V48_4K,
- VM_MODE_P52V48_64K,
- VM_MODE_P48V48_4K,
- VM_MODE_P48V48_64K,
- VM_MODE_P40V48_4K,
- VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
- VM_MODE_P47V64_4K,
- VM_MODE_P44V64_4K,
- NUM_VM_MODES,
-};
-
-#if defined(__aarch64__)
-
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__x86_64__)
-
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__s390x__)
-
-#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 16)
-
-#endif
-
-#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
-#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
-
-struct vm_guest_mode_params {
- unsigned int pa_bits;
- unsigned int va_bits;
- unsigned int page_size;
- unsigned int page_shift;
-};
-extern const struct vm_guest_mode_params vm_guest_mode_params[];
-
-int open_path_or_exit(const char *path, int flags);
-int open_kvm_dev_path_or_exit(void);
-int kvm_check_cap(long cap);
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
-int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_enable_cap *cap);
-void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
-const char *vm_guest_mode_string(uint32_t i);
-
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-void kvm_vm_free(struct kvm_vm *vmp);
-void kvm_vm_restart(struct kvm_vm *vmp, int perm);
-void kvm_vm_release(struct kvm_vm *vmp);
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages);
-uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
-
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
- size_t len);
-
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
-
-void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-/*
- * VM VCPU Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by @vcpuid, within the VM
- * given by @vm, to the FILE stream given by @stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
- uint8_t indent);
-
-void vm_create_irqchip(struct kvm_vm *vm);
-
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
-
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
-void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
-void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
-vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
-vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
-
-void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages);
-void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
-void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
-vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
-void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
-
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gva - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Returns the VM physical address of the translated VM virtual
- * address given by @gva.
- */
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
-
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug);
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state);
-struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-
-/*
- * VM VCPU Args Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * num - number of arguments
- * ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first @num function input registers of the VCPU with @vcpuid,
- * per the C calling convention of the architecture, to the values given
- * as variable args. Each of the variable args is expected to be of type
- * uint64_t. The maximum @num can be is specific to the architecture.
- */
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-#endif
-#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state);
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error);
-#endif
-void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
-
-int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
-int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
-int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd);
-int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test);
-int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write);
-int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write);
-
-int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr);
-int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr);
-int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write);
-int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write);
-
-const char *exit_reason_str(unsigned int exit_reason);
-
-void virt_pgd_alloc(struct kvm_vm *vm);
-
-/*
- * VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within @vm, creates a virtual translation for the page starting
- * at @vaddr to the page starting at @paddr.
- */
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
-
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
- uint32_t memslot);
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot);
-vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
-
-/*
- * Create a VM with reasonable defaults
- *
- * Input Args:
- * vcpuid - The id of the single VCPU to add to the VM.
- * extra_mem_pages - The number of extra pages to add (this will
- * decide how much extra space we will need to
- * setup the page tables using memslot 0)
- * guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- * Pointer to opaque structure that describes the created VM.
- */
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code);
-
-/* Same as vm_create_default, but can be used for more than one vcpu */
-struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[]);
-
-/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
-struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[]);
-
-/*
- * Adds a vCPU with reasonable defaults (e.g. a stack)
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - The id of the VCPU to add to the VM.
- * guest_code - The vCPU's entry point
- */
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
-
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
-
-unsigned int vm_get_page_size(struct kvm_vm *vm);
-unsigned int vm_get_page_shift(struct kvm_vm *vm);
-unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
-uint64_t vm_get_max_gfn(struct kvm_vm *vm);
-int vm_get_fd(struct kvm_vm *vm);
-
-unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
-unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
-unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
-static inline unsigned int
-vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
-{
- unsigned int n;
- n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
-#ifdef __s390x__
- /* s390 requires 1M aligned guest sizes */
- n = (n + 255) & ~255;
-#endif
- return n;
-}
-
-struct kvm_userspace_memory_region *
-kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
- uint64_t end);
-
-struct kvm_dirty_log *
-allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
-
-int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
-
-#define sync_global_to_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(_p, &(g), sizeof(g)); \
-})
-
-#define sync_global_from_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(&(g), _p, sizeof(g)); \
-})
-
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
-
-/* Common ucalls */
-enum {
- UCALL_NONE,
- UCALL_SYNC,
- UCALL_ABORT,
- UCALL_DONE,
- UCALL_UNHANDLED,
-};
-
-#define UCALL_MAX_ARGS 6
-
-struct ucall {
- uint64_t cmd;
- uint64_t args[UCALL_MAX_ARGS];
-};
-
-void ucall_init(struct kvm_vm *vm, void *arg);
-void ucall_uninit(struct kvm_vm *vm);
-void ucall(uint64_t cmd, int nargs, ...);
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
-
-#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
- ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
-#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
-#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2 + _nargs, \
- "Failed guest assert: " \
- _condstr, __LINE__, _args); \
-} while (0)
-
-#define GUEST_ASSERT(_condition) \
- __GUEST_ASSERT(_condition, #_condition, 0, 0)
-
-#define GUEST_ASSERT_1(_condition, arg1) \
- __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
-
-#define GUEST_ASSERT_2(_condition, arg1, arg2) \
- __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
-
-#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
-
-#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
-
-#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
-
-int vm_get_stats_fd(struct kvm_vm *vm);
-int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
-
-uint32_t guest_get_vcpuid(void);
+#include "kvm_util_base.h"
+#include "ucall_common.h"
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
new file mode 100644
index 000000000000..66775de26952
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -0,0 +1,399 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/kvm_util_base.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UTIL_BASE_H
+#define SELFTEST_KVM_UTIL_BASE_H
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/list.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+#define KVM_DEV_PATH "/dev/kvm"
+#define KVM_MAX_VCPUS 512
+
+#define NSEC_PER_SEC 1000000000L
+
+/*
+ * Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+#define DEFAULT_GUEST_PHY_PAGES 512
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_P52V48_4K,
+ VM_MODE_P52V48_64K,
+ VM_MODE_P48V48_4K,
+ VM_MODE_P48V48_16K,
+ VM_MODE_P48V48_64K,
+ VM_MODE_P40V48_4K,
+ VM_MODE_P40V48_16K,
+ VM_MODE_P40V48_64K,
+ VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_P47V64_4K,
+ VM_MODE_P44V64_4K,
+ VM_MODE_P36V48_4K,
+ VM_MODE_P36V48_16K,
+ VM_MODE_P36V48_64K,
+ VM_MODE_P36V47_16K,
+ NUM_VM_MODES,
+};
+
+#if defined(__aarch64__)
+
+extern enum vm_guest_mode vm_mode_default;
+
+#define VM_MODE_DEFAULT vm_mode_default
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__x86_64__)
+
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__s390x__)
+
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 16)
+
+#elif defined(__riscv)
+
+#if __riscv_xlen == 32
+#error "RISC-V 32-bit kvm selftests not supported"
+#endif
+
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#endif
+
+#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
+#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
+
+struct vm_guest_mode_params {
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+extern const struct vm_guest_mode_params vm_guest_mode_params[];
+
+int open_path_or_exit(const char *path, int flags);
+int open_kvm_dev_path_or_exit(void);
+int kvm_check_cap(long cap);
+int vm_check_cap(struct kvm_vm *vm, long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_enable_cap *cap);
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+const char *vm_guest_mode_string(uint32_t i);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_release(struct kvm_vm *vmp);
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages);
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
+
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+ size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by @vcpuid, within the VM
+ * given by @vm, to the FILE stream given by @stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
+ uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+ void *arg);
+int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+ void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
+void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ unsigned int npages);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_guest_debug *debug);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+
+/*
+ * VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first @num function input registers of the VCPU with @vcpuid,
+ * per the C calling convention of the architecture, to the values given
+ * as variable args. Each of the variable args is expected to be of type
+ * uint64_t. The maximum @num can be is specific to the architecture.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_fpu *fpu);
+void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_fpu *fpu);
+void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
+void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
+#ifdef __KVM_HAVE_VCPU_EVENTS
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+#endif
+#ifdef __x86_64__
+void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state);
+int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state, bool ignore_error);
+#endif
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
+
+int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
+int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
+int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd);
+int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test);
+int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
+ void *val, bool write);
+int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
+ void *val, bool write);
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+
+int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr);
+int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr);
+int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr, void *val, bool write);
+int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr, void *val, bool write);
+
+#define KVM_MAX_IRQ_ROUTES 4096
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void);
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin);
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm);
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
+
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+ uint32_t memslot);
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
+
+/*
+ * Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * extra_mem_pages - The number of extra pages to add (this will
+ * decide how much extra space we will need to
+ * setup the page tables using memslot 0)
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+ void *guest_code);
+
+/* Same as vm_create_default, but can be used for more than one vcpu */
+struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[]);
+
+/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
+struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+ uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[]);
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+void vm_xsave_req_perm(void);
+
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
+unsigned int vm_get_page_size(struct kvm_vm *vm);
+unsigned int vm_get_page_shift(struct kvm_vm *vm);
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
+uint64_t vm_get_max_gfn(struct kvm_vm *vm);
+int vm_get_fd(struct kvm_vm *vm);
+
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
+unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+static inline unsigned int
+vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+ unsigned int n;
+ n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
+#ifdef __s390x__
+ /* s390 requires 1M aligned guest sizes */
+ n = (n + 255) & ~255;
+#endif
+ return n;
+}
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#define sync_global_to_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(_p, &(g), sizeof(g)); \
+})
+
+#define sync_global_from_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(&(g), _p, sizeof(g)); \
+})
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
+
+int vm_get_stats_fd(struct kvm_vm *vm);
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
+
+uint32_t guest_get_vcpuid(void);
+
+#endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
new file mode 100644
index 000000000000..dc284c6bdbc3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V processor specific defines
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+#include <linux/stringify.h>
+
+static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx,
+ uint64_t size)
+{
+ return KVM_REG_RISCV | type | idx | size;
+}
+
+#if __riscv_xlen == 64
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U64
+#else
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U32
+#endif
+
+#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, \
+ KVM_REG_RISCV_CONFIG_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, \
+ KVM_REG_RISCV_CORE_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \
+ KVM_REG_RISCV_CSR_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, \
+ KVM_REG_RISCV_TIMER_REG(name), \
+ KVM_REG_SIZE_U64)
+
+static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
+ unsigned long *addr)
+{
+ struct kvm_one_reg reg;
+
+ reg.id = id;
+ reg.addr = (unsigned long)addr;
+ vcpu_get_reg(vm, vcpuid, &reg);
+}
+
+static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
+ unsigned long val)
+{
+ struct kvm_one_reg reg;
+
+ reg.id = id;
+ reg.addr = (unsigned long)&val;
+ vcpu_set_reg(vm, vcpuid, &reg);
+}
+
+/* L3 index Bit[47:39] */
+#define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL
+#define PGTBL_L3_INDEX_SHIFT 39
+#define PGTBL_L3_BLOCK_SHIFT 39
+#define PGTBL_L3_BLOCK_SIZE 0x0000008000000000ULL
+#define PGTBL_L3_MAP_MASK (~(PGTBL_L3_BLOCK_SIZE - 1))
+/* L2 index Bit[38:30] */
+#define PGTBL_L2_INDEX_MASK 0x0000007FC0000000ULL
+#define PGTBL_L2_INDEX_SHIFT 30
+#define PGTBL_L2_BLOCK_SHIFT 30
+#define PGTBL_L2_BLOCK_SIZE 0x0000000040000000ULL
+#define PGTBL_L2_MAP_MASK (~(PGTBL_L2_BLOCK_SIZE - 1))
+/* L1 index Bit[29:21] */
+#define PGTBL_L1_INDEX_MASK 0x000000003FE00000ULL
+#define PGTBL_L1_INDEX_SHIFT 21
+#define PGTBL_L1_BLOCK_SHIFT 21
+#define PGTBL_L1_BLOCK_SIZE 0x0000000000200000ULL
+#define PGTBL_L1_MAP_MASK (~(PGTBL_L1_BLOCK_SIZE - 1))
+/* L0 index Bit[20:12] */
+#define PGTBL_L0_INDEX_MASK 0x00000000001FF000ULL
+#define PGTBL_L0_INDEX_SHIFT 12
+#define PGTBL_L0_BLOCK_SHIFT 12
+#define PGTBL_L0_BLOCK_SIZE 0x0000000000001000ULL
+#define PGTBL_L0_MAP_MASK (~(PGTBL_L0_BLOCK_SIZE - 1))
+
+#define PGTBL_PTE_ADDR_MASK 0x003FFFFFFFFFFC00ULL
+#define PGTBL_PTE_ADDR_SHIFT 10
+#define PGTBL_PTE_RSW_MASK 0x0000000000000300ULL
+#define PGTBL_PTE_RSW_SHIFT 8
+#define PGTBL_PTE_DIRTY_MASK 0x0000000000000080ULL
+#define PGTBL_PTE_DIRTY_SHIFT 7
+#define PGTBL_PTE_ACCESSED_MASK 0x0000000000000040ULL
+#define PGTBL_PTE_ACCESSED_SHIFT 6
+#define PGTBL_PTE_GLOBAL_MASK 0x0000000000000020ULL
+#define PGTBL_PTE_GLOBAL_SHIFT 5
+#define PGTBL_PTE_USER_MASK 0x0000000000000010ULL
+#define PGTBL_PTE_USER_SHIFT 4
+#define PGTBL_PTE_EXECUTE_MASK 0x0000000000000008ULL
+#define PGTBL_PTE_EXECUTE_SHIFT 3
+#define PGTBL_PTE_WRITE_MASK 0x0000000000000004ULL
+#define PGTBL_PTE_WRITE_SHIFT 2
+#define PGTBL_PTE_READ_MASK 0x0000000000000002ULL
+#define PGTBL_PTE_READ_SHIFT 1
+#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \
+ PGTBL_PTE_WRITE_MASK | \
+ PGTBL_PTE_READ_MASK)
+#define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL
+#define PGTBL_PTE_VALID_SHIFT 0
+
+#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
+#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
+
+#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
+#define SATP_MODE_39 _AC(0x8000000000000000, UL)
+#define SATP_MODE_48 _AC(0x9000000000000000, UL)
+#define SATP_ASID_BITS 16
+#define SATP_ASID_SHIFT 44
+#define SATP_ASID_MASK _AC(0xFFFF, UL)
+
+#define SBI_EXT_EXPERIMENTAL_START 0x08000000
+#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
+
+#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
+
+struct sbiret {
+ long error;
+ long value;
+};
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
new file mode 100644
index 000000000000..9eecc9d40b79
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UCALL_COMMON_H
+#define SELFTEST_KVM_UCALL_COMMON_H
+
+/* Common ucalls */
+enum {
+ UCALL_NONE,
+ UCALL_SYNC,
+ UCALL_ABORT,
+ UCALL_DONE,
+ UCALL_UNHANDLED,
+};
+
+#define UCALL_MAX_ARGS 6
+
+struct ucall {
+ uint64_t cmd;
+ uint64_t args[UCALL_MAX_ARGS];
+};
+
+void ucall_init(struct kvm_vm *vm, void *arg);
+void ucall_uninit(struct kvm_vm *vm);
+void ucall(uint64_t cmd, int nargs, ...);
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
+#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_DONE() ucall(UCALL_DONE, 0)
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2 + _nargs, \
+ "Failed guest assert: " \
+ _condstr, __LINE__, _args); \
+} while (0)
+
+#define GUEST_ASSERT(_condition) \
+ __GUEST_ASSERT(_condition, #_condition, 0, 0)
+
+#define GUEST_ASSERT_1(_condition, arg1) \
+ __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
+
+#define GUEST_ASSERT_2(_condition, arg1, arg2) \
+ __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
+
+#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
+ __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
+
+#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
+ __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
+
+#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+
+#endif /* SELFTEST_KVM_UCALL_COMMON_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 05e65ca1c30c..423d8a61bd2e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -10,8 +10,10 @@
#include <assert.h>
#include <stdint.h>
+#include <syscall.h>
#include <asm/msr-index.h>
+#include <asm/prctl.h>
#include "../kvm_util.h"
@@ -92,6 +94,21 @@ struct desc_ptr {
uint64_t address;
} __attribute__((packed));
+struct kvm_x86_state {
+ struct kvm_xsave *xsave;
+ struct kvm_vcpu_events events;
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ struct kvm_xcrs xcrs;
+ struct kvm_sregs sregs;
+ struct kvm_debugregs debugregs;
+ union {
+ struct kvm_nested_state nested;
+ char nested_[16384];
+ };
+ struct kvm_msrs msrs;
+};
+
static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
return ((uint64_t)desc->base3 << 32) |
@@ -347,17 +364,37 @@ static inline unsigned long get_xmm(int n)
}
bool is_intel_cpu(void);
+bool is_amd_cpu(void);
+
+static inline unsigned int x86_family(unsigned int eax)
+{
+ unsigned int x86;
+
+ x86 = (eax >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (eax >> 20) & 0xff;
+
+ return x86;
+}
+
+static inline unsigned int x86_model(unsigned int eax)
+{
+ return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
+}
-struct kvm_x86_state;
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_x86_state *state);
+void kvm_x86_state_cleanup(struct kvm_x86_state *state);
struct kvm_msr_list *kvm_get_msr_index_list(void);
uint64_t kvm_get_feature_msr(uint64_t msr_index);
struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_cpuid2 *cpuid);
void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_cpuid2 *cpuid);
@@ -402,6 +439,11 @@ void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
uint64_t pte);
/*
+ * get_cpuid() - find matching CPUID entry and return pointer to it.
+ */
+struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function,
+ uint32_t index);
+/*
* set_cpuid() - overwrites a matching cpuid entry with the provided value.
* matches based on ent->function && ent->index. returns true
* if a match was found and successfully overwritten.
@@ -443,4 +485,11 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
+#define XSTATE_XTILE_CFG_BIT 17
+#define XSTATE_XTILE_DATA_BIT 18
+
+#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
+#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
+#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \
+ XSTATE_XTILE_DATA_MASK)
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
index fff4fc27504d..55668631d546 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c
@@ -93,3 +93,69 @@ void gic_set_eoi(unsigned int intid)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_write_eoir(intid);
}
+
+void gic_set_dir(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_dir(intid);
+}
+
+void gic_set_eoi_split(bool split)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_eoi_split(split);
+}
+
+void gic_set_priority_mask(uint64_t pmr)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority_mask(pmr);
+}
+
+void gic_set_priority(unsigned int intid, unsigned int prio)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority(intid, prio);
+}
+
+void gic_irq_set_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_active(intid);
+}
+
+void gic_irq_clear_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_active(intid);
+}
+
+bool gic_irq_get_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_active(intid);
+}
+
+void gic_irq_set_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_pending(intid);
+}
+
+void gic_irq_clear_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_pending(intid);
+}
+
+bool gic_irq_get_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_pending(intid);
+}
+
+void gic_irq_set_config(unsigned int intid, bool is_edge)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_config(intid, is_edge);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
index d81d739433dc..75d07313c893 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
@@ -14,6 +14,17 @@ struct gic_common_ops {
void (*gic_irq_disable)(unsigned int intid);
uint64_t (*gic_read_iar)(void);
void (*gic_write_eoir)(uint32_t irq);
+ void (*gic_write_dir)(uint32_t irq);
+ void (*gic_set_eoi_split)(bool split);
+ void (*gic_set_priority_mask)(uint64_t mask);
+ void (*gic_set_priority)(uint32_t intid, uint32_t prio);
+ void (*gic_irq_set_active)(uint32_t intid);
+ void (*gic_irq_clear_active)(uint32_t intid);
+ bool (*gic_irq_get_active)(uint32_t intid);
+ void (*gic_irq_set_pending)(uint32_t intid);
+ void (*gic_irq_clear_pending)(uint32_t intid);
+ bool (*gic_irq_get_pending)(uint32_t intid);
+ void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
};
extern const struct gic_common_ops gicv3_ops;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
index 2dbf3339b62e..00f613c0583c 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -19,7 +19,8 @@ struct gicv3_data {
unsigned int nr_spis;
};
-#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define DIST_BIT (1U << 31)
enum gicv3_intid_range {
SGI_RANGE,
@@ -50,6 +51,14 @@ static void gicv3_gicr_wait_for_rwp(void *redist_base)
}
}
+static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+{
+ if (cpu_or_dist & DIST_BIT)
+ gicv3_gicd_wait_for_rwp();
+ else
+ gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]);
+}
+
static enum gicv3_intid_range get_intid_range(unsigned int intid)
{
switch (intid) {
@@ -81,39 +90,175 @@ static void gicv3_write_eoir(uint32_t irq)
isb();
}
-static void
-gicv3_config_irq(unsigned int intid, unsigned int offset)
+static void gicv3_write_dir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+ isb();
+}
+
+static void gicv3_set_priority_mask(uint64_t mask)
+{
+ write_sysreg_s(mask, SYS_ICC_PMR_EL1);
+}
+
+static void gicv3_set_eoi_split(bool split)
+{
+ uint32_t val;
+
+ /* All other fields are read-only, so no need to read CTLR first. In
+ * fact, the kernel does the same.
+ */
+ val = split ? (1U << 1) : 0;
+ write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+ isb();
+}
+
+uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ return readl(base + offset);
+}
+
+void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ writel(reg_val, base + offset);
+}
+
+uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+{
+ return gicv3_reg_readl(cpu_or_dist, offset) & mask;
+}
+
+void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
+ uint32_t mask, uint32_t reg_val)
+{
+ uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+
+ tmp |= (reg_val & mask);
+ gicv3_reg_writel(cpu_or_dist, offset, tmp);
+}
+
+/*
+ * We use a single offset for the distributor and redistributor maps as they
+ * have the same value in both. The only exceptions are registers that only
+ * exist in one and not the other, like GICR_WAKER that doesn't exist in the
+ * distributor map. Such registers are conveniently marked as reserved in the
+ * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
+ * marked as "Reserved" in the Distributor map.
+ */
+static void gicv3_access_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field,
+ bool write, uint32_t *val)
{
uint32_t cpu = guest_get_vcpuid();
- uint32_t mask = 1 << (intid % 32);
enum gicv3_intid_range intid_range = get_intid_range(intid);
- void *reg;
-
- /* We care about 'cpu' only for SGIs or PPIs */
- if (intid_range == SGI_RANGE || intid_range == PPI_RANGE) {
- GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
-
- reg = sgi_base_from_redist(gicv3_data.redist_base[cpu]) +
- offset;
- writel(mask, reg);
- gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu]);
- } else if (intid_range == SPI_RANGE) {
- reg = gicv3_data.dist_base + offset + (intid / 32) * 4;
- writel(mask, reg);
- gicv3_gicd_wait_for_rwp();
- } else {
- GUEST_ASSERT(0);
- }
+ uint32_t fields_per_reg, index, mask, shift;
+ uint32_t cpu_or_dist;
+
+ GUEST_ASSERT(bits_per_field <= reg_bits);
+ GUEST_ASSERT(*val < (1U << bits_per_field));
+ /* Some registers like IROUTER are 64 bit long. Those are currently not
+ * supported by readl nor writel, so just asserting here until then.
+ */
+ GUEST_ASSERT(reg_bits == 32);
+
+ fields_per_reg = reg_bits / bits_per_field;
+ index = intid % fields_per_reg;
+ shift = index * bits_per_field;
+ mask = ((1U << bits_per_field) - 1) << shift;
+
+ /* Set offset to the actual register holding intid's config. */
+ offset += (intid / fields_per_reg) * (reg_bits / 8);
+
+ cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
+
+ if (write)
+ gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
+ *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
+}
+
+static void gicv3_write_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+{
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, true, &val);
+}
+
+static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field)
+{
+ uint32_t val;
+
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, false, &val);
+ return val;
+}
+
+static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+{
+ gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
+}
+
+/* Sets the intid to be level-sensitive or edge-triggered. */
+static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+{
+ uint32_t val;
+
+ /* N/A for private interrupts. */
+ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
+ val = is_edge ? 2 : 0;
+ gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
+}
+
+static void gicv3_irq_enable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_disable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_set_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_active(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
+}
+
+static void gicv3_irq_set_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
}
-static void gicv3_irq_enable(unsigned int intid)
+static void gicv3_irq_clear_pending(uint32_t intid)
{
- gicv3_config_irq(intid, GICD_ISENABLER);
+ gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
}
-static void gicv3_irq_disable(unsigned int intid)
+static bool gicv3_irq_get_pending(uint32_t intid)
{
- gicv3_config_irq(intid, GICD_ICENABLER);
+ return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
}
static void gicv3_enable_redist(void *redist_base)
@@ -237,4 +382,15 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_disable = gicv3_irq_disable,
.gic_read_iar = gicv3_read_iar,
.gic_write_eoir = gicv3_write_eoir,
+ .gic_write_dir = gicv3_write_dir,
+ .gic_set_priority_mask = gicv3_set_priority_mask,
+ .gic_set_eoi_split = gicv3_set_eoi_split,
+ .gic_set_priority = gicv3_set_priority,
+ .gic_irq_set_active = gicv3_irq_set_active,
+ .gic_irq_clear_active = gicv3_irq_clear_active,
+ .gic_irq_get_active = gicv3_irq_get_active,
+ .gic_irq_set_pending = gicv3_irq_set_pending,
+ .gic_irq_clear_pending = gicv3_irq_clear_pending,
+ .gic_irq_get_pending = gicv3_irq_get_pending,
+ .gic_irq_set_config = gicv3_irq_set_config,
};
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index b4eeeafd2a70..9343d82519b4 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -8,6 +8,7 @@
#include <linux/compiler.h>
#include <assert.h>
+#include "guest_modes.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"
@@ -237,6 +238,7 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init
get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
+ /* Configure base granule size */
switch (vm->mode) {
case VM_MODE_P52V48_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
@@ -245,25 +247,47 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
+ case VM_MODE_P48V48_64K:
+ case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ break;
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
break;
case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ /* Configure output size */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_64K:
+ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ break;
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
case VM_MODE_P40V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
- break;
+ case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
break;
+ case VM_MODE_P36V48_4K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V48_64K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
@@ -432,3 +456,47 @@ uint32_t guest_get_vcpuid(void)
{
return read_sysreg(tpidr_el1);
}
+
+void aarch64_get_supported_page_sizes(uint32_t ipa,
+ bool *ps4k, bool *ps16k, bool *ps64k)
+{
+ struct kvm_vcpu_init preferred_init;
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ uint64_t val;
+ struct kvm_one_reg reg = {
+ .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
+ .addr = (uint64_t)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, ipa);
+ TEST_ASSERT(vm_fd >= 0, "Can't create VM");
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, "Can't create vcpu");
+
+ err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+ TEST_ASSERT(err == 0, "Can't get target");
+ err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
+ TEST_ASSERT(err == 0, "Can't get init vcpu");
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, "Can't get MMFR0");
+
+ *ps4k = ((val >> 28) & 0xf) != 0xf;
+ *ps64k = ((val >> 24) & 0xf) == 0;
+ *ps16k = ((val >> 20) & 0xf) != 0;
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
+
+/*
+ * arm64 doesn't have a true default mode, so start by computing the
+ * available IPA space and page sizes early.
+ */
+void __attribute__((constructor)) init_guest_modes(void)
+{
+ guest_modes_append_default();
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index b9b271ff520d..b3a0fca0d780 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -5,11 +5,14 @@
#include <linux/kvm.h>
#include <linux/sizes.h>
+#include <asm/kvm_para.h>
#include <asm/kvm.h>
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
/*
* vGIC-v3 default host setup
@@ -28,7 +31,7 @@
* redistributor regions of the guest. Since it depends on the number of
* vCPUs for the VM, it must be called after all the vCPUs have been created.
*/
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
uint64_t gicd_base_gpa, uint64_t gicr_base_gpa)
{
int gic_fd;
@@ -50,6 +53,13 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
/* Distributor setup */
gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+
+ kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
+ 0, &nr_irqs, true);
+
+ kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+
kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa, true);
nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
@@ -68,3 +78,94 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
return gic_fd;
}
+
+/* should only work for level sensitive interrupts */
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ uint64_t attr = 32 * (intid / 32);
+ uint64_t index = intid % 32;
+ uint64_t val;
+ int ret;
+
+ ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val, false);
+ if (ret != 0)
+ return ret;
+
+ val |= 1U << index;
+ ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val, true);
+ return ret;
+}
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+
+ TEST_ASSERT(ret == 0, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+
+ TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
+ "doesn't allow injecting SGIs. There's no mask for it.");
+
+ if (INTID_IS_PPI(intid))
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+ else
+ irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ return _kvm_irq_line(vm, irq, level);
+}
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ int ret = _kvm_arm_irq_line(vm, intid, level);
+
+ TEST_ASSERT(ret == 0, "KVM_IRQ_LINE failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+static void vgic_poke_irq(int gic_fd, uint32_t intid,
+ uint32_t vcpu, uint64_t reg_off)
+{
+ uint64_t reg = intid / 32;
+ uint64_t index = intid % 32;
+ uint64_t attr = reg_off + reg * 4;
+ uint64_t val;
+ bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
+
+ /* Check that the addr part of the attr is within 32 bits. */
+ assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK);
+
+ uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+ : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
+
+ if (intid_is_private) {
+ /* TODO: only vcpu 0 implemented for now. */
+ assert(vcpu == 0);
+ attr += SZ_64K;
+ }
+
+ /* All calls will succeed, even with invalid intid's, as long as the
+ * addr part of the attr is within 32 bits (checked above). An invalid
+ * intid will just make the read/writes point to above the intended
+ * register space (i.e., ICPENDR after ISPENDR).
+ */
+ kvm_device_access(gic_fd, group, attr, &val, false);
+ val |= 1ULL << index;
+ kvm_device_access(gic_fd, group, attr, &val, true);
+}
+
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
+}
+
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
+}
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index c330f414ef96..8784013b747c 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -4,22 +4,59 @@
*/
#include "guest_modes.h"
+#ifdef __aarch64__
+#include "processor.h"
+enum vm_guest_mode vm_mode_default;
+#endif
+
struct guest_mode guest_modes[NUM_VM_MODES];
void guest_modes_append_default(void)
{
+#ifndef __aarch64__
guest_mode_append(VM_MODE_DEFAULT, true, true);
-
-#ifdef __aarch64__
- guest_mode_append(VM_MODE_P40V48_64K, true, true);
+#else
{
unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+ bool ps4k, ps16k, ps64k;
+ int i;
+
+ aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k);
+
+ vm_mode_default = NUM_VM_MODES;
+
if (limit >= 52)
- guest_mode_append(VM_MODE_P52V48_64K, true, true);
+ guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k);
if (limit >= 48) {
- guest_mode_append(VM_MODE_P48V48_4K, true, true);
- guest_mode_append(VM_MODE_P48V48_64K, true, true);
+ guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k);
+ guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k);
+ guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k);
+ }
+ if (limit >= 40) {
+ guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k);
+ guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k);
+ guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k);
+ if (ps4k)
+ vm_mode_default = VM_MODE_P40V48_4K;
}
+ if (limit >= 36) {
+ guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k);
+ guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k);
+ guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k);
+ guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k);
+ }
+
+ /*
+ * Pick the first supported IPA size if the default
+ * isn't available.
+ */
+ for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) {
+ if (guest_modes[i].supported && guest_modes[i].enabled)
+ vm_mode_default = i;
+ }
+
+ TEST_ASSERT(vm_mode_default != NUM_VM_MODES,
+ "No supported mode!");
}
#endif
#ifdef __s390x__
@@ -38,6 +75,16 @@ void guest_modes_append_default(void)
guest_mode_append(VM_MODE_P47V64_4K, true, true);
}
#endif
+#ifdef __riscv
+ {
+ unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
+
+ if (sz >= 52)
+ guest_mode_append(VM_MODE_P52V48_4K, true, true);
+ if (sz >= 48)
+ guest_mode_append(VM_MODE_P48V48_4K, true, true);
+ }
+#endif
}
void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 53d2b5d04b82..8c53f96ab7fe 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -85,6 +85,33 @@ int kvm_check_cap(long cap)
return ret;
}
+/* VM Check Capability
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap);
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ return ret;
+}
+
/* VM Enable Capability
*
* Input Args:
@@ -166,12 +193,18 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
[VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages",
[VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages",
+ [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages",
[VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages",
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
+ [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
[VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
+ [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
+ [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
+ [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -185,12 +218,18 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
[VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
[VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
+ [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 },
[VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 },
[VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
+ [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
[VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
[VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
[VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
[VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
+ [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
+ [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
+ [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -252,9 +291,19 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm->pgtable_levels = 3;
break;
case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
vm->pgtable_levels = 4;
break;
case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
+ vm->pgtable_levels = 3;
+ break;
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ vm->pgtable_levels = 4;
+ break;
+ case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
case VM_MODE_PXXV48_4K:
@@ -344,6 +393,13 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
struct kvm_vm *vm;
int i;
+#ifdef __x86_64__
+ /*
+ * Permission needs to be requested before KVM_SET_CPUID2.
+ */
+ vm_xsave_req_perm();
+#endif
+
/* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
@@ -443,9 +499,11 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
uint64_t first_page, uint32_t num_pages)
{
- struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
- .first_page = first_page,
- .num_pages = num_pages };
+ struct kvm_clear_dirty_log args = {
+ .dirty_bitmap = log, .slot = slot,
+ .first_page = first_page,
+ .num_pages = num_pages
+ };
int ret;
ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
@@ -2087,6 +2145,78 @@ int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
}
/*
+ * IRQ related functions.
+ */
+
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
+{
+ struct kvm_irq_level irq_level = {
+ .irq = irq,
+ .level = level,
+ };
+
+ return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
+}
+
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
+{
+ int ret = _kvm_irq_line(vm, irq, level);
+
+ TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno);
+}
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void)
+{
+ struct kvm_irq_routing *routing;
+ size_t size;
+
+ size = sizeof(struct kvm_irq_routing);
+ /* Allocate space for the max number of entries: this wastes 196 KBs. */
+ size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
+ routing = calloc(1, size);
+ assert(routing);
+
+ return routing;
+}
+
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin)
+{
+ int i;
+
+ assert(routing);
+ assert(routing->nr < KVM_MAX_IRQ_ROUTES);
+
+ i = routing->nr;
+ routing->entries[i].gsi = gsi;
+ routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ routing->entries[i].flags = 0;
+ routing->entries[i].u.irqchip.irqchip = 0;
+ routing->entries[i].u.irqchip.pin = pin;
+ routing->nr++;
+}
+
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
+{
+ int ret;
+
+ assert(routing);
+ ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing);
+ free(routing);
+
+ return ret;
+}
+
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
+{
+ int ret;
+
+ ret = _kvm_gsi_routing_write(vm, routing);
+ TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/*
* VM Dump
*
* Input Args:
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
new file mode 100644
index 000000000000..d377f2603d98
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V code
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
+ PGTBL_PAGE_SIZE_SHIFT;
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return PGTBL_PAGE_SIZE / sizeof(uint64_t);
+}
+
+static uint64_t pte_index_mask[] = {
+ PGTBL_L0_INDEX_MASK,
+ PGTBL_L1_INDEX_MASK,
+ PGTBL_L2_INDEX_MASK,
+ PGTBL_L3_INDEX_MASK,
+};
+
+static uint32_t pte_index_shift[] = {
+ PGTBL_L0_INDEX_SHIFT,
+ PGTBL_L1_INDEX_SHIFT,
+ PGTBL_L2_INDEX_SHIFT,
+ PGTBL_L3_INDEX_SHIFT,
+};
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ TEST_ASSERT(level > -1,
+ "Negative page table level (%d) not possible", level);
+ TEST_ASSERT(level < vm->pgtable_levels,
+ "Invalid page table level (%d)", level);
+
+ return (gva & pte_index_mask[level]) >> pte_index_shift[level];
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm)
+{
+ if (!vm->pgd_created) {
+ vm_paddr_t paddr = vm_phy_pages_alloc(vm,
+ page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+ vm->pgd = paddr;
+ vm->pgd_created = true;
+ }
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint64_t *ptep, next_ppn;
+ int level = vm->pgtable_levels - 1;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
+ if (!*ptep) {
+ next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, vaddr, level) * 8;
+ if (!*ptep && level > 0) {
+ next_ppn = vm_alloc_page_table(vm) >>
+ PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+ }
+
+ paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
+}
+
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+ int level = vm->pgtable_levels - 1;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+ }
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d",
+ gva, level);
+ exit(1);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t page, int level)
+{
+#ifdef DEBUG
+ static const char *const type[] = { "pte", "pmd", "pud", "p4d"};
+ uint64_t pte, *ptep;
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "",
+ type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+#endif
+}
+
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level = vm->pgtable_levels - 1;
+ uint64_t pgd, *ptep;
+
+ if (!vm->pgd_created)
+ return;
+
+ for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
+ ptep = addr_gpa2hva(vm, pgd);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "",
+ pgd, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+}
+
+void riscv_vcpu_mmu_setup(struct kvm_vm *vm, int vcpuid)
+{
+ unsigned long satp;
+
+ /*
+ * The RISC-V Sv48 MMU mode supports 56-bit physical address
+ * for 48-bit virtual address with 4KB last level page size.
+ */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
+ satp |= SATP_MODE_48;
+
+ set_reg(vm, vcpuid, RISCV_CSR_REG(satp), satp);
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+ struct kvm_riscv_core core;
+
+ get_reg(vm, vcpuid, RISCV_CORE_REG(mode), &core.mode);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), &core.regs.pc);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.ra), &core.regs.ra);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), &core.regs.sp);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), &core.regs.gp);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.tp), &core.regs.tp);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t0), &core.regs.t0);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t1), &core.regs.t1);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t2), &core.regs.t2);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s0), &core.regs.s0);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s1), &core.regs.s1);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a0), &core.regs.a0);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a1), &core.regs.a1);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a2), &core.regs.a2);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a3), &core.regs.a3);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a4), &core.regs.a4);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a5), &core.regs.a5);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a6), &core.regs.a6);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a7), &core.regs.a7);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s2), &core.regs.s2);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s3), &core.regs.s3);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s4), &core.regs.s4);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s5), &core.regs.s5);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s6), &core.regs.s6);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s7), &core.regs.s7);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s8), &core.regs.s8);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s9), &core.regs.s9);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s10), &core.regs.s10);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s11), &core.regs.s11);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t3), &core.regs.t3);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t4), &core.regs.t4);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t5), &core.regs.t5);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t6), &core.regs.t6);
+
+ fprintf(stream,
+ " MODE: 0x%lx\n", core.mode);
+ fprintf(stream,
+ " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n",
+ core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp);
+ fprintf(stream,
+ " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n",
+ core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2);
+ fprintf(stream,
+ " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n",
+ core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1);
+ fprintf(stream,
+ " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n",
+ core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5);
+ fprintf(stream,
+ " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n",
+ core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3);
+ fprintf(stream,
+ " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n",
+ core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7);
+ fprintf(stream,
+ " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n",
+ core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11);
+ fprintf(stream,
+ " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n",
+ core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
+}
+
+static void guest_hang(void)
+{
+ while (1)
+ ;
+}
+
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+ int r;
+ size_t stack_size = vm->page_size == 4096 ?
+ DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
+ unsigned long current_gp = 0;
+ struct kvm_mp_state mps;
+
+ vm_vcpu_add(vm, vcpuid);
+ riscv_vcpu_mmu_setup(vm, vcpuid);
+
+ /*
+ * With SBI HSM support in KVM RISC-V, all secondary VCPUs are
+ * powered-off by default so we ensure that all secondary VCPUs
+ * are powered-on using KVM_SET_MP_STATE ioctl().
+ */
+ mps.mp_state = KVM_MP_STATE_RUNNABLE;
+ r = _vcpu_ioctl(vm, vcpuid, KVM_SET_MP_STATE, &mps);
+ TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
+
+ /* Setup global pointer of guest to be same as the host */
+ asm volatile (
+ "add %0, gp, zero" : "=r" (current_gp) : : "memory");
+ set_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), current_gp);
+
+ /* Setup stack pointer and program counter of guest */
+ set_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp),
+ stack_vaddr + stack_size);
+ set_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc),
+ (unsigned long)guest_code);
+
+ /* Setup default exception vector of guest */
+ set_reg(vm, vcpuid, RISCV_CSR_REG(stvec),
+ (unsigned long)guest_hang);
+}
+
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ uint64_t id = RISCV_CORE_REG(regs.a0);
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ " num: %u\n", num);
+
+ va_start(ap, num);
+
+ for (i = 0; i < num; i++) {
+ switch (i) {
+ case 0:
+ id = RISCV_CORE_REG(regs.a0);
+ break;
+ case 1:
+ id = RISCV_CORE_REG(regs.a1);
+ break;
+ case 2:
+ id = RISCV_CORE_REG(regs.a2);
+ break;
+ case 3:
+ id = RISCV_CORE_REG(regs.a3);
+ break;
+ case 4:
+ id = RISCV_CORE_REG(regs.a4);
+ break;
+ case 5:
+ id = RISCV_CORE_REG(regs.a5);
+ break;
+ case 6:
+ id = RISCV_CORE_REG(regs.a6);
+ break;
+ case 7:
+ id = RISCV_CORE_REG(regs.a7);
+ break;
+ };
+ set_reg(vm, vcpuid, id, va_arg(ap, uint64_t));
+ }
+
+ va_end(ap);
+}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
new file mode 100644
index 000000000000..9e42d8248fa6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+}
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
+ register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
+ register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
+ register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
+ register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
+ register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
+ register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
+ register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
+ struct sbiret ret;
+
+ asm volatile (
+ "ecall"
+ : "+r" (a0), "+r" (a1)
+ : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
+ : "memory");
+ ret.error = a0;
+ ret.value = a1;
+
+ return ret;
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall uc = {
+ .cmd = cmd,
+ };
+ va_list va;
+ int i;
+
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ uc.args[i] = va_arg(va, uint64_t);
+ va_end(va);
+
+ sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 0, (vm_vaddr_t)&uc,
+ 0, 0, 0, 0, 0);
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+ struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct ucall ucall = {};
+
+ if (uc)
+ memset(uc, 0, sizeof(*uc));
+
+ if (run->exit_reason == KVM_EXIT_RISCV_SBI &&
+ run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT &&
+ run->riscv_sbi.function_id == 0) {
+ memcpy(&ucall, addr_gva2hva(vm, run->riscv_sbi.args[0]),
+ sizeof(ucall));
+
+ vcpu_run_complete_io(vm, vcpu_id);
+ if (uc)
+ memcpy(uc, &ucall, sizeof(ucall));
+ }
+
+ return ucall.cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index eef7b34756d5..5f9d7e91dc69 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -650,6 +650,45 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
vcpu_sregs_set(vm, vcpuid, &sregs);
}
+#define CPUID_XFD_BIT (1 << 4)
+static bool is_xfd_supported(void)
+{
+ int eax, ebx, ecx, edx;
+ const int leaf = 0xd, subleaf = 0x1;
+
+ __asm__ __volatile__(
+ "cpuid"
+ : /* output */ "=a"(eax), "=b"(ebx),
+ "=c"(ecx), "=d"(edx)
+ : /* input */ "0"(leaf), "2"(subleaf));
+
+ return !!(eax & CPUID_XFD_BIT);
+}
+
+void vm_xsave_req_perm(void)
+{
+ unsigned long bitmask;
+ long rc;
+
+ if (!is_xfd_supported())
+ return;
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
+ XSTATE_XTILE_DATA_BIT);
+ /*
+ * The older kernel version(<5.15) can't support
+ * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
+ */
+ if (rc)
+ return;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+ TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+ TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK,
+ "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
+ bitmask);
+}
+
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
struct kvm_mp_state mp_state;
@@ -847,6 +886,17 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
return entry;
}
+
+int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_cpuid2 *cpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ return ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+}
+
/*
* VM VCPU CPUID Set
*
@@ -864,12 +914,9 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
void vcpu_set_cpuid(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int rc;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+ rc = __vcpu_set_cpuid(vm, vcpuid, cpuid);
TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
rc, errno);
@@ -1017,21 +1064,6 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
sregs_dump(stream, &sregs, indent + 4);
}
-struct kvm_x86_state {
- struct kvm_vcpu_events events;
- struct kvm_mp_state mp_state;
- struct kvm_regs regs;
- struct kvm_xsave xsave;
- struct kvm_xcrs xcrs;
- struct kvm_sregs sregs;
- struct kvm_debugregs debugregs;
- union {
- struct kvm_nested_state nested;
- char nested_[16384];
- };
- struct kvm_msrs msrs;
-};
-
static int kvm_get_num_msrs_fd(int kvm_fd)
{
struct kvm_msr_list nmsrs;
@@ -1069,6 +1101,22 @@ struct kvm_msr_list *kvm_get_msr_index_list(void)
return list;
}
+static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu,
+ struct kvm_x86_state *state)
+{
+ int size;
+
+ size = vm_check_cap(vm, KVM_CAP_XSAVE2);
+ if (!size)
+ size = sizeof(struct kvm_xsave);
+
+ state->xsave = malloc(size);
+ if (size == sizeof(struct kvm_xsave))
+ return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave);
+ else
+ return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave);
+}
+
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
@@ -1096,25 +1144,25 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+ r);
state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
+ r);
- r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
- r);
+ r = vcpu_save_xsave_state(vm, vcpu, state);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
+ r);
if (kvm_check_cap(KVM_CAP_XCRS)) {
r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
@@ -1123,17 +1171,17 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
}
r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
+ r);
if (nested_size) {
state->nested.size = sizeof(state->nested_);
r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
- r);
+ r);
TEST_ASSERT(state->nested.size <= nested_size,
- "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
- state->nested.size, nested_size);
+ "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+ state->nested.size, nested_size);
} else
state->nested.size = 0;
@@ -1141,12 +1189,12 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
for (i = 0; i < nmsrs; i++)
state->msrs.entries[i].index = list->indices[i];
r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
- TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
- r, r == nmsrs ? -1 : list->indices[r]);
+ TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
+ r, r == nmsrs ? -1 : list->indices[r]);
r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
+ r);
free(list);
return state;
@@ -1157,9 +1205,14 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int r;
- r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
- r);
+ r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
+ TEST_ASSERT(r == state->msrs.nmsrs,
+ "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
+ r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
if (kvm_check_cap(KVM_CAP_XCRS)) {
r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
@@ -1167,41 +1220,43 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
r);
}
- r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
- TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
- r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
+ r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
+ r);
r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
- r);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
+ r);
if (state->nested.size) {
r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
+ r);
}
}
-bool is_intel_cpu(void)
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
{
+ free(state->xsave);
+ free(state);
+}
+
+static bool cpu_vendor_string_is(const char *vendor)
+{
+ const uint32_t *chunk = (const uint32_t *)vendor;
int eax, ebx, ecx, edx;
- const uint32_t *chunk;
const int leaf = 0;
__asm__ __volatile__(
@@ -1210,10 +1265,22 @@ bool is_intel_cpu(void)
"=c"(ecx), "=d"(edx)
: /* input */ "0"(leaf), "2"(0));
- chunk = (const uint32_t *)("GenuineIntel");
return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
+bool is_intel_cpu(void)
+{
+ return cpu_vendor_string_is("GenuineIntel");
+}
+
+/*
+ * Exclude early K5 samples with a vendor string of "AMDisbetter!"
+ */
+bool is_amd_cpu(void)
+{
+ return cpu_vendor_string_is("AuthenticAMD");
+}
+
uint32_t kvm_get_cpuid_max_basic(void)
{
return kvm_get_supported_cpuid_entry(0)->eax;
@@ -1337,6 +1404,23 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
}
}
+struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function,
+ uint32_t index)
+{
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
+
+ if (cur->function == function && cur->index == index)
+ return cur;
+ }
+
+ TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
+
+ return NULL;
+}
+
bool set_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 *ent)
{
@@ -1432,22 +1516,6 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
return cpuid;
}
-#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
-#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
-#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
-
-static inline unsigned x86_family(unsigned int eax)
-{
- unsigned int x86;
-
- x86 = (eax >> 8) & 0xf;
-
- if (x86 == 0xf)
- x86 += (eax >> 20) & 0xff;
-
- return x86;
-}
-
unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
@@ -1457,11 +1525,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
/* Avoid reserved HyperTransport region on AMD processors. */
- eax = ecx = 0;
- cpuid(&eax, &ebx, &ecx, &edx);
- if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
- ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
- edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+ if (!is_amd_cpu())
return max_gfn;
/* On parts with <40 physical address bits, the area is fully hidden */
@@ -1471,6 +1535,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
/* Before family 17h, the HyperTransport area is just below 1T. */
ht_gfn = (1 << 28) - num_ht_pages;
eax = 1;
+ ecx = 0;
cpuid(&eax, &ebx, &ecx, &edx);
if (x86_family(eax) < 0x17)
goto done;
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
new file mode 100644
index 000000000000..523c1e99ed64
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define VCPU_ID 0
+#define X86_FEATURE_XSAVE (1 << 26)
+#define X86_FEATURE_OSXSAVE (1 << 27)
+
+#define PAGE_SIZE (1 << 12)
+#define NUM_TILES 8
+#define TILE_SIZE 1024
+#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Tile configuration associated: */
+#define MAX_TILES 16
+#define RESERVED_BYTES 14
+
+#define XFEATURE_XTILECFG 17
+#define XFEATURE_XTILEDATA 18
+#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
+#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA)
+#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
+
+#define TILE_CPUID 0x1d
+#define XSTATE_CPUID 0xd
+#define TILE_PALETTE_CPUID_SUBLEAVE 0x1
+#define XSTATE_USER_STATE_SUBLEAVE 0x0
+
+#define XSAVE_HDR_OFFSET 512
+
+struct xsave_data {
+ u8 area[XSAVE_SIZE];
+} __aligned(64);
+
+struct tile_config {
+ u8 palette_id;
+ u8 start_row;
+ u8 reserved[RESERVED_BYTES];
+ u16 colsb[MAX_TILES];
+ u8 rows[MAX_TILES];
+};
+
+struct tile_data {
+ u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+ u16 bytes_per_tile;
+ u16 bytes_per_row;
+ u16 max_names;
+ u16 max_rows;
+ u32 xsave_offset;
+ u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
+static inline u64 __xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ asm volatile("xgetbv;"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((u64)edx << 32);
+}
+
+static inline void __xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+static inline void __ldtilecfg(void *cfg)
+{
+ asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+ : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
+ asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+ : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xsave_data *data, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xsavec (%%rdi)"
+ : : "D" (data), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static inline void check_cpuid_xsave(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ eax = 1;
+ ecx = 0;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (!(ecx & X86_FEATURE_XSAVE))
+ GUEST_ASSERT(!"cpuid: no CPU xsave support!");
+ if (!(ecx & X86_FEATURE_OSXSAVE))
+ GUEST_ASSERT(!"cpuid: no OS xsave support!");
+}
+
+static bool check_xsave_supports_xtile(void)
+{
+ return __xgetbv(0) & XFEATURE_MASK_XTILE;
+}
+
+static bool enum_xtile_config(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ eax = TILE_CPUID;
+ ecx = TILE_PALETTE_CPUID_SUBLEAVE;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (!eax || !ebx || !ecx)
+ return false;
+
+ xtile.max_names = ebx >> 16;
+ if (xtile.max_names < NUM_TILES)
+ return false;
+
+ xtile.bytes_per_tile = eax >> 16;
+ if (xtile.bytes_per_tile < TILE_SIZE)
+ return false;
+
+ xtile.bytes_per_row = ebx;
+ xtile.max_rows = ecx;
+
+ return true;
+}
+
+static bool enum_xsave_tile(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ eax = XSTATE_CPUID;
+ ecx = XFEATURE_XTILEDATA;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (!eax || !ebx)
+ return false;
+
+ xtile.xsave_offset = ebx;
+ xtile.xsave_size = eax;
+
+ return true;
+}
+
+static bool check_xsave_size(void)
+{
+ u32 eax, ebx, ecx, edx;
+ bool valid = false;
+
+ eax = XSTATE_CPUID;
+ ecx = XSTATE_USER_STATE_SUBLEAVE;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (ebx && ebx <= XSAVE_SIZE)
+ valid = true;
+
+ return valid;
+}
+
+static bool check_xtile_info(void)
+{
+ bool ret = false;
+
+ if (!check_xsave_size())
+ return ret;
+
+ if (!enum_xsave_tile())
+ return ret;
+
+ if (!enum_xtile_config())
+ return ret;
+
+ if (sizeof(struct tile_data) >= xtile.xsave_size)
+ ret = true;
+
+ return ret;
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+ int i;
+
+ /* Only palette id 1 */
+ cfg->palette_id = 1;
+ for (i = 0; i < xtile.max_names; i++) {
+ cfg->colsb[i] = xtile.bytes_per_row;
+ cfg->rows[i] = xtile.max_rows;
+ }
+}
+
+static void set_xstatebv(void *data, uint64_t bv)
+{
+ *(uint64_t *)(data + XSAVE_HDR_OFFSET) = bv;
+}
+
+static u64 get_xstatebv(void *data)
+{
+ return *(u64 *)(data + XSAVE_HDR_OFFSET);
+}
+
+static void init_regs(void)
+{
+ uint64_t cr4, xcr0;
+
+ /* turn on CR4.OSXSAVE */
+ cr4 = get_cr4();
+ cr4 |= X86_CR4_OSXSAVE;
+ set_cr4(cr4);
+
+ xcr0 = __xgetbv(0);
+ xcr0 |= XFEATURE_MASK_XTILE;
+ __xsetbv(0x0, xcr0);
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+ struct tile_data *tiledata,
+ struct xsave_data *xsave_data)
+{
+ init_regs();
+ check_cpuid_xsave();
+ GUEST_ASSERT(check_xsave_supports_xtile());
+ GUEST_ASSERT(check_xtile_info());
+
+ /* check xtile configs */
+ GUEST_ASSERT(xtile.xsave_offset == 2816);
+ GUEST_ASSERT(xtile.xsave_size == 8192);
+ GUEST_ASSERT(xtile.max_names == 8);
+ GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+ GUEST_ASSERT(xtile.bytes_per_row == 64);
+ GUEST_ASSERT(xtile.max_rows == 16);
+ GUEST_SYNC(1);
+
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(2);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ GUEST_SYNC(3);
+ /* Check save/restore when trap to userspace */
+ __tileloadd(tiledata);
+ GUEST_SYNC(4);
+ __tilerelease();
+ GUEST_SYNC(5);
+ /* bit 18 not in the XCOMP_BV after xsavec() */
+ set_xstatebv(xsave_data, XFEATURE_MASK_XTILEDATA);
+ __xsavec(xsave_data, XFEATURE_MASK_XTILEDATA);
+ GUEST_ASSERT((get_xstatebv(xsave_data) & XFEATURE_MASK_XTILEDATA) == 0);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILEDATA);
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILEDATA);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ /* Trigger #NM exception */
+ __tileloadd(tiledata);
+ GUEST_SYNC(10);
+
+ GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+ /* Check if #NM is triggered by XFEATURE_MASK_XTILEDATA */
+ GUEST_SYNC(7);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+ /* Clear xfd_err */
+ wrmsr(MSR_IA32_XFD_ERR, 0);
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_regs regs1, regs2;
+ bool amx_supported = false;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ int xsave_restore_size = 0;
+ vm_vaddr_t amx_cfg, tiledata, xsavedata;
+ struct ucall uc;
+ u32 amx_offset;
+ int stage, ret;
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ entry = kvm_get_supported_cpuid_entry(1);
+ if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+ print_skip("XSAVE feature not supported");
+ exit(KSFT_SKIP);
+ }
+
+ if (kvm_get_cpuid_max_basic() >= 0xd) {
+ entry = kvm_get_supported_cpuid_index(0xd, 0);
+ amx_supported = entry && !!(entry->eax & XFEATURE_MASK_XTILE);
+ if (!amx_supported) {
+ print_skip("AMX is not supported by the vCPU (eax=0x%x)", entry->eax);
+ exit(KSFT_SKIP);
+ }
+ /* Get xsave/restore max size */
+ xsave_restore_size = entry->ecx;
+ }
+
+ run = vcpu_state(vm, VCPU_ID);
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ /* Register #NM handler */
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+ /* amx cfg for guest_code */
+ amx_cfg = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+ /* amx tiledata for guest_code */
+ tiledata = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+ /* xsave data for guest_code */
+ xsavedata = vm_vaddr_alloc_pages(vm, 3);
+ memset(addr_gva2hva(vm, xsavedata), 0, 3 * getpagesize());
+ vcpu_args_set(vm, VCPU_ID, 3, amx_cfg, tiledata, xsavedata);
+
+ for (stage = 1; ; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s),\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ switch (uc.args[1]) {
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+ break;
+ case 4:
+ case 10:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+ /* Compacted mode, get amx offset by xsave area
+ * size subtract 8K amx size.
+ */
+ amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+ state = vcpu_save_state(vm, VCPU_ID);
+ void *amx_start = (void *)state->xsave + amx_offset;
+ void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+ /* Only check TMM0 register, 1 tile */
+ ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+ TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret);
+ kvm_x86_state_cleanup(state);
+ break;
+ case 9:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ fprintf(stderr, "UCALL_DONE\n");
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
index a711f83749ea..16d2465c5634 100644
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -154,6 +154,34 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
return guest_cpuids;
}
+static void set_cpuid_after_run(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid)
+{
+ struct kvm_cpuid_entry2 *ent;
+ int rc;
+ u32 eax, ebx, x;
+
+ /* Setting unmodified CPUID is allowed */
+ rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+ /* Changing CPU features is forbidden */
+ ent = get_cpuid(cpuid, 0x7, 0);
+ ebx = ent->ebx;
+ ent->ebx--;
+ rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ TEST_ASSERT(rc, "Changing CPU features should fail");
+ ent->ebx = ebx;
+
+ /* Changing MAXPHYADDR is forbidden */
+ ent = get_cpuid(cpuid, 0x80000008, 0);
+ eax = ent->eax;
+ x = eax & 0xff;
+ ent->eax = (eax & ~0xffu) | (x - 1);
+ rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+ TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+ ent->eax = eax;
+}
+
int main(void)
{
struct kvm_cpuid2 *supp_cpuid, *cpuid2;
@@ -175,5 +203,7 @@ int main(void)
for (stage = 0; stage < 3; stage++)
run_vcpu(vm, VCPU_ID, stage);
+ set_cpuid_after_run(vm, cpuid2);
+
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 2b46dcca86a8..4c7841dfd481 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -129,7 +129,7 @@ static void save_restore_vm(struct kvm_vm *vm)
vcpu_set_hv_cpuid(vm, VCPU_ID);
vcpu_enable_evmcs(vm, VCPU_ID);
vcpu_load_state(vm, VCPU_ID, state);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, &regs2);
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
new file mode 100644
index 000000000000..c715adcbd487
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * In lieu of copying perf_event.h into tools...
+ */
+#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
+
+union cpuid10_eax {
+ struct {
+ unsigned int version_id:8;
+ unsigned int num_counters:8;
+ unsigned int bit_width:8;
+ unsigned int mask_length:8;
+ } split;
+ unsigned int full;
+};
+
+union cpuid10_ebx {
+ struct {
+ unsigned int no_unhalted_core_cycles:1;
+ unsigned int no_instructions_retired:1;
+ unsigned int no_unhalted_reference_cycles:1;
+ unsigned int no_llc_reference:1;
+ unsigned int no_llc_misses:1;
+ unsigned int no_branch_instruction_retired:1;
+ unsigned int no_branch_misses_retired:1;
+ } split;
+ unsigned int full;
+};
+
+/* End of stuff taken from perf_event.h. */
+
+/* Oddly, this isn't in perf_event.h. */
+#define ARCH_PERFMON_BRANCHES_RETIRED 5
+
+#define VCPU_ID 0
+#define NUM_BRANCHES 42
+
+/*
+ * This is how the event selector and unit mask are stored in an AMD
+ * core performance event-select register. Intel's format is similar,
+ * but the event selector is only 8 bits.
+ */
+#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
+ (umask & 0xff) << 8)
+
+/*
+ * "Branch instructions retired", from the Intel SDM, volume 3,
+ * "Pre-defined Architectural Performance Events."
+ */
+
+#define INTEL_BR_RETIRED EVENT(0xc4, 0)
+
+/*
+ * "Retired branch instructions", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+
+#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
+
+/*
+ * This event list comprises Intel's eight architectural events plus
+ * AMD's "retired branch instructions" for Zen[123] (and possibly
+ * other AMD CPUs).
+ */
+static const uint64_t event_list[] = {
+ EVENT(0x3c, 0),
+ EVENT(0xc0, 0),
+ EVENT(0x3c, 1),
+ EVENT(0x2e, 0x4f),
+ EVENT(0x2e, 0x41),
+ EVENT(0xc4, 0),
+ EVENT(0xc5, 0),
+ EVENT(0xa4, 1),
+ AMD_ZEN_BR_RETIRED,
+};
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(0);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(0) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+ uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(0);
+
+ v ^= bits_to_flip;
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(0);
+}
+
+static void intel_guest_code(void)
+{
+ check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ check_msr(MSR_P6_EVNTSEL0, 0xffff);
+ check_msr(MSR_IA32_PMC0, 0xffff);
+ GUEST_SYNC(1);
+
+ for (;;) {
+ uint64_t br0, br1;
+
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ br0 = rdmsr(MSR_IA32_PMC0);
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ br1 = rdmsr(MSR_IA32_PMC0);
+ GUEST_SYNC(br1 - br0);
+ }
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+ check_msr(MSR_K7_EVNTSEL0, 0xffff);
+ check_msr(MSR_K7_PERFCTR0, 0xffff);
+ GUEST_SYNC(1);
+
+ for (;;) {
+ uint64_t br0, br1;
+
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
+ br0 = rdmsr(MSR_K7_PERFCTR0);
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ br1 = rdmsr(MSR_K7_PERFCTR0);
+ GUEST_SYNC(br1 - br0);
+ }
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vm_to_sync(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ get_ucall(vm, VCPU_ID, &uc);
+ TEST_ASSERT(uc.cmd == UCALL_SYNC,
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+ return uc.args[1];
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vm *vm)
+{
+ bool success;
+
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+ success = run_vm_to_sync(vm);
+ vm_install_exception_handler(vm, GP_VECTOR, NULL);
+
+ return success;
+}
+
+static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
+{
+ struct kvm_pmu_event_filter *f;
+ int size = sizeof(*f) + nevents * sizeof(f->events[0]);
+
+ f = malloc(size);
+ TEST_ASSERT(f, "Out of memory");
+ memset(f, 0, size);
+ f->nevents = nevents;
+ return f;
+}
+
+static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+{
+ struct kvm_pmu_event_filter *f;
+ int i;
+
+ f = make_pmu_event_filter(ARRAY_SIZE(event_list));
+ f->action = action;
+ for (i = 0; i < ARRAY_SIZE(event_list); i++)
+ f->events[i] = event_list[i];
+
+ return f;
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
+ uint64_t event)
+{
+ bool found = false;
+ int i;
+
+ for (i = 0; i < f->nevents; i++) {
+ if (found)
+ f->events[i - 1] = f->events[i];
+ else
+ found = f->events[i] == event;
+ }
+ if (found)
+ f->nevents--;
+ return f;
+}
+
+static void test_without_filter(struct kvm_vm *vm)
+{
+ uint64_t count = run_vm_to_sync(vm);
+
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static uint64_t test_with_filter(struct kvm_vm *vm,
+ struct kvm_pmu_event_filter *f)
+{
+ vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
+ return run_vm_to_sync(vm);
+}
+
+static void test_member_deny_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+ uint64_t count = test_with_filter(vm, f);
+
+ free(f);
+ if (count)
+ pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
+ __func__, count);
+ TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+}
+
+static void test_member_allow_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+ uint64_t count = test_with_filter(vm, f);
+
+ free(f);
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static void test_not_member_deny_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+ uint64_t count;
+
+ remove_event(f, INTEL_BR_RETIRED);
+ remove_event(f, AMD_ZEN_BR_RETIRED);
+ count = test_with_filter(vm, f);
+ free(f);
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static void test_not_member_allow_list(struct kvm_vm *vm)
+{
+ struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+ uint64_t count;
+
+ remove_event(f, INTEL_BR_RETIRED);
+ remove_event(f, AMD_ZEN_BR_RETIRED);
+ count = test_with_filter(vm, f);
+ free(f);
+ if (count)
+ pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
+ __func__, count);
+ TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+}
+
+/*
+ * Check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, an EBX bit vector of length greater
+ * than 5, and EBX[5] clear.
+ */
+static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
+{
+ union cpuid10_eax eax = { .full = entry->eax };
+ union cpuid10_ebx ebx = { .full = entry->ebx };
+
+ return eax.split.version_id && eax.split.num_counters > 0 &&
+ eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
+ !ebx.split.no_branch_instruction_retired;
+}
+
+/*
+ * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
+ * clear on AMD hardware.
+ */
+static bool use_intel_pmu(void)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = kvm_get_supported_cpuid_index(0xa, 0);
+ return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
+}
+
+static bool is_zen1(uint32_t eax)
+{
+ return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
+}
+
+static bool is_zen2(uint32_t eax)
+{
+ return x86_family(eax) == 0x17 &&
+ x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
+}
+
+static bool is_zen3(uint32_t eax)
+{
+ return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
+}
+
+/*
+ * Determining AMD support for a PMU event requires consulting the AMD
+ * PPR for the CPU or reference material derived therefrom. The AMD
+ * test code herein has been verified to work on Zen1, Zen2, and Zen3.
+ *
+ * Feel free to add more AMD CPUs that are documented to support event
+ * select 0xc2 umask 0 as "retired branch instructions."
+ */
+static bool use_amd_pmu(void)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = kvm_get_supported_cpuid_index(1, 0);
+ return is_amd_cpu() && entry &&
+ (is_zen1(entry->eax) ||
+ is_zen2(entry->eax) ||
+ is_zen3(entry->eax));
+}
+
+int main(int argc, char *argv[])
+{
+ void (*guest_code)(void) = NULL;
+ struct kvm_vm *vm;
+ int r;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER);
+ if (!r) {
+ print_skip("KVM_CAP_PMU_EVENT_FILTER not supported");
+ exit(KSFT_SKIP);
+ }
+
+ if (use_intel_pmu())
+ guest_code = intel_guest_code;
+ else if (use_amd_pmu())
+ guest_code = amd_guest_code;
+
+ if (!guest_code) {
+ print_skip("Don't know how to test this guest PMU");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ if (!sanity_check_pmu(vm)) {
+ print_skip("Guest PMU is not functional");
+ exit(KSFT_SKIP);
+ }
+
+ test_without_filter(vm);
+ test_member_deny_list(vm);
+ test_member_allow_list(vm);
+ test_not_member_deny_list(vm);
+ test_not_member_allow_list(vm);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
index 29b18d565cf4..80056bbbb003 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -21,7 +21,7 @@
#define NR_LOCK_TESTING_THREADS 3
#define NR_LOCK_TESTING_ITERATIONS 10000
-static void sev_ioctl(int vm_fd, int cmd_id, void *data)
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error)
{
struct kvm_sev_cmd cmd = {
.id = cmd_id,
@@ -31,9 +31,19 @@ static void sev_ioctl(int vm_fd, int cmd_id, void *data)
int ret;
ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
- TEST_ASSERT((ret == 0 || cmd.error == SEV_RET_SUCCESS),
+ *fw_error = cmd.error;
+ return ret;
+}
+
+static void sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+ int ret;
+ __u32 fw_error;
+
+ ret = __sev_ioctl(vm_fd, cmd_id, data, &fw_error);
+ TEST_ASSERT(ret == 0 && fw_error == SEV_RET_SUCCESS,
"%d failed: return code: %d, errno: %d, fw error: %d",
- cmd_id, ret, errno, cmd.error);
+ cmd_id, ret, errno, fw_error);
}
static struct kvm_vm *sev_vm_create(bool es)
@@ -225,12 +235,45 @@ static void sev_mirror_create(int dst_fd, int src_fd)
TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
}
+static void verify_mirror_allowed_cmds(int vm_fd)
+{
+ struct kvm_sev_guest_status status;
+
+ for (int cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+ int ret;
+ __u32 fw_error;
+
+ /*
+ * These commands are allowed for mirror VMs, all others are
+ * not.
+ */
+ switch (cmd_id) {
+ case KVM_SEV_LAUNCH_UPDATE_VMSA:
+ case KVM_SEV_GUEST_STATUS:
+ case KVM_SEV_DBG_DECRYPT:
+ case KVM_SEV_DBG_ENCRYPT:
+ continue;
+ default:
+ break;
+ }
+
+ /*
+ * These commands should be disallowed before the data
+ * parameter is examined so NULL is OK here.
+ */
+ ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able call command: %d. ret: %d, errno: %d\n",
+ cmd_id, ret, errno);
+ }
+
+ sev_ioctl(vm_fd, KVM_SEV_GUEST_STATUS, &status);
+}
+
static void test_sev_mirror(bool es)
{
struct kvm_vm *src_vm, *dst_vm;
- struct kvm_sev_launch_start start = {
- .policy = es ? SEV_POLICY_ES : 0
- };
int i;
src_vm = sev_vm_create(es);
@@ -241,10 +284,12 @@ static void test_sev_mirror(bool es)
/* Check that we can complete creation of the mirror VM. */
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
vm_vcpu_add(dst_vm, i);
- sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_START, &start);
+
if (es)
sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+ verify_mirror_allowed_cmds(dst_vm->fd);
+
kvm_vm_free(src_vm);
kvm_vm_free(dst_vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index d0fe2fdce58c..2da8eb8e2d96 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -212,7 +212,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 32854c1462ad..2e0a92da8ff5 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -218,7 +218,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, &regs2);
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index 5a6a662f2e59..a426078b16a3 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -77,8 +77,8 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
switch (get_ucall(vm, vcpuid, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
- stage + 1, (ulong)uc.args[1]);
+ uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
return;
case UCALL_DONE:
return;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index 2835a17f1b7a..edac8839e717 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -30,8 +30,8 @@ static struct kvm_vm *vm;
static void l2_guest_code(void)
{
/* Exit to L0 */
- asm volatile("inb %%dx, %%al"
- : : [port] "d" (PORT_L0_EXIT) : "rax");
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (PORT_L0_EXIT) : "rax");
}
static void l1_guest_code(struct vmx_pages *vmx_pages)
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
new file mode 100644
index 000000000000..27a850f3d7ce
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+#define VCPU_ID 0
+
+static struct kvm_vm *vm;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ /* Loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+ asm volatile("ud2");
+}
+
+static void __run_vcpu_with_invalid_state(void)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_run(vm, VCPU_ID);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Expected KVM_EXIT_INTERNAL_ERROR, got %d (%s)\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Expected emulation failure, got %d\n",
+ run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(void)
+{
+ /*
+ * Always run twice to verify KVM handles the case where _KVM_ queues
+ * an exception with invalid state and then exits to userspace, i.e.
+ * that KVM doesn't explode if userspace ignores the initial error.
+ */
+ __run_vcpu_with_invalid_state();
+ __run_vcpu_with_invalid_state();
+}
+
+static void set_timer(void)
+{
+ struct itimerval timer;
+
+ timer.it_value.tv_sec = 0;
+ timer.it_value.tv_usec = 200;
+ timer.it_interval = timer.it_value;
+ ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
+static void set_or_clear_invalid_guest_state(bool set)
+{
+ static struct kvm_sregs sregs;
+
+ if (!sregs.cr0)
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.tr.unusable = !!set;
+ vcpu_sregs_set(vm, VCPU_ID, &sregs);
+}
+
+static void set_invalid_guest_state(void)
+{
+ set_or_clear_invalid_guest_state(true);
+}
+
+static void clear_invalid_guest_state(void)
+{
+ set_or_clear_invalid_guest_state(false);
+}
+
+static void sigalrm_handler(int sig)
+{
+ struct kvm_vcpu_events events;
+
+ TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+
+ /*
+ * If an exception is pending, attempt KVM_RUN with invalid guest,
+ * otherwise rearm the timer and keep doing so until the timer fires
+ * between KVM queueing an exception and re-entering the guest.
+ */
+ if (events.exception.pending) {
+ set_invalid_guest_state();
+ run_vcpu_with_invalid_state();
+ } else {
+ set_timer();
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ if (!is_intel_cpu() || vm_is_unrestricted_guest(NULL)) {
+ print_skip("Must be run with kvm_intel.unrestricted_guest=0");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, (void *)guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+ /*
+ * Stuff invalid guest state for L2 by making TR unusuable. The next
+ * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+ * emulating invalid guest state for L2.
+ */
+ set_invalid_guest_state();
+ run_vcpu_with_invalid_state();
+
+ /*
+ * Verify KVM also handles the case where userspace gains control while
+ * an exception is pending and stuffs invalid state. Run with valid
+ * guest state and a timer firing every 200us, and attempt to enter the
+ * guest with invalid state when the handler interrupts KVM with an
+ * exception pending.
+ */
+ clear_invalid_guest_state();
+ TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+ "Failed to register SIGALRM handler, errno = %d (%s)",
+ errno, strerror(errno));
+
+ set_timer();
+ run_vcpu_with_invalid_state();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index a07480aed397..ff92e25b6f1e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -244,7 +244,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, &regs2);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index a0699f00b3d6..865e17146815 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -14,6 +14,9 @@
#include <stdint.h>
#include <time.h>
#include <sched.h>
+#include <signal.h>
+
+#include <sys/eventfd.h>
#define VCPU_ID 5
@@ -22,10 +25,15 @@
#define SHINFO_REGION_SLOT 10
#define PAGE_SIZE 4096
+#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT 11
+
+#define SHINFO_ADDR (SHINFO_REGION_GPA)
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
+#define SHINFO_VADDR (SHINFO_REGION_GVA)
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
@@ -38,20 +46,20 @@ static struct kvm_vm *vm;
#define MIN_STEAL_TIME 50000
struct pvclock_vcpu_time_info {
- u32 version;
- u32 pad0;
- u64 tsc_timestamp;
- u64 system_time;
- u32 tsc_to_system_mul;
- s8 tsc_shift;
- u8 flags;
- u8 pad[2];
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
} __attribute__((__packed__)); /* 32 bytes */
struct pvclock_wall_clock {
- u32 version;
- u32 sec;
- u32 nsec;
+ u32 version;
+ u32 sec;
+ u32 nsec;
} __attribute__((__packed__));
struct vcpu_runstate_info {
@@ -66,22 +74,44 @@ struct arch_vcpu_info {
};
struct vcpu_info {
- uint8_t evtchn_upcall_pending;
- uint8_t evtchn_upcall_mask;
- unsigned long evtchn_pending_sel;
- struct arch_vcpu_info arch;
- struct pvclock_vcpu_time_info time;
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ unsigned long evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */
+struct shared_info {
+ struct vcpu_info vcpu_info[32];
+ unsigned long evtchn_pending[64];
+ unsigned long evtchn_mask[64];
+ struct pvclock_wall_clock wc;
+ uint32_t wc_sec_hi;
+ /* arch_shared_info here */
+};
+
#define RUNSTATE_running 0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked 2
#define RUNSTATE_offline 3
+static const char *runstate_names[] = {
+ "running",
+ "runnable",
+ "blocked",
+ "offline"
+};
+
+struct {
+ struct kvm_irq_routing info;
+ struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+
static void evtchn_handler(struct ex_regs *regs)
{
struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
vi->evtchn_upcall_pending = 0;
+ vi->evtchn_pending_sel = 0;
GUEST_SYNC(0x20);
}
@@ -127,7 +157,25 @@ static void guest_code(void)
GUEST_SYNC(6);
GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
- GUEST_DONE();
+ /* Attempt to deliver a *masked* interrupt */
+ GUEST_SYNC(7);
+
+ /* Wait until we see the bit set */
+ struct shared_info *si = (void *)SHINFO_VADDR;
+ while (!si->evtchn_pending[0])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Now deliver an *unmasked* interrupt */
+ GUEST_SYNC(8);
+
+ while (!si->evtchn_pending[1])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Change memslots and deliver an interrupt */
+ GUEST_SYNC(9);
+
+ for (;;)
+ __asm__ __volatile__ ("rep nop" : : : "memory");
}
static int cmp_timespec(struct timespec *a, struct timespec *b)
@@ -144,9 +192,18 @@ static int cmp_timespec(struct timespec *a, struct timespec *b)
return 0;
}
+static void handle_alrm(int sig)
+{
+ TEST_FAIL("IRQ delivery timed out");
+}
+
int main(int argc, char *argv[])
{
struct timespec min_ts, max_ts, vm_ts;
+ bool verbose;
+
+ verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+ !strncmp(argv[1], "--verbose", 10));
int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
@@ -155,6 +212,7 @@ int main(int argc, char *argv[])
}
bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+ bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
clock_gettime(CLOCK_REALTIME, &min_ts);
@@ -166,6 +224,11 @@ int main(int argc, char *argv[])
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
+ struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+
+ int zero_fd = open("/dev/zero", O_RDONLY);
+ TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
.msr = XEN_HYPERCALL_MSR,
@@ -184,6 +247,16 @@ int main(int argc, char *argv[])
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+ /*
+ * Test what happens when the HVA of the shinfo page is remapped after
+ * the kernel has a reference to it. But make sure we copy the clock
+ * info over since that's only set at setup time, and we test it later.
+ */
+ struct pvclock_wall_clock wc_copy = shinfo->wc;
+ void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+ TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+ shinfo->wc = wc_copy;
+
struct kvm_xen_vcpu_attr vi = {
.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
.u.gpa = VCPU_INFO_ADDR,
@@ -214,6 +287,49 @@ int main(int argc, char *argv[])
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
}
+ int irq_fd[2] = { -1, -1 };
+
+ if (do_eventfd_tests) {
+ irq_fd[0] = eventfd(0, 0);
+ irq_fd[1] = eventfd(0, 0);
+
+ /* Unexpected, but not a KVM failure */
+ if (irq_fd[0] == -1 || irq_fd[1] == -1)
+ do_eventfd_tests = false;
+ }
+
+ if (do_eventfd_tests) {
+ irq_routes.info.nr = 2;
+
+ irq_routes.entries[0].gsi = 32;
+ irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[0].u.xen_evtchn.port = 15;
+ irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
+ irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ irq_routes.entries[1].gsi = 33;
+ irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[1].u.xen_evtchn.port = 66;
+ irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
+ irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);
+
+ struct kvm_irqfd ifd = { };
+
+ ifd.fd = irq_fd[0];
+ ifd.gsi = 32;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ ifd.fd = irq_fd[1];
+ ifd.gsi = 33;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ struct sigaction sa = { };
+ sa.sa_handler = handle_alrm;
+ sigaction(SIGALRM, &sa, NULL);
+ }
+
struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
vinfo->evtchn_upcall_pending = 0;
@@ -248,6 +364,8 @@ int main(int argc, char *argv[])
switch (uc.args[1]) {
case 0:
+ if (verbose)
+ printf("Delivering evtchn upcall\n");
evtchn_irq_expected = true;
vinfo->evtchn_upcall_pending = 1;
break;
@@ -256,11 +374,16 @@ int main(int argc, char *argv[])
TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
if (!do_runstate_tests)
goto done;
+ if (verbose)
+ printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
rst.u.runstate.state = uc.args[1];
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
break;
+
case 4:
+ if (verbose)
+ printf("Testing RUNSTATE_ADJUST\n");
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
memset(&rst.u, 0, sizeof(rst.u));
rst.u.runstate.state = (uint64_t)-1;
@@ -274,6 +397,8 @@ int main(int argc, char *argv[])
break;
case 5:
+ if (verbose)
+ printf("Testing RUNSTATE_DATA\n");
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
memset(&rst.u, 0, sizeof(rst.u));
rst.u.runstate.state = RUNSTATE_running;
@@ -282,16 +407,54 @@ int main(int argc, char *argv[])
rst.u.runstate.time_offline = 0x5a;
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
break;
+
case 6:
+ if (verbose)
+ printf("Testing steal time\n");
/* Yield until scheduler delay exceeds target */
rundelay = get_run_delay() + MIN_STEAL_TIME;
do {
sched_yield();
} while (get_run_delay() < rundelay);
break;
+
+ case 7:
+ if (!do_eventfd_tests)
+ goto done;
+ if (verbose)
+ printf("Testing masked event channel\n");
+ shinfo->evtchn_mask[0] = 0x8000;
+ eventfd_write(irq_fd[0], 1UL);
+ alarm(1);
+ break;
+
+ case 8:
+ if (verbose)
+ printf("Testing unmasked event channel\n");
+ /* Unmask that, but deliver the other one */
+ shinfo->evtchn_pending[0] = 0;
+ shinfo->evtchn_mask[0] = 0;
+ eventfd_write(irq_fd[1], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case 9:
+ if (verbose)
+ printf("Testing event channel after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+ eventfd_write(irq_fd[0], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
case 0x20:
TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
evtchn_irq_expected = false;
+ if (shinfo->evtchn_pending[1] &&
+ shinfo->evtchn_pending[0])
+ goto done;
break;
}
break;
@@ -318,9 +481,19 @@ int main(int argc, char *argv[])
ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+ if (verbose) {
+ printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
+ printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+ ti->tsc_shift, ti->flags);
+ printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+ ti2->tsc_shift, ti2->flags);
+ }
+
vm_ts.tv_sec = wc->sec;
vm_ts.tv_nsec = wc->nsec;
- TEST_ASSERT(wc->version && !(wc->version & 1),
+ TEST_ASSERT(wc->version && !(wc->version & 1),
"Bad wallclock version %x", wc->version);
TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
@@ -341,6 +514,15 @@ int main(int argc, char *argv[])
};
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+ if (verbose) {
+ printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
+ rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+ rs->state, rs->state_entry_time);
+ for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+ printf("State %s: %" PRIu64 " ns\n",
+ runstate_names[i], rs->time[i]);
+ }
+ }
TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
"State entry time mismatch");
diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh
index 1b4d95d575f8..14fedeef762e 100755
--- a/tools/testing/selftests/lkdtm/stack-entropy.sh
+++ b/tools/testing/selftests/lkdtm/stack-entropy.sh
@@ -4,13 +4,27 @@
# Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test.
set -e
samples="${1:-1000}"
+TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
+KSELFTEST_SKIP_TEST=4
+
+# Verify we have LKDTM available in the kernel.
+if [ ! -r $TRIGGER ] ; then
+ /sbin/modprobe -q lkdtm || true
+ if [ ! -r $TRIGGER ] ; then
+ echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)"
+ else
+ echo "Cannot write $TRIGGER (need to run as root?)"
+ fi
+ # Skip this test
+ exit $KSELFTEST_SKIP_TEST
+fi
# Capture dmesg continuously since it may fill up depending on sample size.
log=$(mktemp -t stack-entropy-XXXXXX)
dmesg --follow >"$log" & pid=$!
report=-1
for i in $(seq 1 $samples); do
- echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT
+ echo "REPORT_STACK" > $TRIGGER
if [ -t 1 ]; then
percent=$(( 100 * $i / $samples ))
if [ "$percent" -ne "$report" ]; then
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 412d85205546..3f4c8cfe7aca 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -4059,6 +4059,9 @@ usage: ${0##*/} OPTS
-p Pause on fail
-P Pause after each test
-v Be verbose
+
+Tests:
+ $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER
EOF
}
diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings
index 694d70710ff0..dfc27cdc6c05 100644
--- a/tools/testing/selftests/net/settings
+++ b/tools/testing/selftests/net/settings
@@ -1 +1 @@
-timeout=300
+timeout=1500
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
index fe8fcfb334e0..a5cb4b09a46c 100644
--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
@@ -24,19 +24,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then
reservation_usage_file=rsvd.current
fi
-cgroup_path=/dev/cgroup/memory
-if [[ ! -e $cgroup_path ]]; then
- mkdir -p $cgroup_path
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup2 none $cgroup_path
- else
+ do_umount=1
+ fi
+ echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
+else
+ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $cgroup_path
+ do_umount=1
fi
fi
-
-if [[ $cgroup2 ]]; then
- echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control
-fi
+export cgroup_path
function cleanup() {
if [[ $cgroup2 ]]; then
@@ -108,7 +112,7 @@ function setup_cgroup() {
function wait_for_hugetlb_memory_to_get_depleted() {
local cgroup="$1"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get depleted.
while [ $(cat $path) != 0 ]; do
echo Waiting for hugetlb memory to get depleted.
@@ -121,7 +125,7 @@ function wait_for_hugetlb_memory_to_get_reserved() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory reservation to reach size $size.
@@ -134,7 +138,7 @@ function wait_for_hugetlb_memory_to_get_written() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory to reach size $size.
@@ -574,5 +578,7 @@ for populate in "" "-o"; do
done # populate
done # method
-umount $cgroup_path
-rmdir $cgroup_path
+if [[ $do_umount ]]; then
+ umount $cgroup_path
+ rmdir $cgroup_path
+fi
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..203323967b50 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1251,6 +1251,48 @@ TEST_F(hmm, anon_teardown)
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
TEST_F(hmm2, snapshot)
{
struct hmm_buffer *buffer;
diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index 257df94697a5..2a7c33631a29 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -4,7 +4,11 @@
*
* Example of remapping huge page memory in a user application using the
* mremap system call. Code assumes a hugetlbfs filesystem is mounted
- * at './huge'. The code will use 10MB worth of huge pages.
+ * at './huge'. The amount of memory used by this test is decided by a command
+ * line argument in MBs. If missing, the default amount is 10MB.
+ *
+ * To make sure the test triggers pmd sharing and goes through the 'unshare'
+ * path in the mremap code use 1GB (1024) or more.
*/
#define _GNU_SOURCE
@@ -18,8 +22,10 @@
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
-#define LENGTH (1UL * 1024 * 1024 * 1024)
+#define DEFAULT_LENGTH_MB 10UL
+#define MB_TO_BYTES(x) (x * 1024 * 1024)
+#define FILE_NAME "huge/hugepagefile"
#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
@@ -28,20 +34,20 @@ static void check_bytes(char *addr)
printf("First hex is %x\n", *((unsigned int *)addr));
}
-static void write_bytes(char *addr)
+static void write_bytes(char *addr, size_t len)
{
unsigned long i;
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < len; i++)
*(addr + i) = (char)i;
}
-static int read_bytes(char *addr)
+static int read_bytes(char *addr, size_t len)
{
unsigned long i;
check_bytes(addr);
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < len; i++)
if (*(addr + i) != (char)i) {
printf("Mismatch at %lu\n", i);
return 1;
@@ -99,11 +105,19 @@ static void register_region_with_uffd(char *addr, size_t len)
}
}
-int main(void)
+int main(int argc, char *argv[])
{
+ /* Read memory length as the first arg if valid, otherwise fallback to
+ * the default length. Any additional args are ignored.
+ */
+ size_t length = argc > 1 ? (size_t)atoi(argv[1]) : 0UL;
+
+ length = length > 0 ? length : DEFAULT_LENGTH_MB;
+ length = MB_TO_BYTES(length);
+
int ret = 0;
- int fd = open("/huge/test", O_CREAT | O_RDWR, 0755);
+ int fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
if (fd < 0) {
perror("Open failed");
@@ -112,7 +126,7 @@ int main(void)
/* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
unsigned long suggested_addr = 0x7eaa40000000;
- void *haddr = mmap((void *)suggested_addr, LENGTH, PROTECTION,
+ void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
printf("Map haddr: Returned address is %p\n", haddr);
if (haddr == MAP_FAILED) {
@@ -122,7 +136,7 @@ int main(void)
/* mmap again to a dummy address to hopefully trigger pmd sharing. */
suggested_addr = 0x7daa40000000;
- void *daddr = mmap((void *)suggested_addr, LENGTH, PROTECTION,
+ void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
printf("Map daddr: Returned address is %p\n", daddr);
if (daddr == MAP_FAILED) {
@@ -132,16 +146,16 @@ int main(void)
suggested_addr = 0x7faa40000000;
void *vaddr =
- mmap((void *)suggested_addr, LENGTH, PROTECTION, FLAGS, -1, 0);
+ mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
printf("Map vaddr: Returned address is %p\n", vaddr);
if (vaddr == MAP_FAILED) {
perror("mmap2");
exit(1);
}
- register_region_with_uffd(haddr, LENGTH);
+ register_region_with_uffd(haddr, length);
- void *addr = mremap(haddr, LENGTH, LENGTH,
+ void *addr = mremap(haddr, length, length,
MREMAP_MAYMOVE | MREMAP_FIXED, vaddr);
if (addr == MAP_FAILED) {
perror("mremap");
@@ -150,10 +164,10 @@ int main(void)
printf("Mremap: Returned address is %p\n", addr);
check_bytes(addr);
- write_bytes(addr);
- ret = read_bytes(addr);
+ write_bytes(addr, length);
+ ret = read_bytes(addr, length);
- munmap(addr, LENGTH);
+ munmap(addr, length);
return ret;
}
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
index 4a9a3afe9fd4..bf2d2a684edf 100644
--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
@@ -18,19 +18,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then
usage_file=current
fi
-CGROUP_ROOT='/dev/cgroup/memory'
-MNT='/mnt/huge/'
-if [[ ! -e $CGROUP_ROOT ]]; then
- mkdir -p $CGROUP_ROOT
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup2 none $CGROUP_ROOT
- sleep 1
- echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
- else
+ do_umount=1
+ fi
+ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+else
+ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $CGROUP_ROOT
+ do_umount=1
fi
fi
+MNT='/mnt/huge/'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index a24d30af3094..75d401741394 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -111,7 +111,7 @@ fi
echo "-----------------------"
echo "running hugepage-mremap"
echo "-----------------------"
-./hugepage-mremap
+./hugepage-mremap 256
if [ $? -ne 0 ]; then
echo "[FAIL]"
exitcode=1
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 9354a5e0321c..d3fd24f9fae8 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -87,7 +87,7 @@ static bool test_uffdio_minor = false;
static bool map_shared;
static int shm_fd;
-static int huge_fd = -1; /* only used for hugetlb_shared test */
+static int huge_fd;
static char *huge_fd_off0;
static unsigned long long *count_verify;
static int uffd = -1;
@@ -223,9 +223,6 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
static void hugetlb_release_pages(char *rel_area)
{
- if (huge_fd == -1)
- return;
-
if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
nr_pages * page_size))
@@ -238,17 +235,17 @@ static void hugetlb_allocate_area(void **alloc_area)
char **alloc_area_alias;
*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- map_shared ? MAP_SHARED :
- MAP_PRIVATE | MAP_HUGETLB |
+ (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+ MAP_HUGETLB |
(*alloc_area == area_src ? 0 : MAP_NORESERVE),
- huge_fd,
- *alloc_area == area_src ? 0 : nr_pages * page_size);
+ huge_fd, *alloc_area == area_src ? 0 :
+ nr_pages * page_size);
if (*alloc_area == MAP_FAILED)
err("mmap of hugetlbfs file failed");
if (map_shared) {
area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED,
+ MAP_SHARED | MAP_HUGETLB,
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
if (area_alias == MAP_FAILED)
@@ -648,7 +645,7 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
if (ret != sizeof(*msg)) {
if (ret < 0) {
- if (errno == EAGAIN)
+ if (errno == EAGAIN || errno == EINTR)
return 1;
err("blocking read error");
} else {
@@ -724,8 +721,11 @@ static void *uffd_poll_thread(void *arg)
for (;;) {
ret = poll(pollfd, 2, -1);
- if (ret <= 0)
+ if (ret <= 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
err("poll error: %d", ret);
+ }
if (pollfd[1].revents & POLLIN) {
if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
err("read pipefd error");
@@ -1417,7 +1417,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
static int userfaultfd_stress(void)
{
void *area;
- char *tmp_area;
unsigned long nr;
struct uffdio_register uffdio_register;
struct uffd_stats uffd_stats[nr_cpus];
@@ -1528,13 +1527,9 @@ static int userfaultfd_stress(void)
count_verify[nr], nr);
/* prepare next bounce */
- tmp_area = area_src;
- area_src = area_dst;
- area_dst = tmp_area;
+ swap(area_src, area_dst);
- tmp_area = area_src_alias;
- area_src_alias = area_dst_alias;
- area_dst_alias = tmp_area;
+ swap(area_src_alias, area_dst_alias);
uffd_stats_report(uffd_stats, nr_cpus);
}
diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
index d3d0d108924d..70a02301f4c2 100644
--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
@@ -14,7 +14,7 @@ want_sleep=$8
reserve=$9
echo "Putting task in cgroup '$cgroup'"
-echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs
+echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs
echo "Method is $method"
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
new file mode 100644
index 000000000000..2d52ff0bff7d
--- /dev/null
+++ b/tools/tracing/rtla/Makefile
@@ -0,0 +1,102 @@
+NAME := rtla
+VERSION := 0.5
+
+# From libtracefs:
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+$(call allow-override,PKG_CONFIG,pkg-config)
+$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/)
+$(call allow-override,LDCONFIG,ldconfig)
+
+INSTALL = install
+FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
+ -fasynchronous-unwind-tables -fstack-clash-protection
+WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized
+
+TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs)
+
+CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS)
+LDFLAGS := -ggdb
+LIBS := $$($(PKG_CONFIG) --libs libtracefs) -lprocps
+
+SRC := $(wildcard src/*.c)
+HDR := $(wildcard src/*.h)
+OBJ := $(SRC:.c=.o)
+DIRS := src
+FILES := Makefile README.txt
+CEXT := bz2
+TARBALL := $(NAME)-$(VERSION).tar.$(CEXT)
+TAROPTS := -cvjf $(TARBALL)
+BINDIR := /usr/bin
+DATADIR := /usr/share
+DOCDIR := $(DATADIR)/doc
+MANDIR := $(DATADIR)/man
+LICDIR := $(DATADIR)/licenses
+SRCTREE := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))
+
+# If running from the tarball, man pages are stored in the Documentation
+# dir. If running from the kernel source, man pages are stored in
+# Documentation/tools/rtla/.
+ifneq ($(wildcard Documentation/.*),)
+DOCSRC = Documentation/
+else
+DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/
+endif
+
+.PHONY: all
+all: rtla
+
+rtla: $(OBJ) doc
+ $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS)
+
+static: $(OBJ)
+ $(CC) -o rtla-static $(LDFLAGS) --static $(OBJ) $(LIBS) -lpthread -ldl
+
+.PHONY: install
+install: doc_install
+ $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)
+ $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR)
+ $(STRIP) $(DESTDIR)$(BINDIR)/rtla
+ @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise
+ ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/osnoise
+ @test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat
+ ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/timerlat
+
+.PHONY: clean tarball
+clean: doc_clean
+ @test ! -f rtla || rm rtla
+ @test ! -f rtla-static || rm rtla-static
+ @test ! -f src/rtla.o || rm src/rtla.o
+ @test ! -f $(TARBALL) || rm -f $(TARBALL)
+ @rm -rf *~ $(OBJ) *.tar.$(CEXT)
+
+tarball: clean
+ rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION)
+ cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION)
+ mkdir $(NAME)-$(VERSION)/Documentation/
+ cp -rp $(SRCTREE)/../../../Documentation/tools/rtla/* $(NAME)-$(VERSION)/Documentation/
+ tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION)
+ rm -rf $(NAME)-$(VERSION)
+
+.PHONY: doc doc_clean doc_install
+doc:
+ $(MAKE) -C $(DOCSRC)
+
+doc_clean:
+ $(MAKE) -C $(DOCSRC) clean
+
+doc_install:
+ $(MAKE) -C $(DOCSRC) install
diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt
new file mode 100644
index 000000000000..6c88446f7e74
--- /dev/null
+++ b/tools/tracing/rtla/README.txt
@@ -0,0 +1,36 @@
+RTLA: Real-Time Linux Analysis tools
+
+The rtla is a meta-tool that includes a set of commands that
+aims to analyze the real-time properties of Linux. But, instead of
+testing Linux as a black box, rtla leverages kernel tracing
+capabilities to provide precise information about the properties
+and root causes of unexpected results.
+
+Installing RTLA
+
+RTLA depends on some libraries and tools. More precisely, it depends on the
+following libraries:
+
+ - libtracefs
+ - libtraceevent
+ - procps
+
+It also depends on python3-docutils to compile man pages.
+
+For development, we suggest the following steps for compiling rtla:
+
+ $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git
+ $ cd libtraceevent/
+ $ make
+ $ sudo make install
+ $ cd ..
+ $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git
+ $ cd libtracefs/
+ $ make
+ $ sudo make install
+ $ cd ..
+ $ cd $rtla_src
+ $ make
+ $ sudo make install
+
+For further information, please refer to the rtla man page.
diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c
new file mode 100644
index 000000000000..7b73d1eccd0e
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise.c
@@ -0,0 +1,875 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "osnoise.h"
+#include "utils.h"
+
+/*
+ * osnoise_get_cpus - return the original "osnoise/cpus" content
+ *
+ * It also saves the value to be restored.
+ */
+char *osnoise_get_cpus(struct osnoise_context *context)
+{
+ if (context->curr_cpus)
+ return context->curr_cpus;
+
+ if (context->orig_cpus)
+ return context->orig_cpus;
+
+ context->orig_cpus = tracefs_instance_file_read(NULL, "osnoise/cpus", NULL);
+
+ /*
+ * The error value (NULL) is the same for tracefs_instance_file_read()
+ * and this functions, so:
+ */
+ return context->orig_cpus;
+}
+
+/*
+ * osnoise_set_cpus - configure osnoise to run on *cpus
+ *
+ * "osnoise/cpus" file is used to set the cpus in which osnoise/timerlat
+ * will run. This function opens this file, saves the current value,
+ * and set the cpus passed as argument.
+ */
+int osnoise_set_cpus(struct osnoise_context *context, char *cpus)
+{
+ char *orig_cpus = osnoise_get_cpus(context);
+ char buffer[1024];
+ int retval;
+
+ if (!orig_cpus)
+ return -1;
+
+ context->curr_cpus = strdup(cpus);
+ if (!context->curr_cpus)
+ return -1;
+
+ snprintf(buffer, 1024, "%s\n", cpus);
+
+ debug_msg("setting cpus to %s from %s", cpus, context->orig_cpus);
+
+ retval = tracefs_instance_file_write(NULL, "osnoise/cpus", buffer);
+ if (retval < 0) {
+ free(context->curr_cpus);
+ context->curr_cpus = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_cpus - restore the original "osnoise/cpus"
+ *
+ * osnoise_set_cpus() saves the original data for the "osnoise/cpus"
+ * file. This function restore the original config it was previously
+ * modified.
+ */
+void osnoise_restore_cpus(struct osnoise_context *context)
+{
+ int retval;
+
+ if (!context->orig_cpus)
+ return;
+
+ if (!context->curr_cpus)
+ return;
+
+ /* nothing to do? */
+ if (!strcmp(context->orig_cpus, context->curr_cpus))
+ goto out_done;
+
+ debug_msg("restoring cpus to %s", context->orig_cpus);
+
+ retval = tracefs_instance_file_write(NULL, "osnoise/cpus", context->orig_cpus);
+ if (retval < 0)
+ err_msg("could not restore original osnoise cpus\n");
+
+out_done:
+ free(context->curr_cpus);
+ context->curr_cpus = NULL;
+}
+
+/*
+ * osnoise_put_cpus - restore cpus config and cleanup data
+ */
+void osnoise_put_cpus(struct osnoise_context *context)
+{
+ osnoise_restore_cpus(context);
+
+ if (!context->orig_cpus)
+ return;
+
+ free(context->orig_cpus);
+ context->orig_cpus = NULL;
+}
+
+/*
+ * osnoise_read_ll_config - read a long long value from a config
+ *
+ * returns -1 on error.
+ */
+static long long osnoise_read_ll_config(char *rel_path)
+{
+ long long retval;
+ char *buffer;
+
+ buffer = tracefs_instance_file_read(NULL, rel_path, NULL);
+ if (!buffer)
+ return -1;
+
+ /* get_llong_from_str returns -1 on error */
+ retval = get_llong_from_str(buffer);
+
+ debug_msg("reading %s returned %lld\n", rel_path, retval);
+
+ free(buffer);
+
+ return retval;
+}
+
+/*
+ * osnoise_write_ll_config - write a long long value to a config in rel_path
+ *
+ * returns -1 on error.
+ */
+static long long osnoise_write_ll_config(char *rel_path, long long value)
+{
+ char buffer[BUFF_U64_STR_SIZE];
+ long long retval;
+
+ snprintf(buffer, sizeof(buffer), "%lld\n", value);
+
+ debug_msg("setting %s to %lld\n", rel_path, value);
+
+ retval = tracefs_instance_file_write(NULL, rel_path, buffer);
+ return retval;
+}
+
+/*
+ * osnoise_get_runtime - return the original "osnoise/runtime_us" value
+ *
+ * It also saves the value to be restored.
+ */
+unsigned long long osnoise_get_runtime(struct osnoise_context *context)
+{
+ long long runtime_us;
+
+ if (context->runtime_us != OSNOISE_TIME_INIT_VAL)
+ return context->runtime_us;
+
+ if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL)
+ return context->orig_runtime_us;
+
+ runtime_us = osnoise_read_ll_config("osnoise/runtime_us");
+ if (runtime_us < 0)
+ goto out_err;
+
+ context->orig_runtime_us = runtime_us;
+ return runtime_us;
+
+out_err:
+ return OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_get_period - return the original "osnoise/period_us" value
+ *
+ * It also saves the value to be restored.
+ */
+unsigned long long osnoise_get_period(struct osnoise_context *context)
+{
+ long long period_us;
+
+ if (context->period_us != OSNOISE_TIME_INIT_VAL)
+ return context->period_us;
+
+ if (context->orig_period_us != OSNOISE_TIME_INIT_VAL)
+ return context->orig_period_us;
+
+ period_us = osnoise_read_ll_config("osnoise/period_us");
+ if (period_us < 0)
+ goto out_err;
+
+ context->orig_period_us = period_us;
+ return period_us;
+
+out_err:
+ return OSNOISE_TIME_INIT_VAL;
+}
+
+static int __osnoise_write_runtime(struct osnoise_context *context,
+ unsigned long long runtime)
+{
+ int retval;
+
+ if (context->orig_runtime_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/runtime_us", runtime);
+ if (retval < 0)
+ return -1;
+
+ context->runtime_us = runtime;
+ return 0;
+}
+
+static int __osnoise_write_period(struct osnoise_context *context,
+ unsigned long long period)
+{
+ int retval;
+
+ if (context->orig_period_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/period_us", period);
+ if (retval < 0)
+ return -1;
+
+ context->period_us = period;
+ return 0;
+}
+
+/*
+ * osnoise_set_runtime_period - set osnoise runtime and period
+ *
+ * Osnoise's runtime and period are related as runtime <= period.
+ * Thus, this function saves the original values, and then tries
+ * to set the runtime and period if they are != 0.
+ */
+int osnoise_set_runtime_period(struct osnoise_context *context,
+ unsigned long long runtime,
+ unsigned long long period)
+{
+ unsigned long long curr_runtime_us;
+ unsigned long long curr_period_us;
+ int retval;
+
+ if (!period && !runtime)
+ return 0;
+
+ curr_runtime_us = osnoise_get_runtime(context);
+ curr_period_us = osnoise_get_period(context);
+
+ /* error getting any value? */
+ if (curr_period_us == OSNOISE_TIME_INIT_VAL || curr_runtime_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ if (!period) {
+ if (runtime > curr_period_us)
+ return -1;
+ return __osnoise_write_runtime(context, runtime);
+ } else if (!runtime) {
+ if (period < curr_runtime_us)
+ return -1;
+ return __osnoise_write_period(context, period);
+ }
+
+ if (runtime > curr_period_us) {
+ retval = __osnoise_write_period(context, period);
+ if (retval)
+ return -1;
+ retval = __osnoise_write_runtime(context, runtime);
+ if (retval)
+ return -1;
+ } else {
+ retval = __osnoise_write_runtime(context, runtime);
+ if (retval)
+ return -1;
+ retval = __osnoise_write_period(context, period);
+ if (retval)
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_runtime_period - restore the original runtime and period
+ */
+void osnoise_restore_runtime_period(struct osnoise_context *context)
+{
+ unsigned long long orig_runtime = context->orig_runtime_us;
+ unsigned long long orig_period = context->orig_period_us;
+ unsigned long long curr_runtime = context->runtime_us;
+ unsigned long long curr_period = context->period_us;
+ int retval;
+
+ if ((orig_runtime == OSNOISE_TIME_INIT_VAL) && (orig_period == OSNOISE_TIME_INIT_VAL))
+ return;
+
+ if ((orig_period == curr_period) && (orig_runtime == curr_runtime))
+ goto out_done;
+
+ retval = osnoise_set_runtime_period(context, orig_runtime, orig_period);
+ if (retval)
+ err_msg("Could not restore original osnoise runtime/period\n");
+
+out_done:
+ context->runtime_us = OSNOISE_TIME_INIT_VAL;
+ context->period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_put_runtime_period - restore original values and cleanup data
+ */
+void osnoise_put_runtime_period(struct osnoise_context *context)
+{
+ osnoise_restore_runtime_period(context);
+
+ if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL)
+ context->orig_runtime_us = OSNOISE_TIME_INIT_VAL;
+
+ if (context->orig_period_us != OSNOISE_TIME_INIT_VAL)
+ context->orig_period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_get_timerlat_period_us - read and save the original "timerlat_period_us"
+ */
+static long long
+osnoise_get_timerlat_period_us(struct osnoise_context *context)
+{
+ long long timerlat_period_us;
+
+ if (context->timerlat_period_us != OSNOISE_TIME_INIT_VAL)
+ return context->timerlat_period_us;
+
+ if (context->orig_timerlat_period_us != OSNOISE_TIME_INIT_VAL)
+ return context->orig_timerlat_period_us;
+
+ timerlat_period_us = osnoise_read_ll_config("osnoise/timerlat_period_us");
+ if (timerlat_period_us < 0)
+ goto out_err;
+
+ context->orig_timerlat_period_us = timerlat_period_us;
+ return timerlat_period_us;
+
+out_err:
+ return OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_set_timerlat_period_us - set "timerlat_period_us"
+ */
+int osnoise_set_timerlat_period_us(struct osnoise_context *context, long long timerlat_period_us)
+{
+ long long curr_timerlat_period_us = osnoise_get_timerlat_period_us(context);
+ int retval;
+
+ if (curr_timerlat_period_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/timerlat_period_us", timerlat_period_us);
+ if (retval < 0)
+ return -1;
+
+ context->timerlat_period_us = timerlat_period_us;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_timerlat_period_us - restore "timerlat_period_us"
+ */
+void osnoise_restore_timerlat_period_us(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL)
+ return;
+
+ if (context->orig_timerlat_period_us == context->timerlat_period_us)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/timerlat_period_us", context->orig_timerlat_period_us);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise timerlat_period_us\n");
+
+out_done:
+ context->timerlat_period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_put_timerlat_period_us - restore original values and cleanup data
+ */
+void osnoise_put_timerlat_period_us(struct osnoise_context *context)
+{
+ osnoise_restore_timerlat_period_us(context);
+
+ if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL)
+ return;
+
+ context->orig_timerlat_period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_get_stop_us - read and save the original "stop_tracing_us"
+ */
+static long long
+osnoise_get_stop_us(struct osnoise_context *context)
+{
+ long long stop_us;
+
+ if (context->stop_us != OSNOISE_OPTION_INIT_VAL)
+ return context->stop_us;
+
+ if (context->orig_stop_us != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_stop_us;
+
+ stop_us = osnoise_read_ll_config("osnoise/stop_tracing_us");
+ if (stop_us < 0)
+ goto out_err;
+
+ context->orig_stop_us = stop_us;
+ return stop_us;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_stop_us - set "stop_tracing_us"
+ */
+int osnoise_set_stop_us(struct osnoise_context *context, long long stop_us)
+{
+ long long curr_stop_us = osnoise_get_stop_us(context);
+ int retval;
+
+ if (curr_stop_us == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_us", stop_us);
+ if (retval < 0)
+ return -1;
+
+ context->stop_us = stop_us;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_stop_us - restore the original "stop_tracing_us"
+ */
+void osnoise_restore_stop_us(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_stop_us == context->stop_us)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_us", context->orig_stop_us);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise stop_us\n");
+
+out_done:
+ context->stop_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_stop_us - restore original values and cleanup data
+ */
+void osnoise_put_stop_us(struct osnoise_context *context)
+{
+ osnoise_restore_stop_us(context);
+
+ if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_stop_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_get_stop_total_us - read and save the original "stop_tracing_total_us"
+ */
+static long long
+osnoise_get_stop_total_us(struct osnoise_context *context)
+{
+ long long stop_total_us;
+
+ if (context->stop_total_us != OSNOISE_OPTION_INIT_VAL)
+ return context->stop_total_us;
+
+ if (context->orig_stop_total_us != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_stop_total_us;
+
+ stop_total_us = osnoise_read_ll_config("osnoise/stop_tracing_total_us");
+ if (stop_total_us < 0)
+ goto out_err;
+
+ context->orig_stop_total_us = stop_total_us;
+ return stop_total_us;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_stop_total_us - set "stop_tracing_total_us"
+ */
+int osnoise_set_stop_total_us(struct osnoise_context *context, long long stop_total_us)
+{
+ long long curr_stop_total_us = osnoise_get_stop_total_us(context);
+ int retval;
+
+ if (curr_stop_total_us == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", stop_total_us);
+ if (retval < 0)
+ return -1;
+
+ context->stop_total_us = stop_total_us;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_stop_total_us - restore the original "stop_tracing_total_us"
+ */
+void osnoise_restore_stop_total_us(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_stop_total_us == context->stop_total_us)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us",
+ context->orig_stop_total_us);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise stop_total_us\n");
+
+out_done:
+ context->stop_total_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_stop_total_us - restore original values and cleanup data
+ */
+void osnoise_put_stop_total_us(struct osnoise_context *context)
+{
+ osnoise_restore_stop_total_us(context);
+
+ if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_get_print_stack - read and save the original "print_stack"
+ */
+static long long
+osnoise_get_print_stack(struct osnoise_context *context)
+{
+ long long print_stack;
+
+ if (context->print_stack != OSNOISE_OPTION_INIT_VAL)
+ return context->print_stack;
+
+ if (context->orig_print_stack != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_print_stack;
+
+ print_stack = osnoise_read_ll_config("osnoise/print_stack");
+ if (print_stack < 0)
+ goto out_err;
+
+ context->orig_print_stack = print_stack;
+ return print_stack;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_print_stack - set "print_stack"
+ */
+int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack)
+{
+ long long curr_print_stack = osnoise_get_print_stack(context);
+ int retval;
+
+ if (curr_print_stack == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/print_stack", print_stack);
+ if (retval < 0)
+ return -1;
+
+ context->print_stack = print_stack;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_print_stack - restore the original "print_stack"
+ */
+void osnoise_restore_print_stack(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_print_stack == context->print_stack)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/print_stack", context->orig_print_stack);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise print_stack\n");
+
+out_done:
+ context->print_stack = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_print_stack - restore original values and cleanup data
+ */
+void osnoise_put_print_stack(struct osnoise_context *context)
+{
+ osnoise_restore_print_stack(context);
+
+ if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_print_stack = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * enable_osnoise - enable osnoise tracer in the trace_instance
+ */
+int enable_osnoise(struct trace_instance *trace)
+{
+ return enable_tracer_by_name(trace->inst, "osnoise");
+}
+
+/*
+ * enable_timerlat - enable timerlat tracer in the trace_instance
+ */
+int enable_timerlat(struct trace_instance *trace)
+{
+ return enable_tracer_by_name(trace->inst, "timerlat");
+}
+
+enum {
+ FLAG_CONTEXT_NEWLY_CREATED = (1 << 0),
+ FLAG_CONTEXT_DELETED = (1 << 1),
+};
+
+/*
+ * osnoise_get_context - increase the usage of a context and return it
+ */
+int osnoise_get_context(struct osnoise_context *context)
+{
+ int ret;
+
+ if (context->flags & FLAG_CONTEXT_DELETED) {
+ ret = -1;
+ } else {
+ context->ref++;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * osnoise_context_alloc - alloc an osnoise_context
+ *
+ * The osnoise context contains the information of the "osnoise/" configs.
+ * It is used to set and restore the config.
+ */
+struct osnoise_context *osnoise_context_alloc(void)
+{
+ struct osnoise_context *context;
+
+ context = calloc(1, sizeof(*context));
+ if (!context)
+ return NULL;
+
+ context->orig_stop_us = OSNOISE_OPTION_INIT_VAL;
+ context->stop_us = OSNOISE_OPTION_INIT_VAL;
+
+ context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL;
+ context->stop_total_us = OSNOISE_OPTION_INIT_VAL;
+
+ context->orig_print_stack = OSNOISE_OPTION_INIT_VAL;
+ context->print_stack = OSNOISE_OPTION_INIT_VAL;
+
+ osnoise_get_context(context);
+
+ return context;
+}
+
+/*
+ * osnoise_put_context - put the osnoise_put_context
+ *
+ * If there is no other user for the context, the original data
+ * is restored.
+ */
+void osnoise_put_context(struct osnoise_context *context)
+{
+ if (--context->ref < 1)
+ context->flags |= FLAG_CONTEXT_DELETED;
+
+ if (!(context->flags & FLAG_CONTEXT_DELETED))
+ return;
+
+ osnoise_put_cpus(context);
+ osnoise_put_runtime_period(context);
+ osnoise_put_stop_us(context);
+ osnoise_put_stop_total_us(context);
+ osnoise_put_timerlat_period_us(context);
+ osnoise_put_print_stack(context);
+
+ free(context);
+}
+
+/*
+ * osnoise_destroy_tool - disable trace, restore configs and free data
+ */
+void osnoise_destroy_tool(struct osnoise_tool *top)
+{
+ trace_instance_destroy(&top->trace);
+
+ if (top->context)
+ osnoise_put_context(top->context);
+
+ free(top);
+}
+
+/*
+ * osnoise_init_tool - init an osnoise tool
+ *
+ * It allocs data, create a context to store data and
+ * creates a new trace instance for the tool.
+ */
+struct osnoise_tool *osnoise_init_tool(char *tool_name)
+{
+ struct osnoise_tool *top;
+ int retval;
+
+ top = calloc(1, sizeof(*top));
+ if (!top)
+ return NULL;
+
+ top->context = osnoise_context_alloc();
+ if (!top->context)
+ goto out_err;
+
+ retval = trace_instance_init(&top->trace, tool_name);
+ if (retval)
+ goto out_err;
+
+ return top;
+out_err:
+ osnoise_destroy_tool(top);
+ return NULL;
+}
+
+/*
+ * osnoise_init_trace_tool - init a tracer instance to trace osnoise events
+ */
+struct osnoise_tool *osnoise_init_trace_tool(char *tracer)
+{
+ struct osnoise_tool *trace;
+ int retval;
+
+ trace = osnoise_init_tool("osnoise_trace");
+ if (!trace)
+ return NULL;
+
+ retval = tracefs_event_enable(trace->trace.inst, "osnoise", NULL);
+ if (retval < 0 && !errno) {
+ err_msg("Could not find osnoise events\n");
+ goto out_err;
+ }
+
+ retval = enable_tracer_by_name(trace->trace.inst, tracer);
+ if (retval) {
+ err_msg("Could not enable osnoiser tracer for tracing\n");
+ goto out_err;
+ }
+
+ return trace;
+out_err:
+ osnoise_destroy_tool(trace);
+ return NULL;
+}
+
+static void osnoise_usage(void)
+{
+ int i;
+
+ static const char *msg[] = {
+ "",
+ "osnoise version " VERSION,
+ "",
+ " usage: [rtla] osnoise [MODE] ...",
+ "",
+ " modes:",
+ " top - prints the summary from osnoise tracer",
+ " hist - prints a histogram of osnoise samples",
+ "",
+ "if no MODE is given, the top mode is called, passing the arguments",
+ NULL,
+ };
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+int osnoise_main(int argc, char *argv[])
+{
+ if (argc == 0)
+ goto usage;
+
+ /*
+ * if osnoise was called without any argument, run the
+ * default cmdline.
+ */
+ if (argc == 1) {
+ osnoise_top_main(argc, argv);
+ exit(0);
+ }
+
+ if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
+ osnoise_usage();
+ exit(0);
+ } else if (strncmp(argv[1], "-", 1) == 0) {
+ /* the user skipped the tool, call the default one */
+ osnoise_top_main(argc, argv);
+ exit(0);
+ } else if (strcmp(argv[1], "top") == 0) {
+ osnoise_top_main(argc-1, &argv[1]);
+ exit(0);
+ } else if (strcmp(argv[1], "hist") == 0) {
+ osnoise_hist_main(argc-1, &argv[1]);
+ exit(0);
+ }
+
+usage:
+ osnoise_usage();
+ exit(1);
+}
diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h
new file mode 100644
index 000000000000..9e4b2e2a4559
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise.h
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "trace.h"
+
+/*
+ * osnoise_context - read, store, write, restore osnoise configs.
+ */
+struct osnoise_context {
+ int flags;
+ int ref;
+
+ char *curr_cpus;
+ char *orig_cpus;
+
+ /* 0 as init value */
+ unsigned long long orig_runtime_us;
+ unsigned long long runtime_us;
+
+ /* 0 as init value */
+ unsigned long long orig_period_us;
+ unsigned long long period_us;
+
+ /* 0 as init value */
+ long long orig_timerlat_period_us;
+ long long timerlat_period_us;
+
+ /* -1 as init value because 0 is disabled */
+ long long orig_stop_us;
+ long long stop_us;
+
+ /* -1 as init value because 0 is disabled */
+ long long orig_stop_total_us;
+ long long stop_total_us;
+
+ /* -1 as init value because 0 is disabled */
+ long long orig_print_stack;
+ long long print_stack;
+};
+
+/*
+ * *_INIT_VALs are also invalid values, they are used to
+ * communicate errors.
+ */
+#define OSNOISE_OPTION_INIT_VAL (-1)
+#define OSNOISE_TIME_INIT_VAL (0)
+
+struct osnoise_context *osnoise_context_alloc(void);
+int osnoise_get_context(struct osnoise_context *context);
+void osnoise_put_context(struct osnoise_context *context);
+
+int osnoise_set_cpus(struct osnoise_context *context, char *cpus);
+void osnoise_restore_cpus(struct osnoise_context *context);
+
+int osnoise_set_runtime_period(struct osnoise_context *context,
+ unsigned long long runtime,
+ unsigned long long period);
+void osnoise_restore_runtime_period(struct osnoise_context *context);
+
+int osnoise_set_stop_us(struct osnoise_context *context,
+ long long stop_us);
+void osnoise_restore_stop_us(struct osnoise_context *context);
+
+int osnoise_set_stop_total_us(struct osnoise_context *context,
+ long long stop_total_us);
+void osnoise_restore_stop_total_us(struct osnoise_context *context);
+
+int osnoise_set_timerlat_period_us(struct osnoise_context *context,
+ long long timerlat_period_us);
+void osnoise_restore_timerlat_period_us(struct osnoise_context *context);
+
+void osnoise_restore_print_stack(struct osnoise_context *context);
+int osnoise_set_print_stack(struct osnoise_context *context,
+ long long print_stack);
+
+/*
+ * osnoise_tool - osnoise based tool definition.
+ */
+struct osnoise_tool {
+ struct trace_instance trace;
+ struct osnoise_context *context;
+ void *data;
+ void *params;
+ time_t start_time;
+};
+
+void osnoise_destroy_tool(struct osnoise_tool *top);
+struct osnoise_tool *osnoise_init_tool(char *tool_name);
+struct osnoise_tool *osnoise_init_trace_tool(char *tracer);
+
+int osnoise_hist_main(int argc, char *argv[]);
+int osnoise_top_main(int argc, char **argv);
+int osnoise_main(int argc, char **argv);
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
new file mode 100644
index 000000000000..180fcbe423cd
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -0,0 +1,801 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "utils.h"
+#include "osnoise.h"
+
+struct osnoise_hist_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ unsigned long long period;
+ long long stop_us;
+ long long stop_total_us;
+ int sleep_time;
+ int duration;
+ int set_sched;
+ int output_divisor;
+ struct sched_attr sched_param;
+
+ char no_header;
+ char no_summary;
+ char no_index;
+ char with_zeros;
+ int bucket_size;
+ int entries;
+};
+
+struct osnoise_hist_cpu {
+ int *samples;
+ int count;
+
+ unsigned long long min_sample;
+ unsigned long long sum_sample;
+ unsigned long long max_sample;
+
+};
+
+struct osnoise_hist_data {
+ struct tracefs_hist *trace_hist;
+ struct osnoise_hist_cpu *hist;
+ int entries;
+ int bucket_size;
+ int nr_cpus;
+};
+
+/*
+ * osnoise_free_histogram - free runtime data
+ */
+static void
+osnoise_free_histogram(struct osnoise_hist_data *data)
+{
+ int cpu;
+
+ /* one histogram for IRQ and one for thread, per CPU */
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (data->hist[cpu].samples)
+ free(data->hist[cpu].samples);
+ }
+
+ /* one set of histograms per CPU */
+ if (data->hist)
+ free(data->hist);
+
+ free(data);
+}
+
+/*
+ * osnoise_alloc_histogram - alloc runtime data
+ */
+static struct osnoise_hist_data
+*osnoise_alloc_histogram(int nr_cpus, int entries, int bucket_size)
+{
+ struct osnoise_hist_data *data;
+ int cpu;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->entries = entries;
+ data->bucket_size = bucket_size;
+ data->nr_cpus = nr_cpus;
+
+ data->hist = calloc(1, sizeof(*data->hist) * nr_cpus);
+ if (!data->hist)
+ goto cleanup;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->hist[cpu].samples = calloc(1, sizeof(*data->hist->samples) * (entries + 1));
+ if (!data->hist[cpu].samples)
+ goto cleanup;
+ }
+
+ /* set the min to max */
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ data->hist[cpu].min_sample = ~0;
+
+ return data;
+
+cleanup:
+ osnoise_free_histogram(data);
+ return NULL;
+}
+
+static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
+ unsigned long long duration, int count)
+{
+ struct osnoise_hist_params *params = tool->params;
+ struct osnoise_hist_data *data = tool->data;
+ int entries = data->entries;
+ int bucket;
+ int *hist;
+
+ if (params->output_divisor)
+ duration = duration / params->output_divisor;
+
+ if (data->bucket_size)
+ bucket = duration / data->bucket_size;
+
+ hist = data->hist[cpu].samples;
+ data->hist[cpu].count += count;
+ update_min(&data->hist[cpu].min_sample, &duration);
+ update_sum(&data->hist[cpu].sum_sample, &duration);
+ update_max(&data->hist[cpu].max_sample, &duration);
+
+ if (bucket < entries)
+ hist[bucket] += count;
+ else
+ hist[entries] += count;
+}
+
+/*
+ * osnoise_destroy_trace_hist - disable events used to collect histogram
+ */
+static void osnoise_destroy_trace_hist(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_data *data = tool->data;
+
+ tracefs_hist_pause(tool->trace.inst, data->trace_hist);
+ tracefs_hist_destroy(tool->trace.inst, data->trace_hist);
+}
+
+/*
+ * osnoise_init_trace_hist - enable events used to collect histogram
+ */
+static int osnoise_init_trace_hist(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_params *params = tool->params;
+ struct osnoise_hist_data *data = tool->data;
+ int bucket_size;
+ char buff[128];
+ int retval = 0;
+
+ /*
+ * Set the size of the bucket.
+ */
+ bucket_size = params->output_divisor * params->bucket_size;
+ snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size);
+
+ data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold",
+ buff, TRACEFS_HIST_KEY_NORMAL);
+ if (!data->trace_hist)
+ return 1;
+
+ retval = tracefs_hist_add_key(data->trace_hist, "cpu", 0);
+ if (retval)
+ goto out_err;
+
+ retval = tracefs_hist_start(tool->trace.inst, data->trace_hist);
+ if (retval)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ osnoise_destroy_trace_hist(tool);
+ return 1;
+}
+
+/*
+ * osnoise_read_trace_hist - parse histogram file and file osnoise histogram
+ */
+static void osnoise_read_trace_hist(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_data *data = tool->data;
+ long long cpu, counter, duration;
+ char *content, *position;
+
+ tracefs_hist_pause(tool->trace.inst, data->trace_hist);
+
+ content = tracefs_event_file_read(tool->trace.inst, "osnoise",
+ "sample_threshold",
+ "hist", NULL);
+ if (!content)
+ return;
+
+ position = content;
+ while (true) {
+ position = strstr(position, "duration: ~");
+ if (!position)
+ break;
+ position += strlen("duration: ~");
+ duration = get_llong_from_str(position);
+ if (duration == -1)
+ err_msg("error reading duration from histogram\n");
+
+ position = strstr(position, "cpu:");
+ if (!position)
+ break;
+ position += strlen("cpu: ");
+ cpu = get_llong_from_str(position);
+ if (cpu == -1)
+ err_msg("error reading cpu from histogram\n");
+
+ position = strstr(position, "hitcount:");
+ if (!position)
+ break;
+ position += strlen("hitcount: ");
+ counter = get_llong_from_str(position);
+ if (counter == -1)
+ err_msg("error reading counter from histogram\n");
+
+ osnoise_hist_update_multiple(tool, cpu, duration, counter);
+ }
+ free(content);
+}
+
+/*
+ * osnoise_hist_header - print the header of the tracer to the output
+ */
+static void osnoise_hist_header(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_params *params = tool->params;
+ struct osnoise_hist_data *data = tool->data;
+ struct trace_seq *s = tool->trace.seq;
+ char duration[26];
+ int cpu;
+
+ if (params->no_header)
+ return;
+
+ get_duration(tool->start_time, duration, sizeof(duration));
+ trace_seq_printf(s, "# RTLA osnoise histogram\n");
+ trace_seq_printf(s, "# Time unit is %s (%s)\n",
+ params->output_divisor == 1 ? "nanoseconds" : "microseconds",
+ params->output_divisor == 1 ? "ns" : "us");
+
+ trace_seq_printf(s, "# Duration: %s\n", duration);
+
+ if (!params->no_index)
+ trace_seq_printf(s, "Index");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(s, " CPU-%03d", cpu);
+ }
+ trace_seq_printf(s, "\n");
+
+ trace_seq_do_printf(s);
+ trace_seq_reset(s);
+}
+
+/*
+ * osnoise_print_summary - print the summary of the hist data to the output
+ */
+static void
+osnoise_print_summary(struct osnoise_hist_params *params,
+ struct trace_instance *trace,
+ struct osnoise_hist_data *data)
+{
+ int cpu;
+
+ if (params->no_summary)
+ return;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "count:");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].count);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "min: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_sample);
+
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "avg: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ if (data->hist[cpu].count)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].sum_sample / data->hist[cpu].count);
+ else
+ trace_seq_printf(trace->seq, " - ");
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "max: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_sample);
+
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * osnoise_print_stats - print data for all CPUs
+ */
+static void
+osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool)
+{
+ struct osnoise_hist_data *data = tool->data;
+ struct trace_instance *trace = &tool->trace;
+ int bucket, cpu;
+ int total;
+
+ osnoise_hist_header(tool);
+
+ for (bucket = 0; bucket < data->entries; bucket++) {
+ total = 0;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "%-6d",
+ bucket * data->bucket_size);
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ total += data->hist[cpu].samples[bucket];
+ trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]);
+ }
+
+ if (total == 0 && !params->with_zeros) {
+ trace_seq_reset(trace->seq);
+ continue;
+ }
+
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+ }
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "over: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].samples[data->entries]);
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+
+ osnoise_print_summary(params, trace, data);
+}
+
+/*
+ * osnoise_hist_usage - prints osnoise hist usage message
+ */
+static void osnoise_hist_usage(char *usage)
+{
+ int i;
+
+ static const char * const msg[] = {
+ "",
+ " usage: rtla osnoise hist [-h] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority] [-b N] [-e N] [--no-header] [--no-summary] \\",
+ " [--no-index] [--with-zeros]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: osnoise period in us",
+ " -r/--runtime us: osnoise runtime in us",
+ " -s/--stop us: stop trace if a single sample is higher than the argument in us",
+ " -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
+ " -c/--cpus cpu-list: list of cpus to run osnoise threads",
+ " -d/--duration time[s|m|h|d]: duration of the session",
+ " -D/--debug: print debug info",
+ " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
+ " -b/--bucket-size N: set the histogram bucket size (default 1)",
+ " -e/--entries N: set the number of entries of the histogram (default 256)",
+ " --no-header: do not print header",
+ " --no-summary: do not print summary",
+ " --no-index: do not print index",
+ " --with-zeros: print zero only entries",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period: set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters
+ */
+static struct osnoise_hist_params
+*osnoise_hist_parse_args(int argc, char *argv[])
+{
+ struct osnoise_hist_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ /* display data in microseconds */
+ params->output_divisor = 1000;
+ params->bucket_size = 1;
+ params->entries = 256;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"bucket-size", required_argument, 0, 'b'},
+ {"entries", required_argument, 0, 'e'},
+ {"cpus", required_argument, 0, 'c'},
+ {"debug", no_argument, 0, 'D'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"runtime", required_argument, 0, 'r'},
+ {"stop", required_argument, 0, 's'},
+ {"stop-total", required_argument, 0, 'S'},
+ {"trace", optional_argument, 0, 't'},
+ {"no-header", no_argument, 0, '0'},
+ {"no-summary", no_argument, 0, '1'},
+ {"no-index", no_argument, 0, '2'},
+ {"with-zeros", no_argument, 0, '3'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:b:d:e:Dhp:P:r:s:S:t::0123",
+ long_options, &option_index);
+
+ /* detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'b':
+ params->bucket_size = get_llong_from_str(optarg);
+ if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
+ osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n");
+ break;
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ osnoise_hist_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ osnoise_hist_usage("Invalid -D duration\n");
+ break;
+ case 'e':
+ params->entries = get_llong_from_str(optarg);
+ if ((params->entries < 10) || (params->entries > 9999999))
+ osnoise_hist_usage("Entries must be > 10 and < 9999999\n");
+ break;
+ case 'h':
+ case '?':
+ osnoise_hist_usage(NULL);
+ break;
+ case 'p':
+ params->period = get_llong_from_str(optarg);
+ if (params->period > 10000000)
+ osnoise_hist_usage("Period longer than 10 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ osnoise_hist_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 'r':
+ params->runtime = get_llong_from_str(optarg);
+ if (params->runtime < 100)
+ osnoise_hist_usage("Runtime shorter than 100 us\n");
+ break;
+ case 's':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'S':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+ if (optarg)
+ /* skip = */
+ params->trace_output = &optarg[1];
+ else
+ params->trace_output = "osnoise_trace.txt";
+ break;
+ case '0': /* no header */
+ params->no_header = 1;
+ break;
+ case '1': /* no summary */
+ params->no_summary = 1;
+ break;
+ case '2': /* no index */
+ params->no_index = 1;
+ break;
+ case '3': /* with zeros */
+ params->with_zeros = 1;
+ break;
+ default:
+ osnoise_hist_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("rtla needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->no_index && !params->with_zeros)
+ osnoise_hist_usage("no-index set and with-zeros not set - it does not make sense");
+
+ return params;
+}
+
+/*
+ * osnoise_hist_apply_config - apply the hist configs to the initialized tool
+ */
+static int
+osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(tool->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->runtime || params->period) {
+ retval = osnoise_set_runtime_period(tool->context,
+ params->runtime,
+ params->period);
+ if (retval) {
+ err_msg("Failed to set runtime and/or period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * osnoise_init_hist - initialize a osnoise hist tool with parameters
+ */
+static struct osnoise_tool
+*osnoise_init_hist(struct osnoise_hist_params *params)
+{
+ struct osnoise_tool *tool;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ tool = osnoise_init_tool("osnoise_hist");
+ if (!tool)
+ return NULL;
+
+ tool->data = osnoise_alloc_histogram(nr_cpus, params->entries, params->bucket_size);
+ if (!tool->data)
+ goto out_err;
+
+ tool->params = params;
+
+ return tool;
+
+out_err:
+ osnoise_destroy_tool(tool);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_hist(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * osnoise_hist_set_signals - handles the signal to stop the tool
+ */
+static void
+osnoise_hist_set_signals(struct osnoise_hist_params *params)
+{
+ signal(SIGINT, stop_hist);
+ if (params->duration) {
+ signal(SIGALRM, stop_hist);
+ alarm(params->duration);
+ }
+}
+
+int osnoise_hist_main(int argc, char *argv[])
+{
+ struct osnoise_hist_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *tool;
+ int return_value = 1;
+ int retval;
+
+ params = osnoise_hist_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ tool = osnoise_init_hist(params);
+ if (!tool) {
+ err_msg("Could not init osnoise hist\n");
+ goto out_exit;
+ }
+
+ retval = osnoise_hist_apply_config(tool, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_destroy;
+ }
+
+ trace = &tool->trace;
+
+ retval = enable_osnoise(trace);
+ if (retval) {
+ err_msg("Failed to enable osnoise tracer\n");
+ goto out_destroy;
+ }
+
+ retval = osnoise_init_trace_hist(tool);
+ if (retval)
+ goto out_destroy;
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("osnoise/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_hist;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("osnoise");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_hist;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ tool->start_time = time(NULL);
+ osnoise_hist_set_signals(params);
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_hist;
+ }
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+ };
+
+ osnoise_read_trace_hist(tool);
+
+ osnoise_print_stats(params, tool);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("rtla timelat hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_hist:
+ osnoise_free_histogram(tool->data);
+out_destroy:
+ osnoise_destroy_tool(tool);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+ free(params);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
new file mode 100644
index 000000000000..332b2ac205fc
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "osnoise.h"
+#include "utils.h"
+
+/*
+ * osnoise top parameters
+ */
+struct osnoise_top_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ unsigned long long period;
+ long long stop_us;
+ long long stop_total_us;
+ int sleep_time;
+ int duration;
+ int quiet;
+ int set_sched;
+ struct sched_attr sched_param;
+};
+
+struct osnoise_top_cpu {
+ unsigned long long sum_runtime;
+ unsigned long long sum_noise;
+ unsigned long long max_noise;
+ unsigned long long max_sample;
+
+ unsigned long long hw_count;
+ unsigned long long nmi_count;
+ unsigned long long irq_count;
+ unsigned long long softirq_count;
+ unsigned long long thread_count;
+
+ int sum_cycles;
+};
+
+struct osnoise_top_data {
+ struct osnoise_top_cpu *cpu_data;
+ int nr_cpus;
+};
+
+/*
+ * osnoise_free_top - free runtime data
+ */
+static void
+osnoise_free_top(struct osnoise_top_data *data)
+{
+ free(data->cpu_data);
+ free(data);
+}
+
+/*
+ * osnoise_alloc_histogram - alloc runtime data
+ */
+static struct osnoise_top_data *osnoise_alloc_top(int nr_cpus)
+{
+ struct osnoise_top_data *data;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->nr_cpus = nr_cpus;
+
+ /* one set of histograms per CPU */
+ data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus);
+ if (!data->cpu_data)
+ goto cleanup;
+
+ return data;
+
+cleanup:
+ osnoise_free_top(data);
+ return NULL;
+}
+
+/*
+ * osnoise_top_handler - this is the handler for osnoise tracer events
+ */
+static int
+osnoise_top_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct trace_instance *trace = context;
+ struct osnoise_tool *tool;
+ unsigned long long val;
+ struct osnoise_top_cpu *cpu_data;
+ struct osnoise_top_data *data;
+ int cpu = record->cpu;
+
+ tool = container_of(trace, struct osnoise_tool, trace);
+
+ data = tool->data;
+ cpu_data = &data->cpu_data[cpu];
+
+ cpu_data->sum_cycles++;
+
+ tep_get_field_val(s, event, "runtime", record, &val, 1);
+ update_sum(&cpu_data->sum_runtime, &val);
+
+ tep_get_field_val(s, event, "noise", record, &val, 1);
+ update_max(&cpu_data->max_noise, &val);
+ update_sum(&cpu_data->sum_noise, &val);
+
+ tep_get_field_val(s, event, "max_sample", record, &val, 1);
+ update_max(&cpu_data->max_sample, &val);
+
+ tep_get_field_val(s, event, "hw_count", record, &val, 1);
+ update_sum(&cpu_data->hw_count, &val);
+
+ tep_get_field_val(s, event, "nmi_count", record, &val, 1);
+ update_sum(&cpu_data->nmi_count, &val);
+
+ tep_get_field_val(s, event, "irq_count", record, &val, 1);
+ update_sum(&cpu_data->irq_count, &val);
+
+ tep_get_field_val(s, event, "softirq_count", record, &val, 1);
+ update_sum(&cpu_data->softirq_count, &val);
+
+ tep_get_field_val(s, event, "thread_count", record, &val, 1);
+ update_sum(&cpu_data->thread_count, &val);
+
+ return 0;
+}
+
+/*
+ * osnoise_top_header - print the header of the tool output
+ */
+static void osnoise_top_header(struct osnoise_tool *top)
+{
+ struct trace_seq *s = top->trace.seq;
+ char duration[26];
+
+ get_duration(top->start_time, duration, sizeof(duration));
+
+ trace_seq_printf(s, "\033[2;37;40m");
+ trace_seq_printf(s, " Operating System Noise");
+ trace_seq_printf(s, " ");
+ trace_seq_printf(s, " ");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+
+ trace_seq_printf(s, "duration: %9s | time is in us\n", duration);
+
+ trace_seq_printf(s, "\033[2;30;47m");
+ trace_seq_printf(s, "CPU Period Runtime ");
+ trace_seq_printf(s, " Noise ");
+ trace_seq_printf(s, " %% CPU Aval ");
+ trace_seq_printf(s, " Max Noise Max Single ");
+ trace_seq_printf(s, " HW NMI IRQ Softirq Thread");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+}
+
+/*
+ * clear_terminal - clears the output terminal
+ */
+static void clear_terminal(struct trace_seq *seq)
+{
+ if (!config_debug)
+ trace_seq_printf(seq, "\033c");
+}
+
+/*
+ * osnoise_top_print - prints the output of a given CPU
+ */
+static void osnoise_top_print(struct osnoise_tool *tool, int cpu)
+{
+ struct trace_seq *s = tool->trace.seq;
+ struct osnoise_top_cpu *cpu_data;
+ struct osnoise_top_data *data;
+ int percentage;
+ int decimal;
+
+ data = tool->data;
+ cpu_data = &data->cpu_data[cpu];
+
+ if (!cpu_data->sum_runtime)
+ return;
+
+ percentage = ((cpu_data->sum_runtime - cpu_data->sum_noise) * 10000000)
+ / cpu_data->sum_runtime;
+ decimal = percentage % 100000;
+ percentage = percentage / 100000;
+
+ trace_seq_printf(s, "%3d #%-6d %12llu ", cpu, cpu_data->sum_cycles, cpu_data->sum_runtime);
+ trace_seq_printf(s, "%12llu ", cpu_data->sum_noise);
+ trace_seq_printf(s, " %3d.%05d", percentage, decimal);
+ trace_seq_printf(s, "%12llu %12llu", cpu_data->max_noise, cpu_data->max_sample);
+
+ trace_seq_printf(s, "%12llu ", cpu_data->hw_count);
+ trace_seq_printf(s, "%12llu ", cpu_data->nmi_count);
+ trace_seq_printf(s, "%12llu ", cpu_data->irq_count);
+ trace_seq_printf(s, "%12llu ", cpu_data->softirq_count);
+ trace_seq_printf(s, "%12llu\n", cpu_data->thread_count);
+}
+
+/*
+ * osnoise_print_stats - print data for all cpus
+ */
+static void
+osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top)
+{
+ struct trace_instance *trace = &top->trace;
+ static int nr_cpus = -1;
+ int i;
+
+ if (nr_cpus == -1)
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ if (!params->quiet)
+ clear_terminal(trace->seq);
+
+ osnoise_top_header(top);
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (params->cpus && !params->monitored_cpus[i])
+ continue;
+ osnoise_top_print(top, i);
+ }
+
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * osnoise_top_usage - prints osnoise top usage message
+ */
+void osnoise_top_usage(char *usage)
+{
+ int i;
+
+ static const char * const msg[] = {
+ " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: osnoise period in us",
+ " -r/--runtime us: osnoise runtime in us",
+ " -s/--stop us: stop trace if a single sample is higher than the argument in us",
+ " -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
+ " -c/--cpus cpu-list: list of cpus to run osnoise threads",
+ " -d/--duration time[s|m|h|d]: duration of the session",
+ " -D/--debug: print debug info",
+ " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
+ " -q/--quiet print only a summary at the end",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * osnoise_top_parse_args - allocs, parse and fill the cmd line parameters
+ */
+struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+{
+ struct osnoise_top_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ while (1) {
+ static struct option long_options[] = {
+ {"cpus", required_argument, 0, 'c'},
+ {"debug", no_argument, 0, 'D'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"quiet", no_argument, 0, 'q'},
+ {"runtime", required_argument, 0, 'r'},
+ {"stop", required_argument, 0, 's'},
+ {"stop-total", required_argument, 0, 'S'},
+ {"trace", optional_argument, 0, 't'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:d:Dhp:P:qr:s:S:t::",
+ long_options, &option_index);
+
+ /* Detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ osnoise_top_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ osnoise_top_usage("Invalid -D duration\n");
+ break;
+ case 'h':
+ case '?':
+ osnoise_top_usage(NULL);
+ break;
+ case 'p':
+ params->period = get_llong_from_str(optarg);
+ if (params->period > 10000000)
+ osnoise_top_usage("Period longer than 10 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ osnoise_top_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 'q':
+ params->quiet = 1;
+ break;
+ case 'r':
+ params->runtime = get_llong_from_str(optarg);
+ if (params->runtime < 100)
+ osnoise_top_usage("Runtime shorter than 100 us\n");
+ break;
+ case 's':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'S':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+ if (optarg)
+ /* skip = */
+ params->trace_output = &optarg[1];
+ else
+ params->trace_output = "osnoise_trace.txt";
+ break;
+ default:
+ osnoise_top_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("osnoise needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return params;
+}
+
+/*
+ * osnoise_top_apply_config - apply the top configs to the initialized tool
+ */
+static int
+osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(tool->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->runtime || params->period) {
+ retval = osnoise_set_runtime_period(tool->context,
+ params->runtime,
+ params->period);
+ if (retval) {
+ err_msg("Failed to set runtime and/or period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * osnoise_init_top - initialize a osnoise top tool with parameters
+ */
+struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params)
+{
+ struct osnoise_tool *tool;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ tool = osnoise_init_tool("osnoise_top");
+ if (!tool)
+ return NULL;
+
+ tool->data = osnoise_alloc_top(nr_cpus);
+ if (!tool->data)
+ goto out_err;
+
+ tool->params = params;
+
+ tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise",
+ osnoise_top_handler, NULL);
+
+ return tool;
+
+out_err:
+ osnoise_free_top(tool->data);
+ osnoise_destroy_tool(tool);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_top(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * osnoise_top_set_signals - handles the signal to stop the tool
+ */
+static void osnoise_top_set_signals(struct osnoise_top_params *params)
+{
+ signal(SIGINT, stop_top);
+ if (params->duration) {
+ signal(SIGALRM, stop_top);
+ alarm(params->duration);
+ }
+}
+
+int osnoise_top_main(int argc, char **argv)
+{
+ struct osnoise_top_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *tool;
+ int return_value = 1;
+ int retval;
+
+ params = osnoise_top_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ tool = osnoise_init_top(params);
+ if (!tool) {
+ err_msg("Could not init osnoise top\n");
+ goto out_exit;
+ }
+
+ retval = osnoise_top_apply_config(tool, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_top;
+ }
+
+ trace = &tool->trace;
+
+ retval = enable_osnoise(trace);
+ if (retval) {
+ err_msg("Failed to enable osnoise tracer\n");
+ goto out_top;
+ }
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("osnoise/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_top;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("osnoise");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_top;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ tool->start_time = time(NULL);
+ osnoise_top_set_signals(params);
+
+ do {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_top;
+ }
+
+ if (!params->quiet)
+ osnoise_print_stats(params, tool);
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+
+ } while (!stop_tracing);
+
+ osnoise_print_stats(params, tool);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("osnoise hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_top:
+ osnoise_free_top(tool->data);
+ osnoise_destroy_tool(tool);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c
new file mode 100644
index 000000000000..09bd21b8af81
--- /dev/null
+++ b/tools/tracing/rtla/src/rtla.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "osnoise.h"
+#include "timerlat.h"
+
+/*
+ * rtla_usage - print rtla usage
+ */
+static void rtla_usage(void)
+{
+ int i;
+
+ static const char *msg[] = {
+ "",
+ "rtla version " VERSION,
+ "",
+ " usage: rtla COMMAND ...",
+ "",
+ " commands:",
+ " osnoise - gives information about the operating system noise (osnoise)",
+ " timerlat - measures the timer irq and thread latency",
+ "",
+ NULL,
+ };
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * run_command - try to run a rtla tool command
+ *
+ * It returns 0 if it fails. The tool's main will generally not
+ * return as they should call exit().
+ */
+int run_command(int argc, char **argv, int start_position)
+{
+ if (strcmp(argv[start_position], "osnoise") == 0) {
+ osnoise_main(argc-start_position, &argv[start_position]);
+ goto ran;
+ } else if (strcmp(argv[start_position], "timerlat") == 0) {
+ timerlat_main(argc-start_position, &argv[start_position]);
+ goto ran;
+ }
+
+ return 0;
+ran:
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ int retval;
+
+ /* is it an alias? */
+ retval = run_command(argc, argv, 0);
+ if (retval)
+ exit(0);
+
+ if (argc < 2)
+ goto usage;
+
+ if (strcmp(argv[1], "-h") == 0) {
+ rtla_usage();
+ exit(0);
+ } else if (strcmp(argv[1], "--help") == 0) {
+ rtla_usage();
+ exit(0);
+ }
+
+ retval = run_command(argc, argv, 1);
+ if (retval)
+ exit(0);
+
+usage:
+ rtla_usage();
+ exit(1);
+}
diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c
new file mode 100644
index 000000000000..97abbf494fee
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "timerlat.h"
+
+static void timerlat_usage(void)
+{
+ int i;
+
+ static const char * const msg[] = {
+ "",
+ "timerlat version " VERSION,
+ "",
+ " usage: [rtla] timerlat [MODE] ...",
+ "",
+ " modes:",
+ " top - prints the summary from timerlat tracer",
+ " hist - prints a histogram of timer latencies",
+ "",
+ "if no MODE is given, the top mode is called, passing the arguments",
+ NULL,
+ };
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+int timerlat_main(int argc, char *argv[])
+{
+ if (argc == 0)
+ goto usage;
+
+ /*
+ * if timerlat was called without any argument, run the
+ * default cmdline.
+ */
+ if (argc == 1) {
+ timerlat_top_main(argc, argv);
+ exit(0);
+ }
+
+ if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
+ timerlat_usage();
+ exit(0);
+ } else if (strncmp(argv[1], "-", 1) == 0) {
+ /* the user skipped the tool, call the default one */
+ timerlat_top_main(argc, argv);
+ exit(0);
+ } else if (strcmp(argv[1], "top") == 0) {
+ timerlat_top_main(argc-1, &argv[1]);
+ exit(0);
+ } else if (strcmp(argv[1], "hist") == 0) {
+ timerlat_hist_main(argc-1, &argv[1]);
+ exit(0);
+ }
+
+usage:
+ timerlat_usage();
+ exit(1);
+}
diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h
new file mode 100644
index 000000000000..88561bfd14f3
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat.h
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+int timerlat_hist_main(int argc, char *argv[]);
+int timerlat_top_main(int argc, char *argv[]);
+int timerlat_main(int argc, char *argv[]);
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
new file mode 100644
index 000000000000..235f9620ef3d
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -0,0 +1,822 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "utils.h"
+#include "osnoise.h"
+#include "timerlat.h"
+
+struct timerlat_hist_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ long long stop_us;
+ long long stop_total_us;
+ long long timerlat_period_us;
+ long long print_stack;
+ int sleep_time;
+ int output_divisor;
+ int duration;
+ int set_sched;
+ struct sched_attr sched_param;
+
+ char no_irq;
+ char no_thread;
+ char no_header;
+ char no_summary;
+ char no_index;
+ char with_zeros;
+ int bucket_size;
+ int entries;
+};
+
+struct timerlat_hist_cpu {
+ int *irq;
+ int *thread;
+
+ int irq_count;
+ int thread_count;
+
+ unsigned long long min_irq;
+ unsigned long long sum_irq;
+ unsigned long long max_irq;
+
+ unsigned long long min_thread;
+ unsigned long long sum_thread;
+ unsigned long long max_thread;
+};
+
+struct timerlat_hist_data {
+ struct timerlat_hist_cpu *hist;
+ int entries;
+ int bucket_size;
+ int nr_cpus;
+};
+
+/*
+ * timerlat_free_histogram - free runtime data
+ */
+static void
+timerlat_free_histogram(struct timerlat_hist_data *data)
+{
+ int cpu;
+
+ /* one histogram for IRQ and one for thread, per CPU */
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (data->hist[cpu].irq)
+ free(data->hist[cpu].irq);
+
+ if (data->hist[cpu].thread)
+ free(data->hist[cpu].thread);
+ }
+
+ /* one set of histograms per CPU */
+ if (data->hist)
+ free(data->hist);
+
+ free(data);
+}
+
+/*
+ * timerlat_alloc_histogram - alloc runtime data
+ */
+static struct timerlat_hist_data
+*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size)
+{
+ struct timerlat_hist_data *data;
+ int cpu;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->entries = entries;
+ data->bucket_size = bucket_size;
+ data->nr_cpus = nr_cpus;
+
+ /* one set of histograms per CPU */
+ data->hist = calloc(1, sizeof(*data->hist) * nr_cpus);
+ if (!data->hist)
+ goto cleanup;
+
+ /* one histogram for IRQ and one for thread, per cpu */
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1));
+ if (!data->hist[cpu].irq)
+ goto cleanup;
+ data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1));
+ if (!data->hist[cpu].thread)
+ goto cleanup;
+ }
+
+ /* set the min to max */
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->hist[cpu].min_irq = ~0;
+ data->hist[cpu].min_thread = ~0;
+ }
+
+ return data;
+
+cleanup:
+ timerlat_free_histogram(data);
+ return NULL;
+}
+
+/*
+ * timerlat_hist_update - record a new timerlat occurent on cpu, updating data
+ */
+static void
+timerlat_hist_update(struct osnoise_tool *tool, int cpu,
+ unsigned long long thread,
+ unsigned long long latency)
+{
+ struct timerlat_hist_params *params = tool->params;
+ struct timerlat_hist_data *data = tool->data;
+ int entries = data->entries;
+ int bucket;
+ int *hist;
+
+ if (params->output_divisor)
+ latency = latency / params->output_divisor;
+
+ if (data->bucket_size)
+ bucket = latency / data->bucket_size;
+
+ if (!thread) {
+ hist = data->hist[cpu].irq;
+ data->hist[cpu].irq_count++;
+ update_min(&data->hist[cpu].min_irq, &latency);
+ update_sum(&data->hist[cpu].sum_irq, &latency);
+ update_max(&data->hist[cpu].max_irq, &latency);
+ } else {
+ hist = data->hist[cpu].thread;
+ data->hist[cpu].thread_count++;
+ update_min(&data->hist[cpu].min_thread, &latency);
+ update_sum(&data->hist[cpu].sum_thread, &latency);
+ update_max(&data->hist[cpu].max_thread, &latency);
+ }
+
+ if (bucket < entries)
+ hist[bucket]++;
+ else
+ hist[entries]++;
+}
+
+/*
+ * timerlat_hist_handler - this is the handler for timerlat tracer events
+ */
+static int
+timerlat_hist_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *data)
+{
+ struct trace_instance *trace = data;
+ unsigned long long thread, latency;
+ struct osnoise_tool *tool;
+ int cpu = record->cpu;
+
+ tool = container_of(trace, struct osnoise_tool, trace);
+
+ tep_get_field_val(s, event, "context", record, &thread, 1);
+ tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
+
+ timerlat_hist_update(tool, cpu, thread, latency);
+
+ return 0;
+}
+
+/*
+ * timerlat_hist_header - print the header of the tracer to the output
+ */
+static void timerlat_hist_header(struct osnoise_tool *tool)
+{
+ struct timerlat_hist_params *params = tool->params;
+ struct timerlat_hist_data *data = tool->data;
+ struct trace_seq *s = tool->trace.seq;
+ char duration[26];
+ int cpu;
+
+ if (params->no_header)
+ return;
+
+ get_duration(tool->start_time, duration, sizeof(duration));
+ trace_seq_printf(s, "# RTLA timerlat histogram\n");
+ trace_seq_printf(s, "# Time unit is %s (%s)\n",
+ params->output_divisor == 1 ? "nanoseconds" : "microseconds",
+ params->output_divisor == 1 ? "ns" : "us");
+
+ trace_seq_printf(s, "# Duration: %s\n", duration);
+
+ if (!params->no_index)
+ trace_seq_printf(s, "Index");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(s, " IRQ-%03d", cpu);
+
+ if (!params->no_thread)
+ trace_seq_printf(s, " Thr-%03d", cpu);
+ }
+ trace_seq_printf(s, "\n");
+
+
+ trace_seq_do_printf(s);
+ trace_seq_reset(s);
+}
+
+/*
+ * timerlat_print_summary - print the summary of the hist data to the output
+ */
+static void
+timerlat_print_summary(struct timerlat_hist_params *params,
+ struct trace_instance *trace,
+ struct timerlat_hist_data *data)
+{
+ int cpu;
+
+ if (params->no_summary)
+ return;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "count:");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].irq_count);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].thread_count);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "min: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].min_irq);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].min_thread);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "avg: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq) {
+ if (data->hist[cpu].irq_count)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].sum_irq / data->hist[cpu].irq_count);
+ else
+ trace_seq_printf(trace->seq, " - ");
+ }
+
+ if (!params->no_thread) {
+ if (data->hist[cpu].thread_count)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].sum_thread / data->hist[cpu].thread_count);
+ else
+ trace_seq_printf(trace->seq, " - ");
+ }
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "max: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].max_irq);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].max_thread);
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * timerlat_print_stats - print data for all CPUs
+ */
+static void
+timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool)
+{
+ struct timerlat_hist_data *data = tool->data;
+ struct trace_instance *trace = &tool->trace;
+ int bucket, cpu;
+ int total;
+
+ timerlat_hist_header(tool);
+
+ for (bucket = 0; bucket < data->entries; bucket++) {
+ total = 0;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "%-6d",
+ bucket * data->bucket_size);
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq) {
+ total += data->hist[cpu].irq[bucket];
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].irq[bucket]);
+ }
+
+ if (!params->no_thread) {
+ total += data->hist[cpu].thread[bucket];
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].thread[bucket]);
+ }
+
+ }
+
+ if (total == 0 && !params->with_zeros) {
+ trace_seq_reset(trace->seq);
+ continue;
+ }
+
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+ }
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "over: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].irq[data->entries]);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].thread[data->entries]);
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+
+ timerlat_print_summary(params, trace, data);
+}
+
+/*
+ * timerlat_hist_usage - prints timerlat top usage message
+ */
+static void timerlat_hist_usage(char *usage)
+{
+ int i;
+
+ char *msg[] = {
+ "",
+ " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority] [-e N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
+ " [--no-index] [--with-zeros]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: timerlat period in us",
+ " -i/--irq us: stop trace if the irq latency is higher than the argument in us",
+ " -T/--thread us: stop trace if the thread latency is higher than the argument in us",
+ " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
+ " -c/--cpus cpus: run the tracer only on the given cpus",
+ " -d/--duration time[m|h|d]: duration of the session in seconds",
+ " -D/--debug: print debug info",
+ " -T/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ " -n/--nano: display data in nanoseconds",
+ " -b/--bucket-size N: set the histogram bucket size (default 1)",
+ " -e/--entries N: set the number of entries of the histogram (default 256)",
+ " --no-irq: ignore IRQ latencies",
+ " --no-thread: ignore thread latencies",
+ " --no-header: do not print header",
+ " --no-summary: do not print summary",
+ " --no-index: do not print index",
+ " --with-zeros: print zero only entries",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters
+ */
+static struct timerlat_hist_params
+*timerlat_hist_parse_args(int argc, char *argv[])
+{
+ struct timerlat_hist_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ /* display data in microseconds */
+ params->output_divisor = 1000;
+ params->bucket_size = 1;
+ params->entries = 256;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"cpus", required_argument, 0, 'c'},
+ {"bucket-size", required_argument, 0, 'b'},
+ {"debug", no_argument, 0, 'D'},
+ {"entries", required_argument, 0, 'e'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"irq", required_argument, 0, 'i'},
+ {"nano", no_argument, 0, 'n'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"stack", required_argument, 0, 's'},
+ {"thread", required_argument, 0, 'T'},
+ {"trace", optional_argument, 0, 't'},
+ {"no-irq", no_argument, 0, '0'},
+ {"no-thread", no_argument, 0, '1'},
+ {"no-header", no_argument, 0, '2'},
+ {"no-summary", no_argument, 0, '3'},
+ {"no-index", no_argument, 0, '4'},
+ {"with-zeros", no_argument, 0, '5'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:b:d:e:Dhi:np:P:s:t::T:012345",
+ long_options, &option_index);
+
+ /* detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ timerlat_hist_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'b':
+ params->bucket_size = get_llong_from_str(optarg);
+ if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
+ timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n");
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ timerlat_hist_usage("Invalid -D duration\n");
+ break;
+ case 'e':
+ params->entries = get_llong_from_str(optarg);
+ if ((params->entries < 10) || (params->entries > 9999999))
+ timerlat_hist_usage("Entries must be > 10 and < 9999999\n");
+ break;
+ case 'h':
+ case '?':
+ timerlat_hist_usage(NULL);
+ break;
+ case 'i':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'n':
+ params->output_divisor = 1;
+ break;
+ case 'p':
+ params->timerlat_period_us = get_llong_from_str(optarg);
+ if (params->timerlat_period_us > 1000000)
+ timerlat_hist_usage("Period longer than 1 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ timerlat_hist_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 's':
+ params->print_stack = get_llong_from_str(optarg);
+ break;
+ case 'T':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+ if (optarg)
+ /* skip = */
+ params->trace_output = &optarg[1];
+ else
+ params->trace_output = "timerlat_trace.txt";
+ break;
+ case '0': /* no irq */
+ params->no_irq = 1;
+ break;
+ case '1': /* no thread */
+ params->no_thread = 1;
+ break;
+ case '2': /* no header */
+ params->no_header = 1;
+ break;
+ case '3': /* no summary */
+ params->no_summary = 1;
+ break;
+ case '4': /* no index */
+ params->no_index = 1;
+ break;
+ case '5': /* with zeros */
+ params->with_zeros = 1;
+ break;
+ default:
+ timerlat_hist_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("rtla needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->no_irq && params->no_thread)
+ timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here");
+
+ if (params->no_index && !params->with_zeros)
+ timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense");
+
+ return params;
+}
+
+/*
+ * timerlat_hist_apply_config - apply the hist configs to the initialized tool
+ */
+static int
+timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(tool->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->timerlat_period_us) {
+ retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us);
+ if (retval) {
+ err_msg("Failed to set timerlat period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->print_stack) {
+ retval = osnoise_set_print_stack(tool->context, params->print_stack);
+ if (retval) {
+ err_msg("Failed to set print stack\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * timerlat_init_hist - initialize a timerlat hist tool with parameters
+ */
+static struct osnoise_tool
+*timerlat_init_hist(struct timerlat_hist_params *params)
+{
+ struct osnoise_tool *tool;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ tool = osnoise_init_tool("timerlat_hist");
+ if (!tool)
+ return NULL;
+
+ tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size);
+ if (!tool->data)
+ goto out_err;
+
+ tool->params = params;
+
+ tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
+ timerlat_hist_handler, tool);
+
+ return tool;
+
+out_err:
+ osnoise_destroy_tool(tool);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_hist(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * timerlat_hist_set_signals - handles the signal to stop the tool
+ */
+static void
+timerlat_hist_set_signals(struct timerlat_hist_params *params)
+{
+ signal(SIGINT, stop_hist);
+ if (params->duration) {
+ signal(SIGALRM, stop_hist);
+ alarm(params->duration);
+ }
+}
+
+int timerlat_hist_main(int argc, char *argv[])
+{
+ struct timerlat_hist_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *tool;
+ int return_value = 1;
+ int retval;
+
+ params = timerlat_hist_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ tool = timerlat_init_hist(params);
+ if (!tool) {
+ err_msg("Could not init osnoise hist\n");
+ goto out_exit;
+ }
+
+ retval = timerlat_hist_apply_config(tool, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_hist;
+ }
+
+ trace = &tool->trace;
+
+ retval = enable_timerlat(trace);
+ if (retval) {
+ err_msg("Failed to enable timerlat tracer\n");
+ goto out_hist;
+ }
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("timerlat/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_hist;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("timerlat");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_hist;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ tool->start_time = time(NULL);
+ timerlat_hist_set_signals(params);
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_hist;
+ }
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+ };
+
+ timerlat_print_stats(params, tool);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("rtla timelat hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_hist:
+ timerlat_free_histogram(tool->data);
+ osnoise_destroy_tool(tool);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+ free(params);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
new file mode 100644
index 000000000000..1ebd5291539c
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "utils.h"
+#include "osnoise.h"
+#include "timerlat.h"
+
+struct timerlat_top_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ long long stop_us;
+ long long stop_total_us;
+ long long timerlat_period_us;
+ long long print_stack;
+ int sleep_time;
+ int output_divisor;
+ int duration;
+ int quiet;
+ int set_sched;
+ struct sched_attr sched_param;
+};
+
+struct timerlat_top_cpu {
+ int irq_count;
+ int thread_count;
+
+ unsigned long long cur_irq;
+ unsigned long long min_irq;
+ unsigned long long sum_irq;
+ unsigned long long max_irq;
+
+ unsigned long long cur_thread;
+ unsigned long long min_thread;
+ unsigned long long sum_thread;
+ unsigned long long max_thread;
+};
+
+struct timerlat_top_data {
+ struct timerlat_top_cpu *cpu_data;
+ int nr_cpus;
+};
+
+/*
+ * timerlat_free_top - free runtime data
+ */
+static void
+timerlat_free_top(struct timerlat_top_data *data)
+{
+ free(data->cpu_data);
+ free(data);
+}
+
+/*
+ * timerlat_alloc_histogram - alloc runtime data
+ */
+static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus)
+{
+ struct timerlat_top_data *data;
+ int cpu;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->nr_cpus = nr_cpus;
+
+ /* one set of histograms per CPU */
+ data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus);
+ if (!data->cpu_data)
+ goto cleanup;
+
+ /* set the min to max */
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->cpu_data[cpu].min_irq = ~0;
+ data->cpu_data[cpu].min_thread = ~0;
+ }
+
+ return data;
+
+cleanup:
+ timerlat_free_top(data);
+ return NULL;
+}
+
+/*
+ * timerlat_hist_update - record a new timerlat occurent on cpu, updating data
+ */
+static void
+timerlat_top_update(struct osnoise_tool *tool, int cpu,
+ unsigned long long thread,
+ unsigned long long latency)
+{
+ struct timerlat_top_data *data = tool->data;
+ struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+
+ if (!thread) {
+ cpu_data->irq_count++;
+ cpu_data->cur_irq = latency;
+ update_min(&cpu_data->min_irq, &latency);
+ update_sum(&cpu_data->sum_irq, &latency);
+ update_max(&cpu_data->max_irq, &latency);
+ } else {
+ cpu_data->thread_count++;
+ cpu_data->cur_thread = latency;
+ update_min(&cpu_data->min_thread, &latency);
+ update_sum(&cpu_data->sum_thread, &latency);
+ update_max(&cpu_data->max_thread, &latency);
+ }
+}
+
+/*
+ * timerlat_top_handler - this is the handler for timerlat tracer events
+ */
+static int
+timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct trace_instance *trace = context;
+ unsigned long long latency, thread;
+ struct osnoise_tool *top;
+ int cpu = record->cpu;
+
+ top = container_of(trace, struct osnoise_tool, trace);
+
+ tep_get_field_val(s, event, "context", record, &thread, 1);
+ tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
+
+ timerlat_top_update(top, cpu, thread, latency);
+
+ return 0;
+}
+
+/*
+ * timerlat_top_header - print the header of the tool output
+ */
+static void timerlat_top_header(struct osnoise_tool *top)
+{
+ struct timerlat_top_params *params = top->params;
+ struct trace_seq *s = top->trace.seq;
+ char duration[26];
+
+ get_duration(top->start_time, duration, sizeof(duration));
+
+ trace_seq_printf(s, "\033[2;37;40m");
+ trace_seq_printf(s, " Timer Latency ");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+
+ trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration,
+ params->output_divisor == 1 ? "ns" : "us",
+ params->output_divisor == 1 ? "ns" : "us");
+
+ trace_seq_printf(s, "\033[2;30;47m");
+ trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+}
+
+/*
+ * timerlat_top_print - prints the output of a given CPU
+ */
+static void timerlat_top_print(struct osnoise_tool *top, int cpu)
+{
+
+ struct timerlat_top_params *params = top->params;
+ struct timerlat_top_data *data = top->data;
+ struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+ int divisor = params->output_divisor;
+ struct trace_seq *s = top->trace.seq;
+
+ if (divisor == 0)
+ return;
+
+ /*
+ * Skip if no data is available: is this cpu offline?
+ */
+ if (!cpu_data->irq_count && !cpu_data->thread_count)
+ return;
+
+ /*
+ * Unless trace is being lost, IRQ counter is always the max.
+ */
+ trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count);
+
+ if (!cpu_data->irq_count) {
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - |");
+ } else {
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor);
+ trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor);
+ trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor);
+ }
+
+ if (!cpu_data->thread_count) {
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " -\n");
+ } else {
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor);
+ trace_seq_printf(s, "%9llu ",
+ (cpu_data->sum_thread / cpu_data->thread_count) / divisor);
+ trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor);
+ }
+}
+
+/*
+ * clear_terminal - clears the output terminal
+ */
+static void clear_terminal(struct trace_seq *seq)
+{
+ if (!config_debug)
+ trace_seq_printf(seq, "\033c");
+}
+
+/*
+ * timerlat_print_stats - print data for all cpus
+ */
+static void
+timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top)
+{
+ struct trace_instance *trace = &top->trace;
+ static int nr_cpus = -1;
+ int i;
+
+ if (nr_cpus == -1)
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ if (!params->quiet)
+ clear_terminal(trace->seq);
+
+ timerlat_top_header(top);
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (params->cpus && !params->monitored_cpus[i])
+ continue;
+ timerlat_top_print(top, i);
+ }
+
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * timerlat_top_usage - prints timerlat top usage message
+ */
+static void timerlat_top_usage(char *usage)
+{
+ int i;
+
+ static const char *const msg[] = {
+ "",
+ " usage: rtla timerlat [top] [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: timerlat period in us",
+ " -i/--irq us: stop trace if the irq latency is higher than the argument in us",
+ " -T/--thread us: stop trace if the thread latency is higher than the argument in us",
+ " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
+ " -c/--cpus cpus: run the tracer only on the given cpus",
+ " -d/--duration time[m|h|d]: duration of the session in seconds",
+ " -D/--debug: print debug info",
+ " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ " -n/--nano: display data in nanoseconds",
+ " -q/--quiet print only a summary at the end",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters
+ */
+static struct timerlat_top_params
+*timerlat_top_parse_args(int argc, char **argv)
+{
+ struct timerlat_top_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ /* display data in microseconds */
+ params->output_divisor = 1000;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"cpus", required_argument, 0, 'c'},
+ {"debug", no_argument, 0, 'D'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"irq", required_argument, 0, 'i'},
+ {"nano", no_argument, 0, 'n'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"quiet", no_argument, 0, 'q'},
+ {"stack", required_argument, 0, 's'},
+ {"thread", required_argument, 0, 'T'},
+ {"trace", optional_argument, 0, 't'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:d:Dhi:np:P:qs:t::T:",
+ long_options, &option_index);
+
+ /* detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ timerlat_top_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ timerlat_top_usage("Invalid -D duration\n");
+ break;
+ case 'h':
+ case '?':
+ timerlat_top_usage(NULL);
+ break;
+ case 'i':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'n':
+ params->output_divisor = 1;
+ break;
+ case 'p':
+ params->timerlat_period_us = get_llong_from_str(optarg);
+ if (params->timerlat_period_us > 1000000)
+ timerlat_top_usage("Period longer than 1 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ timerlat_top_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 'q':
+ params->quiet = 1;
+ break;
+ case 's':
+ params->print_stack = get_llong_from_str(optarg);
+ break;
+ case 'T':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+ if (optarg)
+ /* skip = */
+ params->trace_output = &optarg[1];
+ else
+ params->trace_output = "timerlat_trace.txt";
+ break;
+ default:
+ timerlat_top_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("rtla needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return params;
+}
+
+/*
+ * timerlat_top_apply_config - apply the top configs to the initialized tool
+ */
+static int
+timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(top->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(top->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(top->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+
+ if (params->timerlat_period_us) {
+ retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us);
+ if (retval) {
+ err_msg("Failed to set timerlat period\n");
+ goto out_err;
+ }
+ }
+
+
+ if (params->print_stack) {
+ retval = osnoise_set_print_stack(top->context, params->print_stack);
+ if (retval) {
+ err_msg("Failed to set print stack\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * timerlat_init_top - initialize a timerlat top tool with parameters
+ */
+static struct osnoise_tool
+*timerlat_init_top(struct timerlat_top_params *params)
+{
+ struct osnoise_tool *top;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ top = osnoise_init_tool("timerlat_top");
+ if (!top)
+ return NULL;
+
+ top->data = timerlat_alloc_top(nr_cpus);
+ if (!top->data)
+ goto out_err;
+
+ top->params = params;
+
+ tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat",
+ timerlat_top_handler, top);
+
+ return top;
+
+out_err:
+ osnoise_destroy_tool(top);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_top(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * timerlat_top_set_signals - handles the signal to stop the tool
+ */
+static void
+timerlat_top_set_signals(struct timerlat_top_params *params)
+{
+ signal(SIGINT, stop_top);
+ if (params->duration) {
+ signal(SIGALRM, stop_top);
+ alarm(params->duration);
+ }
+}
+
+int timerlat_top_main(int argc, char *argv[])
+{
+ struct timerlat_top_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *top;
+ int return_value = 1;
+ int retval;
+
+ params = timerlat_top_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ top = timerlat_init_top(params);
+ if (!top) {
+ err_msg("Could not init osnoise top\n");
+ goto out_exit;
+ }
+
+ retval = timerlat_top_apply_config(top, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_top;
+ }
+
+ trace = &top->trace;
+
+ retval = enable_timerlat(trace);
+ if (retval) {
+ err_msg("Failed to enable timerlat tracer\n");
+ goto out_top;
+ }
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("timerlat/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_top;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("timerlat");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_top;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ top->start_time = time(NULL);
+ timerlat_top_set_signals(params);
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_top;
+ }
+
+ if (!params->quiet)
+ timerlat_print_stats(params, top);
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+
+ };
+
+ timerlat_print_stats(params, top);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("rtla timelat hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_top:
+ timerlat_free_top(top->data);
+ osnoise_destroy_tool(top);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+ free(params);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c
new file mode 100644
index 000000000000..107a0c6387f7
--- /dev/null
+++ b/tools/tracing/rtla/src/trace.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/sendfile.h>
+#include <tracefs.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "trace.h"
+#include "utils.h"
+
+/*
+ * enable_tracer_by_name - enable a tracer on the given instance
+ */
+int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name)
+{
+ enum tracefs_tracers tracer;
+ int retval;
+
+ tracer = TRACEFS_TRACER_CUSTOM;
+
+ debug_msg("enabling %s tracer\n", tracer_name);
+
+ retval = tracefs_tracer_set(inst, tracer, tracer_name);
+ if (retval < 0) {
+ if (errno == ENODEV)
+ err_msg("tracer %s not found!\n", tracer_name);
+
+ err_msg("failed to enable the tracer %s\n", tracer_name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * disable_tracer - set nop tracer to the insta
+ */
+void disable_tracer(struct tracefs_instance *inst)
+{
+ enum tracefs_tracers t = TRACEFS_TRACER_NOP;
+ int retval;
+
+ retval = tracefs_tracer_set(inst, t);
+ if (retval < 0)
+ err_msg("oops, error disabling tracer\n");
+}
+
+/*
+ * create_instance - create a trace instance with *instance_name
+ */
+struct tracefs_instance *create_instance(char *instance_name)
+{
+ return tracefs_instance_create(instance_name);
+}
+
+/*
+ * destroy_instance - remove a trace instance and free the data
+ */
+void destroy_instance(struct tracefs_instance *inst)
+{
+ tracefs_instance_destroy(inst);
+ tracefs_instance_free(inst);
+}
+
+/*
+ * save_trace_to_file - save the trace output of the instance to the file
+ */
+int save_trace_to_file(struct tracefs_instance *inst, const char *filename)
+{
+ const char *file = "trace";
+ mode_t mode = 0644;
+ char buffer[4096];
+ int out_fd, in_fd;
+ int retval = -1;
+
+ in_fd = tracefs_instance_file_open(inst, file, O_RDONLY);
+ if (in_fd < 0) {
+ err_msg("Failed to open trace file\n");
+ return -1;
+ }
+
+ out_fd = creat(filename, mode);
+ if (out_fd < 0) {
+ err_msg("Failed to create output file %s\n", filename);
+ goto out_close_in;
+ }
+
+ do {
+ retval = read(in_fd, buffer, sizeof(buffer));
+ if (retval <= 0)
+ goto out_close;
+
+ retval = write(out_fd, buffer, retval);
+ if (retval < 0)
+ goto out_close;
+ } while (retval > 0);
+
+ retval = 0;
+out_close:
+ close(out_fd);
+out_close_in:
+ close(in_fd);
+ return retval;
+}
+
+/*
+ * collect_registered_events - call the existing callback function for the event
+ *
+ * If an event has a registered callback function, call it.
+ * Otherwise, ignore the event.
+ */
+int
+collect_registered_events(struct tep_event *event, struct tep_record *record,
+ int cpu, void *context)
+{
+ struct trace_instance *trace = context;
+ struct trace_seq *s = trace->seq;
+
+ if (!event->handler)
+ return 0;
+
+ event->handler(s, record, event, context);
+
+ return 0;
+}
+
+/*
+ * trace_instance_destroy - destroy and free a rtla trace instance
+ */
+void trace_instance_destroy(struct trace_instance *trace)
+{
+ if (trace->inst) {
+ disable_tracer(trace->inst);
+ destroy_instance(trace->inst);
+ }
+
+ if (trace->seq)
+ free(trace->seq);
+
+ if (trace->tep)
+ tep_free(trace->tep);
+}
+
+/*
+ * trace_instance_init - create an rtla trace instance
+ *
+ * It is more than the tracefs instance, as it contains other
+ * things required for the tracing, such as the local events and
+ * a seq file.
+ *
+ * Note that the trace instance is returned disabled. This allows
+ * the tool to apply some other configs, like setting priority
+ * to the kernel threads, before starting generating trace entries.
+ */
+int trace_instance_init(struct trace_instance *trace, char *tool_name)
+{
+ trace->seq = calloc(1, sizeof(*trace->seq));
+ if (!trace->seq)
+ goto out_err;
+
+ trace_seq_init(trace->seq);
+
+ trace->inst = create_instance(tool_name);
+ if (!trace->inst)
+ goto out_err;
+
+ trace->tep = tracefs_local_events(NULL);
+ if (!trace->tep)
+ goto out_err;
+
+ /*
+ * Let the main enable the record after setting some other
+ * things such as the priority of the tracer's threads.
+ */
+ tracefs_trace_off(trace->inst);
+
+ return 0;
+
+out_err:
+ trace_instance_destroy(trace);
+ return 1;
+}
+
+/*
+ * trace_instance_start - start tracing a given rtla instance
+ */
+int trace_instance_start(struct trace_instance *trace)
+{
+ return tracefs_trace_on(trace->inst);
+}
diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h
new file mode 100644
index 000000000000..0ea1df0ad9a7
--- /dev/null
+++ b/tools/tracing/rtla/src/trace.h
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <tracefs.h>
+#include <stddef.h>
+
+struct trace_instance {
+ struct tracefs_instance *inst;
+ struct tep_handle *tep;
+ struct trace_seq *seq;
+};
+
+int trace_instance_init(struct trace_instance *trace, char *tool_name);
+int trace_instance_start(struct trace_instance *trace);
+void trace_instance_destroy(struct trace_instance *trace);
+
+struct trace_seq *get_trace_seq(void);
+int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name);
+void disable_tracer(struct tracefs_instance *inst);
+
+int enable_osnoise(struct trace_instance *trace);
+int enable_timerlat(struct trace_instance *trace);
+
+struct tracefs_instance *create_instance(char *instance_name);
+void destroy_instance(struct tracefs_instance *inst);
+
+int save_trace_to_file(struct tracefs_instance *inst, const char *filename);
+int collect_registered_events(struct tep_event *tep, struct tep_record *record,
+ int cpu, void *context);
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
new file mode 100644
index 000000000000..1c9f0eea6166
--- /dev/null
+++ b/tools/tracing/rtla/src/utils.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <proc/readproc.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+
+#include "utils.h"
+
+#define MAX_MSG_LENGTH 1024
+int config_debug;
+
+/*
+ * err_msg - print an error message to the stderr
+ */
+void err_msg(const char *fmt, ...)
+{
+ char message[MAX_MSG_LENGTH];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(message, sizeof(message), fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "%s", message);
+}
+
+/*
+ * debug_msg - print a debug message to stderr if debug is set
+ */
+void debug_msg(const char *fmt, ...)
+{
+ char message[MAX_MSG_LENGTH];
+ va_list ap;
+
+ if (!config_debug)
+ return;
+
+ va_start(ap, fmt);
+ vsnprintf(message, sizeof(message), fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "%s", message);
+}
+
+/*
+ * get_llong_from_str - get a long long int from a string
+ */
+long long get_llong_from_str(char *start)
+{
+ long long value;
+ char *end;
+
+ errno = 0;
+ value = strtoll(start, &end, 10);
+ if (errno || start == end)
+ return -1;
+
+ return value;
+}
+
+/*
+ * get_duration - fill output with a human readable duration since start_time
+ */
+void get_duration(time_t start_time, char *output, int output_size)
+{
+ time_t now = time(NULL);
+ struct tm *tm_info;
+ time_t duration;
+
+ duration = difftime(now, start_time);
+ tm_info = localtime(&duration);
+
+ snprintf(output, output_size, "%3d %02d:%02d:%02d",
+ tm_info->tm_yday,
+ tm_info->tm_hour - 1,
+ tm_info->tm_min,
+ tm_info->tm_sec);
+}
+
+/*
+ * parse_cpu_list - parse a cpu_list filling a char vector with cpus set
+ *
+ * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set the char
+ * in the monitored_cpus.
+ *
+ * XXX: convert to a bitmask.
+ */
+int parse_cpu_list(char *cpu_list, char **monitored_cpus)
+{
+ char *mon_cpus;
+ const char *p;
+ int end_cpu;
+ int nr_cpus;
+ int cpu;
+ int i;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ mon_cpus = malloc(nr_cpus * sizeof(char));
+ memset(mon_cpus, 0, (nr_cpus * sizeof(char)));
+
+ for (p = cpu_list; *p; ) {
+ cpu = atoi(p);
+ if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
+ goto err;
+
+ while (isdigit(*p))
+ p++;
+ if (*p == '-') {
+ p++;
+ end_cpu = atoi(p);
+ if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
+ goto err;
+ while (isdigit(*p))
+ p++;
+ } else
+ end_cpu = cpu;
+
+ if (cpu == end_cpu) {
+ debug_msg("cpu_list: adding cpu %d\n", cpu);
+ mon_cpus[cpu] = 1;
+ } else {
+ for (i = cpu; i <= end_cpu; i++) {
+ debug_msg("cpu_list: adding cpu %d\n", i);
+ mon_cpus[i] = 1;
+ }
+ }
+
+ if (*p == ',')
+ p++;
+ }
+
+ *monitored_cpus = mon_cpus;
+
+ return 0;
+
+err:
+ debug_msg("Error parsing the cpu list %s", cpu_list);
+ return 1;
+}
+
+/*
+ * parse_duration - parse duration with s/m/h/d suffix converting it to seconds
+ */
+long parse_seconds_duration(char *val)
+{
+ char *end;
+ long t;
+
+ t = strtol(val, &end, 10);
+
+ if (end) {
+ switch (*end) {
+ case 's':
+ case 'S':
+ break;
+ case 'm':
+ case 'M':
+ t *= 60;
+ break;
+ case 'h':
+ case 'H':
+ t *= 60 * 60;
+ break;
+
+ case 'd':
+ case 'D':
+ t *= 24 * 60 * 60;
+ break;
+ }
+ }
+
+ return t;
+}
+
+/*
+ * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
+ */
+long parse_ns_duration(char *val)
+{
+ char *end;
+ long t;
+
+ t = strtol(val, &end, 10);
+
+ if (end) {
+ if (!strncmp(end, "ns", 2)) {
+ return t;
+ } else if (!strncmp(end, "us", 2)) {
+ t *= 1000;
+ return t;
+ } else if (!strncmp(end, "ms", 2)) {
+ t *= 1000 * 1000;
+ return t;
+ } else if (!strncmp(end, "s", 1)) {
+ t *= 1000 * 1000 * 1000;
+ return t;
+ }
+ return -1;
+ }
+
+ return t;
+}
+
+/*
+ * This is a set of helper functions to use SCHED_DEADLINE.
+ */
+#ifdef __x86_64__
+# define __NR_sched_setattr 314
+# define __NR_sched_getattr 315
+#elif __i386__
+# define __NR_sched_setattr 351
+# define __NR_sched_getattr 352
+#elif __arm__
+# define __NR_sched_setattr 380
+# define __NR_sched_getattr 381
+#elif __aarch64__
+# define __NR_sched_setattr 274
+# define __NR_sched_getattr 275
+#elif __powerpc__
+# define __NR_sched_setattr 355
+# define __NR_sched_getattr 356
+#elif __s390x__
+# define __NR_sched_setattr 345
+# define __NR_sched_getattr 346
+#endif
+
+#define SCHED_DEADLINE 6
+
+static inline int sched_setattr(pid_t pid, const struct sched_attr *attr,
+ unsigned int flags) {
+ return syscall(__NR_sched_setattr, pid, attr, flags);
+}
+
+static inline int sched_getattr(pid_t pid, struct sched_attr *attr,
+ unsigned int size, unsigned int flags)
+{
+ return syscall(__NR_sched_getattr, pid, attr, size, flags);
+}
+
+int __set_sched_attr(int pid, struct sched_attr *attr)
+{
+ int flags = 0;
+ int retval;
+
+ retval = sched_setattr(pid, attr, flags);
+ if (retval < 0) {
+ err_msg("boost_with_deadline failed to boost pid %d: %s\n",
+ pid, strerror(errno));
+ return 1;
+ }
+
+ return 0;
+}
+/*
+ * set_comm_sched_attr - set sched params to threads starting with char *comm
+ *
+ * This function uses procps to list the currently running threads and then
+ * set the sched_attr *attr to the threads that start with char *comm. It is
+ * mainly used to set the priority to the kernel threads created by the
+ * tracers.
+ */
+int set_comm_sched_attr(const char *comm, struct sched_attr *attr)
+{
+ int flags = PROC_FILLCOM | PROC_FILLSTAT;
+ PROCTAB *ptp;
+ proc_t task;
+ int retval;
+
+ ptp = openproc(flags);
+ if (!ptp) {
+ err_msg("error openproc()\n");
+ return -ENOENT;
+ }
+
+ memset(&task, 0, sizeof(task));
+
+ while (readproc(ptp, &task)) {
+ retval = strncmp(comm, task.cmd, strlen(comm));
+ if (retval)
+ continue;
+ retval = __set_sched_attr(task.tid, attr);
+ if (retval)
+ goto out_err;
+ }
+
+ closeproc(ptp);
+ return 0;
+
+out_err:
+ closeproc(ptp);
+ return 1;
+}
+
+#define INVALID_VAL (~0L)
+static long get_long_ns_after_colon(char *start)
+{
+ long val = INVALID_VAL;
+
+ /* find the ":" */
+ start = strstr(start, ":");
+ if (!start)
+ return -1;
+
+ /* skip ":" */
+ start++;
+ val = parse_ns_duration(start);
+
+ return val;
+}
+
+static long get_long_after_colon(char *start)
+{
+ long val = INVALID_VAL;
+
+ /* find the ":" */
+ start = strstr(start, ":");
+ if (!start)
+ return -1;
+
+ /* skip ":" */
+ start++;
+ val = get_llong_from_str(start);
+
+ return val;
+}
+
+/*
+ * parse priority in the format:
+ * SCHED_OTHER:
+ * o:<prio>
+ * O:<prio>
+ * SCHED_RR:
+ * r:<prio>
+ * R:<prio>
+ * SCHED_FIFO:
+ * f:<prio>
+ * F:<prio>
+ * SCHED_DEADLINE:
+ * d:runtime:period
+ * D:runtime:period
+ */
+int parse_prio(char *arg, struct sched_attr *sched_param)
+{
+ long prio;
+ long runtime;
+ long period;
+
+ memset(sched_param, 0, sizeof(*sched_param));
+ sched_param->size = sizeof(*sched_param);
+
+ switch (arg[0]) {
+ case 'd':
+ case 'D':
+ /* d:runtime:period */
+ if (strlen(arg) < 4)
+ return -1;
+
+ runtime = get_long_ns_after_colon(arg);
+ if (runtime == INVALID_VAL)
+ return -1;
+
+ period = get_long_ns_after_colon(&arg[2]);
+ if (period == INVALID_VAL)
+ return -1;
+
+ if (runtime > period)
+ return -1;
+
+ sched_param->sched_policy = SCHED_DEADLINE;
+ sched_param->sched_runtime = runtime;
+ sched_param->sched_deadline = period;
+ sched_param->sched_period = period;
+ break;
+ case 'f':
+ case 'F':
+ /* f:prio */
+ prio = get_long_after_colon(arg);
+ if (prio == INVALID_VAL)
+ return -1;
+
+ if (prio < sched_get_priority_min(SCHED_FIFO))
+ return -1;
+ if (prio > sched_get_priority_max(SCHED_FIFO))
+ return -1;
+
+ sched_param->sched_policy = SCHED_FIFO;
+ sched_param->sched_priority = prio;
+ break;
+ case 'r':
+ case 'R':
+ /* r:prio */
+ prio = get_long_after_colon(arg);
+ if (prio == INVALID_VAL)
+ return -1;
+
+ if (prio < sched_get_priority_min(SCHED_RR))
+ return -1;
+ if (prio > sched_get_priority_max(SCHED_RR))
+ return -1;
+
+ sched_param->sched_policy = SCHED_RR;
+ sched_param->sched_priority = prio;
+ break;
+ case 'o':
+ case 'O':
+ /* o:prio */
+ prio = get_long_after_colon(arg);
+ if (prio == INVALID_VAL)
+ return -1;
+
+ if (prio < sched_get_priority_min(SCHED_OTHER))
+ return -1;
+ if (prio > sched_get_priority_max(SCHED_OTHER))
+ return -1;
+
+ sched_param->sched_policy = SCHED_OTHER;
+ sched_param->sched_priority = prio;
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
new file mode 100644
index 000000000000..9aa962319ca2
--- /dev/null
+++ b/tools/tracing/rtla/src/utils.h
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <time.h>
+
+/*
+ * '18446744073709551615\0'
+ */
+#define BUFF_U64_STR_SIZE 24
+
+#define container_of(ptr, type, member)({ \
+ const typeof(((type *)0)->member) *__mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)) ; })
+
+extern int config_debug;
+void debug_msg(const char *fmt, ...);
+void err_msg(const char *fmt, ...);
+
+long parse_seconds_duration(char *val);
+void get_duration(time_t start_time, char *output, int output_size);
+
+int parse_cpu_list(char *cpu_list, char **monitored_cpus);
+long long get_llong_from_str(char *start);
+
+static inline void
+update_min(unsigned long long *a, unsigned long long *b)
+{
+ if (*a > *b)
+ *a = *b;
+}
+
+static inline void
+update_max(unsigned long long *a, unsigned long long *b)
+{
+ if (*a < *b)
+ *a = *b;
+}
+
+static inline void
+update_sum(unsigned long long *a, unsigned long long *b)
+{
+ *a += *b;
+}
+
+struct sched_attr {
+ uint32_t size;
+ uint32_t sched_policy;
+ uint64_t sched_flags;
+ int32_t sched_nice;
+ uint32_t sched_priority;
+ uint64_t sched_runtime;
+ uint64_t sched_deadline;
+ uint64_t sched_period;
+};
+
+int parse_prio(char *arg, struct sched_attr *sched_param);
+int set_comm_sched_attr(const char *comm, struct sched_attr *attr);
diff --git a/usr/Makefile b/usr/Makefile
index b1a81a40eab1..cc0d2824e100 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -3,11 +3,6 @@
# kbuild file for usr/ - including initramfs image
#
-# cmd_bzip2, cmd_lzma, cmd_lzo, cmd_lz4 from scripts/Makefile.lib appends the
-# size at the end of the compressed file, which unfortunately does not work
-# with unpack_to_rootfs(). Make size_append no-op.
-override size_append := :
-
compress-y := shipped
compress-$(CONFIG_INITRAMFS_COMPRESSION_GZIP) := gzip
compress-$(CONFIG_INITRAMFS_COMPRESSION_BZIP2) := bzip2
@@ -21,7 +16,7 @@ obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o
$(obj)/initramfs_data.o: $(obj)/initramfs_inc_data
-ramfs-input := $(strip $(shell echo $(CONFIG_INITRAMFS_SOURCE)))
+ramfs-input := $(CONFIG_INITRAMFS_SOURCE)
cpio-data :=
# If CONFIG_INITRAMFS_SOURCE is empty, generate a small initramfs with the
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 1c2ae1368079..7be7468c177b 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -34,7 +34,6 @@ no-header-test += linux/hdlc/ioctl.h
no-header-test += linux/ivtv.h
no-header-test += linux/kexec.h
no-header-test += linux/matroxfb.h
-no-header-test += linux/nfc.h
no-header-test += linux/omap3isp.h
no-header-test += linux/omapfb.h
no-header-test += linux/patchkey.h
@@ -99,10 +98,12 @@ quiet_cmd_hdrtest = HDRTEST $<
cmd_hdrtest = \
$(CC) $(c_flags) -S -o /dev/null -x c /dev/null \
$(if $(filter-out $(no-header-test), $*.h), -include $< -include $<); \
- $(PERL) $(srctree)/scripts/headers_check.pl $(obj) $(SRCARCH) $<; \
+ $(PERL) $(srctree)/$(src)/headers_check.pl $(obj) $(SRCARCH) $<; \
touch $@
$(obj)/%.hdrtest: $(obj)/%.h FORCE
$(call if_changed_dep,hdrtest)
-clean-files += $(filter-out Makefile, $(notdir $(wildcard $(obj)/*)))
+# Since GNU Make 4.3, $(patsubst $(obj)/%/,%,$(wildcard $(obj)/*/)) works.
+# To support older Make versions, use a somewhat tedious way.
+clean-files += $(filter-out Makefile headers_check.pl, $(notdir $(wildcard $(obj)/*)))
diff --git a/scripts/headers_check.pl b/usr/include/headers_check.pl
index b6aec5e4365f..b6aec5e4365f 100755
--- a/scripts/headers_check.pl
+++ b/usr/include/headers_check.pl
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 62b39149b8c8..f4834c20e4a6 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -4,6 +4,9 @@
config HAVE_KVM
bool
+config HAVE_KVM_PFNCACHE
+ bool
+
config HAVE_KVM_IRQCHIP
bool
@@ -13,6 +16,9 @@ config HAVE_KVM_IRQFD
config HAVE_KVM_IRQ_ROUTING
bool
+config HAVE_KVM_DIRTY_RING
+ bool
+
config HAVE_KVM_EVENTFD
bool
select EVENTFD
diff --git a/virt/kvm/Makefile.kvm b/virt/kvm/Makefile.kvm
new file mode 100644
index 000000000000..2c27d5d0c367
--- /dev/null
+++ b/virt/kvm/Makefile.kvm
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+KVM ?= ../../../virt/kvm
+
+kvm-y := $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
+kvm-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
+kvm-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
+kvm-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
+kvm-$(CONFIG_HAVE_KVM_DIRTY_RING) += $(KVM)/dirty_ring.o
+kvm-$(CONFIG_HAVE_KVM_PFNCACHE) += $(KVM)/pfncache.o
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index dd777688d14a..9bfe1d6f6529 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -85,7 +85,7 @@ static void async_pf_execute(struct work_struct *work)
trace_kvm_async_pf_completed(addr, cr2_or_gpa);
- rcuwait_wake_up(&vcpu->wait);
+ __kvm_vcpu_wake_up(vcpu);
mmput(mm);
kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 88f4683198ea..222ecc81d7df 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -9,7 +9,7 @@
#include <linux/vmalloc.h>
#include <linux/kvm_dirty_ring.h>
#include <trace/events/kvm.h>
-#include "mmu_lock.h"
+#include "kvm_mm.h"
int __weak kvm_cpu_dirty_log_size(void)
{
@@ -36,15 +36,6 @@ static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring)
return kvm_dirty_ring_used(ring) >= ring->size;
}
-struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
-{
- struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
-
- WARN_ON_ONCE(vcpu->kvm != kvm);
-
- return &vcpu->dirty_ring;
-}
-
static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
{
struct kvm_memory_slot *memslot;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 938975ff75a0..9a20f2299386 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -59,7 +59,7 @@
#include "coalesced_mmio.h"
#include "async_pf.h"
-#include "mmu_lock.h"
+#include "kvm_mm.h"
#include "vfio.h"
#define CREATE_TRACE_POINTS
@@ -305,8 +305,9 @@ bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
{
struct kvm_vcpu *vcpu;
struct cpumask *cpus;
+ unsigned long i;
bool called;
- int i, me;
+ int me;
me = get_cpu();
@@ -421,21 +422,20 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
+#ifndef __KVM_HAVE_ARCH_WQP
rcuwait_init(&vcpu->wait);
+#endif
kvm_async_pf_vcpu_init(vcpu);
- vcpu->pre_pcpu = -1;
- INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
-
kvm_vcpu_set_in_spin_loop(vcpu, false);
kvm_vcpu_set_dy_eligible(vcpu, false);
vcpu->preempted = false;
vcpu->ready = false;
preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
- vcpu->last_used_slot = 0;
+ vcpu->last_used_slot = NULL;
}
-void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
kvm_dirty_ring_free(&vcpu->dirty_ring);
kvm_arch_vcpu_destroy(vcpu);
@@ -450,7 +450,20 @@ void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->run);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_destroy);
+
+void kvm_destroy_vcpus(struct kvm *kvm)
+{
+ unsigned long i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvm_vcpu_destroy(vcpu);
+ xa_erase(&kvm->vcpu_array, i);
+ }
+
+ atomic_set(&kvm->online_vcpus, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_destroy_vcpus);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
@@ -498,6 +511,12 @@ static void kvm_null_fn(void)
}
#define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
+/* Iterate over each memslot intersecting [start, last] (inclusive) range */
+#define kvm_for_each_memslot_in_hva_range(node, slots, start, last) \
+ for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
+ node; \
+ node = interval_tree_iter_next(node, start, last)) \
+
static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
const struct kvm_hva_range *range)
{
@@ -507,6 +526,9 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
struct kvm_memslots *slots;
int i, idx;
+ if (WARN_ON_ONCE(range->end <= range->start))
+ return 0;
+
/* A null handler is allowed if and only if on_lock() is provided. */
if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
IS_KVM_NULL_FN(range->handler)))
@@ -515,15 +537,17 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ struct interval_tree_node *node;
+
slots = __kvm_memslots(kvm, i);
- kvm_for_each_memslot(slot, slots) {
+ kvm_for_each_memslot_in_hva_range(node, slots,
+ range->start, range->end - 1) {
unsigned long hva_start, hva_end;
+ slot = container_of(node, struct kvm_memory_slot, hva_node[slots->node_idx]);
hva_start = max(range->start, slot->userspace_addr);
hva_end = min(range->end, slot->userspace_addr +
(slot->npages << PAGE_SHIFT));
- if (hva_start >= hva_end)
- continue;
/*
* To optimize for the likely case where the address
@@ -684,6 +708,9 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
kvm->mn_active_invalidate_count++;
spin_unlock(&kvm->mn_invalidate_lock);
+ gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
+ hva_range.may_block);
+
__kvm_handle_hva_range(kvm, &hva_range);
return 0;
@@ -851,21 +878,6 @@ static void kvm_destroy_pm_notifier(struct kvm *kvm)
}
#endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */
-static struct kvm_memslots *kvm_alloc_memslots(void)
-{
- int i;
- struct kvm_memslots *slots;
-
- slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
- if (!slots)
- return NULL;
-
- for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
- slots->id_to_index[i] = -1;
-
- return slots;
-}
-
static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
{
if (!memslot->dirty_bitmap)
@@ -875,27 +887,33 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
memslot->dirty_bitmap = NULL;
}
+/* This does not remove the slot from struct kvm_memslots data structures */
static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
kvm_destroy_dirty_bitmap(slot);
kvm_arch_free_memslot(kvm, slot);
- slot->flags = 0;
- slot->npages = 0;
+ kfree(slot);
}
static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
{
+ struct hlist_node *idnode;
struct kvm_memory_slot *memslot;
+ int bkt;
- if (!slots)
+ /*
+ * The same memslot objects live in both active and inactive sets,
+ * arbitrarily free using index '1' so the second invocation of this
+ * function isn't operating over a structure with dangling pointers
+ * (even though this function isn't actually touching them).
+ */
+ if (!slots->node_idx)
return;
- kvm_for_each_memslot(memslot, slots)
+ hash_for_each_safe(slots->id_hash, bkt, idnode, memslot, id_node[1])
kvm_free_memslot(kvm, memslot);
-
- kvfree(slots);
}
static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc)
@@ -1034,8 +1052,9 @@ int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
static struct kvm *kvm_create_vm(unsigned long type)
{
struct kvm *kvm = kvm_arch_alloc_vm();
+ struct kvm_memslots *slots;
int r = -ENOMEM;
- int i;
+ int i, j;
if (!kvm)
return ERR_PTR(-ENOMEM);
@@ -1050,6 +1069,10 @@ static struct kvm *kvm_create_vm(unsigned long type)
mutex_init(&kvm->slots_arch_lock);
spin_lock_init(&kvm->mn_invalidate_lock);
rcuwait_init(&kvm->mn_memslots_update_rcuwait);
+ xa_init(&kvm->vcpu_array);
+
+ INIT_LIST_HEAD(&kvm->gpc_list);
+ spin_lock_init(&kvm->gpc_lock);
INIT_LIST_HEAD(&kvm->devices);
@@ -1062,13 +1085,20 @@ static struct kvm *kvm_create_vm(unsigned long type)
refcount_set(&kvm->users_count, 1);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- struct kvm_memslots *slots = kvm_alloc_memslots();
+ for (j = 0; j < 2; j++) {
+ slots = &kvm->__memslots[i][j];
- if (!slots)
- goto out_err_no_arch_destroy_vm;
- /* Generations must be different for each address space. */
- slots->generation = i;
- rcu_assign_pointer(kvm->memslots[i], slots);
+ atomic_long_set(&slots->last_used_slot, (unsigned long)NULL);
+ slots->hva_tree = RB_ROOT_CACHED;
+ slots->gfn_tree = RB_ROOT;
+ hash_init(slots->id_hash);
+ slots->node_idx = j;
+
+ /* Generations must be different for each address space. */
+ slots->generation = i;
+ }
+
+ rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]);
}
for (i = 0; i < KVM_NR_BUSES; i++) {
@@ -1122,8 +1152,6 @@ out_err_no_arch_destroy_vm:
WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm_get_bus(kvm, i));
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
cleanup_srcu_struct(&kvm->irq_srcu);
out_err_no_irq_srcu:
cleanup_srcu_struct(&kvm->srcu);
@@ -1188,8 +1216,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
#endif
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ kvm_free_memslots(kvm, &kvm->__memslots[i][0]);
+ kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
+ }
cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
@@ -1259,165 +1289,136 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
return 0;
}
-/*
- * Delete a memslot by decrementing the number of used slots and shifting all
- * other entries in the array forward one spot.
- */
-static inline void kvm_memslot_delete(struct kvm_memslots *slots,
- struct kvm_memory_slot *memslot)
+static struct kvm_memslots *kvm_get_inactive_memslots(struct kvm *kvm, int as_id)
{
- struct kvm_memory_slot *mslots = slots->memslots;
- int i;
-
- if (WARN_ON(slots->id_to_index[memslot->id] == -1))
- return;
-
- slots->used_slots--;
-
- if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
- atomic_set(&slots->last_used_slot, 0);
+ struct kvm_memslots *active = __kvm_memslots(kvm, as_id);
+ int node_idx_inactive = active->node_idx ^ 1;
- for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
- mslots[i] = mslots[i + 1];
- slots->id_to_index[mslots[i].id] = i;
- }
- mslots[i] = *memslot;
- slots->id_to_index[memslot->id] = -1;
+ return &kvm->__memslots[as_id][node_idx_inactive];
}
/*
- * "Insert" a new memslot by incrementing the number of used slots. Returns
- * the new slot's initial index into the memslots array.
+ * Helper to get the address space ID when one of memslot pointers may be NULL.
+ * This also serves as a sanity that at least one of the pointers is non-NULL,
+ * and that their address space IDs don't diverge.
*/
-static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
+static int kvm_memslots_get_as_id(struct kvm_memory_slot *a,
+ struct kvm_memory_slot *b)
{
- return slots->used_slots++;
-}
+ if (WARN_ON_ONCE(!a && !b))
+ return 0;
-/*
- * Move a changed memslot backwards in the array by shifting existing slots
- * with a higher GFN toward the front of the array. Note, the changed memslot
- * itself is not preserved in the array, i.e. not swapped at this time, only
- * its new index into the array is tracked. Returns the changed memslot's
- * current index into the memslots array.
- */
-static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
- struct kvm_memory_slot *memslot)
-{
- struct kvm_memory_slot *mslots = slots->memslots;
- int i;
+ if (!a)
+ return b->as_id;
+ if (!b)
+ return a->as_id;
- if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) ||
- WARN_ON_ONCE(!slots->used_slots))
- return -1;
+ WARN_ON_ONCE(a->as_id != b->as_id);
+ return a->as_id;
+}
- /*
- * Move the target memslot backward in the array by shifting existing
- * memslots with a higher GFN (than the target memslot) towards the
- * front of the array.
- */
- for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) {
- if (memslot->base_gfn > mslots[i + 1].base_gfn)
- break;
+static void kvm_insert_gfn_node(struct kvm_memslots *slots,
+ struct kvm_memory_slot *slot)
+{
+ struct rb_root *gfn_tree = &slots->gfn_tree;
+ struct rb_node **node, *parent;
+ int idx = slots->node_idx;
- WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
+ parent = NULL;
+ for (node = &gfn_tree->rb_node; *node; ) {
+ struct kvm_memory_slot *tmp;
- /* Shift the next memslot forward one and update its index. */
- mslots[i] = mslots[i + 1];
- slots->id_to_index[mslots[i].id] = i;
+ tmp = container_of(*node, struct kvm_memory_slot, gfn_node[idx]);
+ parent = *node;
+ if (slot->base_gfn < tmp->base_gfn)
+ node = &(*node)->rb_left;
+ else if (slot->base_gfn > tmp->base_gfn)
+ node = &(*node)->rb_right;
+ else
+ BUG();
}
- return i;
+
+ rb_link_node(&slot->gfn_node[idx], parent, node);
+ rb_insert_color(&slot->gfn_node[idx], gfn_tree);
}
-/*
- * Move a changed memslot forwards in the array by shifting existing slots with
- * a lower GFN toward the back of the array. Note, the changed memslot itself
- * is not preserved in the array, i.e. not swapped at this time, only its new
- * index into the array is tracked. Returns the changed memslot's final index
- * into the memslots array.
- */
-static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
- struct kvm_memory_slot *memslot,
- int start)
+static void kvm_erase_gfn_node(struct kvm_memslots *slots,
+ struct kvm_memory_slot *slot)
{
- struct kvm_memory_slot *mslots = slots->memslots;
- int i;
+ rb_erase(&slot->gfn_node[slots->node_idx], &slots->gfn_tree);
+}
- for (i = start; i > 0; i--) {
- if (memslot->base_gfn < mslots[i - 1].base_gfn)
- break;
+static void kvm_replace_gfn_node(struct kvm_memslots *slots,
+ struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new)
+{
+ int idx = slots->node_idx;
- WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
+ WARN_ON_ONCE(old->base_gfn != new->base_gfn);
- /* Shift the next memslot back one and update its index. */
- mslots[i] = mslots[i - 1];
- slots->id_to_index[mslots[i].id] = i;
- }
- return i;
+ rb_replace_node(&old->gfn_node[idx], &new->gfn_node[idx],
+ &slots->gfn_tree);
}
/*
- * Re-sort memslots based on their GFN to account for an added, deleted, or
- * moved memslot. Sorting memslots by GFN allows using a binary search during
- * memslot lookup.
- *
- * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! I.e. the entry
- * at memslots[0] has the highest GFN.
- *
- * The sorting algorithm takes advantage of having initially sorted memslots
- * and knowing the position of the changed memslot. Sorting is also optimized
- * by not swapping the updated memslot and instead only shifting other memslots
- * and tracking the new index for the update memslot. Only once its final
- * index is known is the updated memslot copied into its position in the array.
- *
- * - When deleting a memslot, the deleted memslot simply needs to be moved to
- * the end of the array.
- *
- * - When creating a memslot, the algorithm "inserts" the new memslot at the
- * end of the array and then it forward to its correct location.
+ * Replace @old with @new in the inactive memslots.
*
- * - When moving a memslot, the algorithm first moves the updated memslot
- * backward to handle the scenario where the memslot's GFN was changed to a
- * lower value. update_memslots() then falls through and runs the same flow
- * as creating a memslot to move the memslot forward to handle the scenario
- * where its GFN was changed to a higher value.
+ * With NULL @old this simply adds @new.
+ * With NULL @new this simply removes @old.
*
- * Note, slots are sorted from highest->lowest instead of lowest->highest for
- * historical reasons. Originally, invalid memslots where denoted by having
- * GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots
- * to the end of the array. The current algorithm uses dedicated logic to
- * delete a memslot and thus does not rely on invalid memslots having GFN=0.
- *
- * The other historical motiviation for highest->lowest was to improve the
- * performance of memslot lookup. KVM originally used a linear search starting
- * at memslots[0]. On x86, the largest memslot usually has one of the highest,
- * if not *the* highest, GFN, as the bulk of the guest's RAM is located in a
- * single memslot above the 4gb boundary. As the largest memslot is also the
- * most likely to be referenced, sorting it to the front of the array was
- * advantageous. The current binary search starts from the middle of the array
- * and uses an LRU pointer to improve performance for all memslots and GFNs.
+ * If @new is non-NULL its hva_node[slots_idx] range has to be set
+ * appropriately.
*/
-static void update_memslots(struct kvm_memslots *slots,
- struct kvm_memory_slot *memslot,
- enum kvm_mr_change change)
+static void kvm_replace_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new)
{
- int i;
+ int as_id = kvm_memslots_get_as_id(old, new);
+ struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id);
+ int idx = slots->node_idx;
- if (change == KVM_MR_DELETE) {
- kvm_memslot_delete(slots, memslot);
- } else {
- if (change == KVM_MR_CREATE)
- i = kvm_memslot_insert_back(slots);
- else
- i = kvm_memslot_move_backward(slots, memslot);
- i = kvm_memslot_move_forward(slots, memslot, i);
+ if (old) {
+ hash_del(&old->id_node[idx]);
+ interval_tree_remove(&old->hva_node[idx], &slots->hva_tree);
- /*
- * Copy the memslot to its new position in memslots and update
- * its index accordingly.
- */
- slots->memslots[i] = *memslot;
- slots->id_to_index[memslot->id] = i;
+ if ((long)old == atomic_long_read(&slots->last_used_slot))
+ atomic_long_set(&slots->last_used_slot, (long)new);
+
+ if (!new) {
+ kvm_erase_gfn_node(slots, old);
+ return;
+ }
+ }
+
+ /*
+ * Initialize @new's hva range. Do this even when replacing an @old
+ * slot, kvm_copy_memslot() deliberately does not touch node data.
+ */
+ new->hva_node[idx].start = new->userspace_addr;
+ new->hva_node[idx].last = new->userspace_addr +
+ (new->npages << PAGE_SHIFT) - 1;
+
+ /*
+ * (Re)Add the new memslot. There is no O(1) interval_tree_replace(),
+ * hva_node needs to be swapped with remove+insert even though hva can't
+ * change when replacing an existing slot.
+ */
+ hash_add(slots->id_hash, &new->id_node[idx], new->id);
+ interval_tree_insert(&new->hva_node[idx], &slots->hva_tree);
+
+ /*
+ * If the memslot gfn is unchanged, rb_replace_node() can be used to
+ * switch the node in the gfn tree instead of removing the old and
+ * inserting the new as two separate operations. Replacement is a
+ * single O(1) operation versus two O(log(n)) operations for
+ * remove+insert.
+ */
+ if (old && old->base_gfn == new->base_gfn) {
+ kvm_replace_gfn_node(slots, old, new);
+ } else {
+ if (old)
+ kvm_erase_gfn_node(slots, old);
+ kvm_insert_gfn_node(slots, new);
}
}
@@ -1435,11 +1436,12 @@ static int check_memory_region_flags(const struct kvm_userspace_memory_region *m
return 0;
}
-static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
- int as_id, struct kvm_memslots *slots)
+static void kvm_swap_active_memslots(struct kvm *kvm, int as_id)
{
- struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
- u64 gen = old_memslots->generation;
+ struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id);
+
+ /* Grab the generation from the activate memslots. */
+ u64 gen = __kvm_memslots(kvm, as_id)->generation;
WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
@@ -1490,60 +1492,226 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
kvm_arch_memslots_updated(kvm, gen);
slots->generation = gen;
-
- return old_memslots;
}
-static size_t kvm_memslots_size(int slots)
+static int kvm_prepare_memory_region(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
- return sizeof(struct kvm_memslots) +
- (sizeof(struct kvm_memory_slot) * slots);
+ int r;
+
+ /*
+ * If dirty logging is disabled, nullify the bitmap; the old bitmap
+ * will be freed on "commit". If logging is enabled in both old and
+ * new, reuse the existing bitmap. If logging is enabled only in the
+ * new and KVM isn't using a ring buffer, allocate and initialize a
+ * new bitmap.
+ */
+ if (change != KVM_MR_DELETE) {
+ if (!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+ new->dirty_bitmap = NULL;
+ else if (old && old->dirty_bitmap)
+ new->dirty_bitmap = old->dirty_bitmap;
+ else if (!kvm->dirty_ring_size) {
+ r = kvm_alloc_dirty_bitmap(new);
+ if (r)
+ return r;
+
+ if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+ bitmap_set(new->dirty_bitmap, 0, new->npages);
+ }
+ }
+
+ r = kvm_arch_prepare_memory_region(kvm, old, new, change);
+
+ /* Free the bitmap on failure if it was allocated above. */
+ if (r && new && new->dirty_bitmap && old && !old->dirty_bitmap)
+ kvm_destroy_dirty_bitmap(new);
+
+ return r;
}
-static void kvm_copy_memslots(struct kvm_memslots *to,
- struct kvm_memslots *from)
+static void kvm_commit_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
- memcpy(to, from, kvm_memslots_size(from->used_slots));
+ /*
+ * Update the total number of memslot pages before calling the arch
+ * hook so that architectures can consume the result directly.
+ */
+ if (change == KVM_MR_DELETE)
+ kvm->nr_memslot_pages -= old->npages;
+ else if (change == KVM_MR_CREATE)
+ kvm->nr_memslot_pages += new->npages;
+
+ kvm_arch_commit_memory_region(kvm, old, new, change);
+
+ switch (change) {
+ case KVM_MR_CREATE:
+ /* Nothing more to do. */
+ break;
+ case KVM_MR_DELETE:
+ /* Free the old memslot and all its metadata. */
+ kvm_free_memslot(kvm, old);
+ break;
+ case KVM_MR_MOVE:
+ case KVM_MR_FLAGS_ONLY:
+ /*
+ * Free the dirty bitmap as needed; the below check encompasses
+ * both the flags and whether a ring buffer is being used)
+ */
+ if (old->dirty_bitmap && !new->dirty_bitmap)
+ kvm_destroy_dirty_bitmap(old);
+
+ /*
+ * The final quirk. Free the detached, old slot, but only its
+ * memory, not any metadata. Metadata, including arch specific
+ * data, may be reused by @new.
+ */
+ kfree(old);
+ break;
+ default:
+ BUG();
+ }
}
/*
- * Note, at a minimum, the current number of used slots must be allocated, even
- * when deleting a memslot, as we need a complete duplicate of the memslots for
- * use when invalidating a memslot prior to deleting/moving the memslot.
+ * Activate @new, which must be installed in the inactive slots by the caller,
+ * by swapping the active slots and then propagating @new to @old once @old is
+ * unreachable and can be safely modified.
+ *
+ * With NULL @old this simply adds @new to @active (while swapping the sets).
+ * With NULL @new this simply removes @old from @active and frees it
+ * (while also swapping the sets).
*/
-static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
- enum kvm_mr_change change)
+static void kvm_activate_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new)
{
- struct kvm_memslots *slots;
- size_t new_size;
+ int as_id = kvm_memslots_get_as_id(old, new);
- if (change == KVM_MR_CREATE)
- new_size = kvm_memslots_size(old->used_slots + 1);
- else
- new_size = kvm_memslots_size(old->used_slots);
+ kvm_swap_active_memslots(kvm, as_id);
- slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
- if (likely(slots))
- kvm_copy_memslots(slots, old);
+ /* Propagate the new memslot to the now inactive memslots. */
+ kvm_replace_memslot(kvm, old, new);
+}
- return slots;
+static void kvm_copy_memslot(struct kvm_memory_slot *dest,
+ const struct kvm_memory_slot *src)
+{
+ dest->base_gfn = src->base_gfn;
+ dest->npages = src->npages;
+ dest->dirty_bitmap = src->dirty_bitmap;
+ dest->arch = src->arch;
+ dest->userspace_addr = src->userspace_addr;
+ dest->flags = src->flags;
+ dest->id = src->id;
+ dest->as_id = src->as_id;
+}
+
+static void kvm_invalidate_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ struct kvm_memory_slot *invalid_slot)
+{
+ /*
+ * Mark the current slot INVALID. As with all memslot modifications,
+ * this must be done on an unreachable slot to avoid modifying the
+ * current slot in the active tree.
+ */
+ kvm_copy_memslot(invalid_slot, old);
+ invalid_slot->flags |= KVM_MEMSLOT_INVALID;
+ kvm_replace_memslot(kvm, old, invalid_slot);
+
+ /*
+ * Activate the slot that is now marked INVALID, but don't propagate
+ * the slot to the now inactive slots. The slot is either going to be
+ * deleted or recreated as a new slot.
+ */
+ kvm_swap_active_memslots(kvm, old->as_id);
+
+ /*
+ * From this point no new shadow pages pointing to a deleted, or moved,
+ * memslot will be created. Validation of sp->gfn happens in:
+ * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+ * - kvm_is_visible_gfn (mmu_check_root)
+ */
+ kvm_arch_flush_shadow_memslot(kvm, old);
+
+ /* Was released by kvm_swap_active_memslots, reacquire. */
+ mutex_lock(&kvm->slots_arch_lock);
+
+ /*
+ * Copy the arch-specific field of the newly-installed slot back to the
+ * old slot as the arch data could have changed between releasing
+ * slots_arch_lock in install_new_memslots() and re-acquiring the lock
+ * above. Writers are required to retrieve memslots *after* acquiring
+ * slots_arch_lock, thus the active slot's data is guaranteed to be fresh.
+ */
+ old->arch = invalid_slot->arch;
+}
+
+static void kvm_create_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *new)
+{
+ /* Add the new memslot to the inactive set and activate. */
+ kvm_replace_memslot(kvm, NULL, new);
+ kvm_activate_memslot(kvm, NULL, new);
+}
+
+static void kvm_delete_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ struct kvm_memory_slot *invalid_slot)
+{
+ /*
+ * Remove the old memslot (in the inactive memslots) by passing NULL as
+ * the "new" slot, and for the invalid version in the active slots.
+ */
+ kvm_replace_memslot(kvm, old, NULL);
+ kvm_activate_memslot(kvm, invalid_slot, NULL);
+}
+
+static void kvm_move_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ struct kvm_memory_slot *invalid_slot)
+{
+ /*
+ * Replace the old memslot in the inactive slots, and then swap slots
+ * and replace the current INVALID with the new as well.
+ */
+ kvm_replace_memslot(kvm, old, new);
+ kvm_activate_memslot(kvm, invalid_slot, new);
+}
+
+static void kvm_update_flags_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new)
+{
+ /*
+ * Similar to the MOVE case, but the slot doesn't need to be zapped as
+ * an intermediate step. Instead, the old memslot is simply replaced
+ * with a new, updated copy in both memslot sets.
+ */
+ kvm_replace_memslot(kvm, old, new);
+ kvm_activate_memslot(kvm, old, new);
}
static int kvm_set_memslot(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot *new, int as_id,
+ struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- struct kvm_memory_slot *slot, old;
- struct kvm_memslots *slots;
+ struct kvm_memory_slot *invalid_slot;
int r;
/*
- * Released in install_new_memslots.
+ * Released in kvm_swap_active_memslots.
*
* Must be held from before the current memslots are copied until
* after the new memslots are installed with rcu_assign_pointer,
- * then released before the synchronize srcu in install_new_memslots.
+ * then released before the synchronize srcu in kvm_swap_active_memslots.
*
* When modifying memslots outside of the slots_lock, must be held
* before reading the pointer to the current memslots until after all
@@ -1554,114 +1722,88 @@ static int kvm_set_memslot(struct kvm *kvm,
*/
mutex_lock(&kvm->slots_arch_lock);
- slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change);
- if (!slots) {
- mutex_unlock(&kvm->slots_arch_lock);
- return -ENOMEM;
- }
-
+ /*
+ * Invalidate the old slot if it's being deleted or moved. This is
+ * done prior to actually deleting/moving the memslot to allow vCPUs to
+ * continue running by ensuring there are no mappings or shadow pages
+ * for the memslot when it is deleted/moved. Without pre-invalidation
+ * (and without a lock), a window would exist between effecting the
+ * delete/move and committing the changes in arch code where KVM or a
+ * guest could access a non-existent memslot.
+ *
+ * Modifications are done on a temporary, unreachable slot. The old
+ * slot needs to be preserved in case a later step fails and the
+ * invalidation needs to be reverted.
+ */
if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
- /*
- * Note, the INVALID flag needs to be in the appropriate entry
- * in the freshly allocated memslots, not in @old or @new.
- */
- slot = id_to_memslot(slots, new->id);
- slot->flags |= KVM_MEMSLOT_INVALID;
-
- /*
- * We can re-use the memory from the old memslots.
- * It will be overwritten with a copy of the new memslots
- * after reacquiring the slots_arch_lock below.
- */
- slots = install_new_memslots(kvm, as_id, slots);
-
- /* From this point no new shadow pages pointing to a deleted,
- * or moved, memslot will be created.
- *
- * validation of sp->gfn happens in:
- * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
- * - kvm_is_visible_gfn (mmu_check_root)
- */
- kvm_arch_flush_shadow_memslot(kvm, slot);
-
- /* Released in install_new_memslots. */
- mutex_lock(&kvm->slots_arch_lock);
+ invalid_slot = kzalloc(sizeof(*invalid_slot), GFP_KERNEL_ACCOUNT);
+ if (!invalid_slot) {
+ mutex_unlock(&kvm->slots_arch_lock);
+ return -ENOMEM;
+ }
+ kvm_invalidate_memslot(kvm, old, invalid_slot);
+ }
+ r = kvm_prepare_memory_region(kvm, old, new, change);
+ if (r) {
/*
- * The arch-specific fields of the memslots could have changed
- * between releasing the slots_arch_lock in
- * install_new_memslots and here, so get a fresh copy of the
- * slots.
+ * For DELETE/MOVE, revert the above INVALID change. No
+ * modifications required since the original slot was preserved
+ * in the inactive slots. Changing the active memslots also
+ * release slots_arch_lock.
*/
- kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id));
+ if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+ kvm_activate_memslot(kvm, invalid_slot, old);
+ kfree(invalid_slot);
+ } else {
+ mutex_unlock(&kvm->slots_arch_lock);
+ }
+ return r;
}
/*
- * Make a full copy of the old memslot, the pointer will become stale
- * when the memslots are re-sorted by update_memslots(), and the old
- * memslot needs to be referenced after calling update_memslots(), e.g.
- * to free its resources and for arch specific behavior. This needs to
- * happen *after* (re)acquiring slots_arch_lock.
+ * For DELETE and MOVE, the working slot is now active as the INVALID
+ * version of the old slot. MOVE is particularly special as it reuses
+ * the old slot and returns a copy of the old slot (in working_slot).
+ * For CREATE, there is no old slot. For DELETE and FLAGS_ONLY, the
+ * old slot is detached but otherwise preserved.
*/
- slot = id_to_memslot(slots, new->id);
- if (slot) {
- old = *slot;
- } else {
- WARN_ON_ONCE(change != KVM_MR_CREATE);
- memset(&old, 0, sizeof(old));
- old.id = new->id;
- old.as_id = as_id;
- }
-
- /* Copy the arch-specific data, again after (re)acquiring slots_arch_lock. */
- memcpy(&new->arch, &old.arch, sizeof(old.arch));
-
- r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
- if (r)
- goto out_slots;
-
- update_memslots(slots, new, change);
- slots = install_new_memslots(kvm, as_id, slots);
+ if (change == KVM_MR_CREATE)
+ kvm_create_memslot(kvm, new);
+ else if (change == KVM_MR_DELETE)
+ kvm_delete_memslot(kvm, old, invalid_slot);
+ else if (change == KVM_MR_MOVE)
+ kvm_move_memslot(kvm, old, new, invalid_slot);
+ else if (change == KVM_MR_FLAGS_ONLY)
+ kvm_update_flags_memslot(kvm, old, new);
+ else
+ BUG();
- kvm_arch_commit_memory_region(kvm, mem, &old, new, change);
+ /* Free the temporary INVALID slot used for DELETE and MOVE. */
+ if (change == KVM_MR_DELETE || change == KVM_MR_MOVE)
+ kfree(invalid_slot);
- /* Free the old memslot's metadata. Note, this is the full copy!!! */
- if (change == KVM_MR_DELETE)
- kvm_free_memslot(kvm, &old);
+ /*
+ * No need to refresh new->arch, changes after dropping slots_arch_lock
+ * will directly hit the final, active memsot. Architectures are
+ * responsible for knowing that new->arch may be stale.
+ */
+ kvm_commit_memory_region(kvm, old, new, change);
- kvfree(slots);
return 0;
-
-out_slots:
- if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
- slot = id_to_memslot(slots, new->id);
- slot->flags &= ~KVM_MEMSLOT_INVALID;
- slots = install_new_memslots(kvm, as_id, slots);
- } else {
- mutex_unlock(&kvm->slots_arch_lock);
- }
- kvfree(slots);
- return r;
}
-static int kvm_delete_memslot(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot *old, int as_id)
+static bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id,
+ gfn_t start, gfn_t end)
{
- struct kvm_memory_slot new;
+ struct kvm_memslot_iter iter;
- if (!old->npages)
- return -EINVAL;
-
- memset(&new, 0, sizeof(new));
- new.id = old->id;
- /*
- * This is only for debugging purpose; it should never be referenced
- * for a removed memslot.
- */
- new.as_id = as_id;
+ kvm_for_each_memslot_in_gfn_range(&iter, slots, start, end) {
+ if (iter.slot->id != id)
+ return true;
+ }
- return kvm_set_memslot(kvm, mem, &new, as_id, KVM_MR_DELETE);
+ return false;
}
/*
@@ -1675,9 +1817,11 @@ static int kvm_delete_memslot(struct kvm *kvm,
int __kvm_set_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem)
{
- struct kvm_memory_slot old, new;
- struct kvm_memory_slot *tmp;
+ struct kvm_memory_slot *old, *new;
+ struct kvm_memslots *slots;
enum kvm_mr_change change;
+ unsigned long npages;
+ gfn_t base_gfn;
int as_id, id;
int r;
@@ -1704,89 +1848,72 @@ int __kvm_set_memory_region(struct kvm *kvm,
return -EINVAL;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
return -EINVAL;
+ if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
+ return -EINVAL;
+
+ slots = __kvm_memslots(kvm, as_id);
/*
- * Make a full copy of the old memslot, the pointer will become stale
- * when the memslots are re-sorted by update_memslots(), and the old
- * memslot needs to be referenced after calling update_memslots(), e.g.
- * to free its resources and for arch specific behavior.
+ * Note, the old memslot (and the pointer itself!) may be invalidated
+ * and/or destroyed by kvm_set_memslot().
*/
- tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id);
- if (tmp) {
- old = *tmp;
- tmp = NULL;
- } else {
- memset(&old, 0, sizeof(old));
- old.id = id;
- }
+ old = id_to_memslot(slots, id);
- if (!mem->memory_size)
- return kvm_delete_memslot(kvm, mem, &old, as_id);
+ if (!mem->memory_size) {
+ if (!old || !old->npages)
+ return -EINVAL;
- new.as_id = as_id;
- new.id = id;
- new.base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
- new.npages = mem->memory_size >> PAGE_SHIFT;
- new.flags = mem->flags;
- new.userspace_addr = mem->userspace_addr;
+ if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages))
+ return -EIO;
- if (new.npages > KVM_MEM_MAX_NR_PAGES)
- return -EINVAL;
+ return kvm_set_memslot(kvm, old, NULL, KVM_MR_DELETE);
+ }
- if (!old.npages) {
+ base_gfn = (mem->guest_phys_addr >> PAGE_SHIFT);
+ npages = (mem->memory_size >> PAGE_SHIFT);
+
+ if (!old || !old->npages) {
change = KVM_MR_CREATE;
- new.dirty_bitmap = NULL;
+
+ /*
+ * To simplify KVM internals, the total number of pages across
+ * all memslots must fit in an unsigned long.
+ */
+ if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
+ return -EINVAL;
} else { /* Modify an existing slot. */
- if ((new.userspace_addr != old.userspace_addr) ||
- (new.npages != old.npages) ||
- ((new.flags ^ old.flags) & KVM_MEM_READONLY))
+ if ((mem->userspace_addr != old->userspace_addr) ||
+ (npages != old->npages) ||
+ ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
return -EINVAL;
- if (new.base_gfn != old.base_gfn)
+ if (base_gfn != old->base_gfn)
change = KVM_MR_MOVE;
- else if (new.flags != old.flags)
+ else if (mem->flags != old->flags)
change = KVM_MR_FLAGS_ONLY;
else /* Nothing to change. */
return 0;
-
- /* Copy dirty_bitmap from the current memslot. */
- new.dirty_bitmap = old.dirty_bitmap;
}
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
- /* Check for overlaps */
- kvm_for_each_memslot(tmp, __kvm_memslots(kvm, as_id)) {
- if (tmp->id == id)
- continue;
- if (!((new.base_gfn + new.npages <= tmp->base_gfn) ||
- (new.base_gfn >= tmp->base_gfn + tmp->npages)))
- return -EEXIST;
- }
- }
+ if ((change == KVM_MR_CREATE || change == KVM_MR_MOVE) &&
+ kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages))
+ return -EEXIST;
- /* Allocate/free page dirty bitmap as needed */
- if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
- new.dirty_bitmap = NULL;
- else if (!new.dirty_bitmap && !kvm->dirty_ring_size) {
- r = kvm_alloc_dirty_bitmap(&new);
- if (r)
- return r;
+ /* Allocate a slot that will persist in the memslot. */
+ new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
+ if (!new)
+ return -ENOMEM;
- if (kvm_dirty_log_manual_protect_and_init_set(kvm))
- bitmap_set(new.dirty_bitmap, 0, new.npages);
- }
+ new->as_id = as_id;
+ new->id = id;
+ new->base_gfn = base_gfn;
+ new->npages = npages;
+ new->flags = mem->flags;
+ new->userspace_addr = mem->userspace_addr;
- r = kvm_set_memslot(kvm, mem, &new, as_id, change);
+ r = kvm_set_memslot(kvm, old, new, change);
if (r)
- goto out_bitmap;
-
- if (old.dirty_bitmap && !new.dirty_bitmap)
- kvm_destroy_dirty_bitmap(&old);
- return 0;
-
-out_bitmap:
- if (new.dirty_bitmap && !old.dirty_bitmap)
- kvm_destroy_dirty_bitmap(&new);
+ kfree(new);
return r;
}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
@@ -2092,21 +2219,30 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot);
struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
+ u64 gen = slots->generation;
struct kvm_memory_slot *slot;
- int slot_index;
- slot = try_get_memslot(slots, vcpu->last_used_slot, gfn);
+ /*
+ * This also protects against using a memslot from a different address space,
+ * since different address spaces have different generation numbers.
+ */
+ if (unlikely(gen != vcpu->last_used_slot_gen)) {
+ vcpu->last_used_slot = NULL;
+ vcpu->last_used_slot_gen = gen;
+ }
+
+ slot = try_get_memslot(vcpu->last_used_slot, gfn);
if (slot)
return slot;
/*
* Fall back to searching all memslots. We purposely use
* search_memslots() instead of __gfn_to_memslot() to avoid
- * thrashing the VM-wide last_used_index in kvm_memslots.
+ * thrashing the VM-wide last_used_slot in kvm_memslots.
*/
- slot = search_memslots(slots, gfn, &slot_index);
+ slot = search_memslots(slots, gfn, false);
if (slot) {
- vcpu->last_used_slot = slot_index;
+ vcpu->last_used_slot = slot;
return slot;
}
@@ -2154,12 +2290,12 @@ out:
return size;
}
-static bool memslot_is_readonly(struct kvm_memory_slot *slot)
+static bool memslot_is_readonly(const struct kvm_memory_slot *slot)
{
return slot->flags & KVM_MEM_READONLY;
}
-static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+static unsigned long __gfn_to_hva_many(const struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages, bool write)
{
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
@@ -2406,8 +2542,8 @@ out:
* 2): @write_fault = false && @writable, @writable will tell the caller
* whether the mapping is writable.
*/
-static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
- bool write_fault, bool *writable)
+kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+ bool write_fault, bool *writable)
{
struct vm_area_struct *vma;
kvm_pfn_t pfn = 0;
@@ -2454,7 +2590,7 @@ exit:
return pfn;
}
-kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
+kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
bool atomic, bool *async, bool write_fault,
bool *writable, hva_t *hva)
{
@@ -2494,13 +2630,13 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
-kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
{
return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
-kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
+kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn)
{
return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL);
}
@@ -3019,15 +3155,22 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
EXPORT_SYMBOL_GPL(kvm_clear_guest);
void mark_page_dirty_in_slot(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
+ const struct kvm_memory_slot *memslot,
gfn_t gfn)
{
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+
+#ifdef CONFIG_HAVE_KVM_DIRTY_RING
+ if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm))
+ return;
+#endif
+
if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
u32 slot = (memslot->as_id << 16) | memslot->id;
if (kvm->dirty_ring_size)
- kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
+ kvm_dirty_ring_push(&vcpu->dirty_ring,
slot, rel_gfn);
else
set_bit_le(rel_gfn, memslot->dirty_bitmap);
@@ -3139,68 +3282,93 @@ out:
return ret;
}
-static inline void
-update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited)
+/*
+ * Block the vCPU until the vCPU is runnable, an event arrives, or a signal is
+ * pending. This is mostly used when halting a vCPU, but may also be used
+ * directly for other vCPU non-runnable states, e.g. x86's Wait-For-SIPI.
+ */
+bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
- if (waited)
- vcpu->stat.generic.halt_poll_fail_ns += poll_ns;
- else
- vcpu->stat.generic.halt_poll_success_ns += poll_ns;
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
+ bool waited = false;
+
+ vcpu->stat.generic.blocking = 1;
+
+ kvm_arch_vcpu_blocking(vcpu);
+
+ prepare_to_rcuwait(wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (kvm_vcpu_check_block(vcpu) < 0)
+ break;
+
+ waited = true;
+ schedule();
+ }
+ finish_rcuwait(wait);
+
+ kvm_arch_vcpu_unblocking(vcpu);
+
+ vcpu->stat.generic.blocking = 0;
+
+ return waited;
+}
+
+static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start,
+ ktime_t end, bool success)
+{
+ struct kvm_vcpu_stat_generic *stats = &vcpu->stat.generic;
+ u64 poll_ns = ktime_to_ns(ktime_sub(end, start));
+
+ ++vcpu->stat.generic.halt_attempted_poll;
+
+ if (success) {
+ ++vcpu->stat.generic.halt_successful_poll;
+
+ if (!vcpu_valid_wakeup(vcpu))
+ ++vcpu->stat.generic.halt_poll_invalid;
+
+ stats->halt_poll_success_ns += poll_ns;
+ KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_success_hist, poll_ns);
+ } else {
+ stats->halt_poll_fail_ns += poll_ns;
+ KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_fail_hist, poll_ns);
+ }
}
/*
- * The vCPU has executed a HLT instruction with in-kernel mode enabled.
+ * Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc... If halt
+ * polling is enabled, busy wait for a short time before blocking to avoid the
+ * expensive block+unblock sequence if a wake event arrives soon after the vCPU
+ * is halted.
*/
-void kvm_vcpu_block(struct kvm_vcpu *vcpu)
+void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
{
+ bool halt_poll_allowed = !kvm_arch_no_poll(vcpu);
+ bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns;
ktime_t start, cur, poll_end;
bool waited = false;
- u64 block_ns;
-
- kvm_arch_vcpu_blocking(vcpu);
+ u64 halt_ns;
start = cur = poll_end = ktime_get();
- if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
- ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
+ if (do_halt_poll) {
+ ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns);
- ++vcpu->stat.generic.halt_attempted_poll;
do {
/*
* This sets KVM_REQ_UNHALT if an interrupt
* arrives.
*/
- if (kvm_vcpu_check_block(vcpu) < 0) {
- ++vcpu->stat.generic.halt_successful_poll;
- if (!vcpu_valid_wakeup(vcpu))
- ++vcpu->stat.generic.halt_poll_invalid;
-
- KVM_STATS_LOG_HIST_UPDATE(
- vcpu->stat.generic.halt_poll_success_hist,
- ktime_to_ns(ktime_get()) -
- ktime_to_ns(start));
+ if (kvm_vcpu_check_block(vcpu) < 0)
goto out;
- }
cpu_relax();
poll_end = cur = ktime_get();
} while (kvm_vcpu_can_poll(cur, stop));
-
- KVM_STATS_LOG_HIST_UPDATE(
- vcpu->stat.generic.halt_poll_fail_hist,
- ktime_to_ns(ktime_get()) - ktime_to_ns(start));
}
+ waited = kvm_vcpu_block(vcpu);
- prepare_to_rcuwait(&vcpu->wait);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- if (kvm_vcpu_check_block(vcpu) < 0)
- break;
-
- waited = true;
- schedule();
- }
- finish_rcuwait(&vcpu->wait);
cur = ktime_get();
if (waited) {
vcpu->stat.generic.halt_wait_ns +=
@@ -3209,42 +3377,43 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
ktime_to_ns(cur) - ktime_to_ns(poll_end));
}
out:
- kvm_arch_vcpu_unblocking(vcpu);
- block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
+ /* The total time the vCPU was "halted", including polling time. */
+ halt_ns = ktime_to_ns(cur) - ktime_to_ns(start);
- update_halt_poll_stats(
- vcpu, ktime_to_ns(ktime_sub(poll_end, start)), waited);
+ /*
+ * Note, halt-polling is considered successful so long as the vCPU was
+ * never actually scheduled out, i.e. even if the wake event arrived
+ * after of the halt-polling loop itself, but before the full wait.
+ */
+ if (do_halt_poll)
+ update_halt_poll_stats(vcpu, start, poll_end, !waited);
- if (!kvm_arch_no_poll(vcpu)) {
+ if (halt_poll_allowed) {
if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
} else if (vcpu->kvm->max_halt_poll_ns) {
- if (block_ns <= vcpu->halt_poll_ns)
+ if (halt_ns <= vcpu->halt_poll_ns)
;
/* we had a long block, shrink polling */
else if (vcpu->halt_poll_ns &&
- block_ns > vcpu->kvm->max_halt_poll_ns)
+ halt_ns > vcpu->kvm->max_halt_poll_ns)
shrink_halt_poll_ns(vcpu);
/* we had a short halt and our poll time is too small */
else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
- block_ns < vcpu->kvm->max_halt_poll_ns)
+ halt_ns < vcpu->kvm->max_halt_poll_ns)
grow_halt_poll_ns(vcpu);
} else {
vcpu->halt_poll_ns = 0;
}
}
- trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
- kvm_arch_vcpu_block_finish(vcpu);
+ trace_kvm_vcpu_wakeup(halt_ns, waited, vcpu_valid_wakeup(vcpu));
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_block);
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
- struct rcuwait *waitp;
-
- waitp = kvm_arch_vcpu_get_wait(vcpu);
- if (rcuwait_wake_up(waitp)) {
+ if (__kvm_vcpu_wake_up(vcpu)) {
WRITE_ONCE(vcpu->ready, true);
++vcpu->stat.generic.halt_wakeup;
return true;
@@ -3265,6 +3434,19 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
if (kvm_vcpu_wake_up(vcpu))
return;
+ me = get_cpu();
+ /*
+ * The only state change done outside the vcpu mutex is IN_GUEST_MODE
+ * to EXITING_GUEST_MODE. Therefore the moderately expensive "should
+ * kick" check does not need atomic operations if kvm_vcpu_kick is used
+ * within the vCPU thread itself.
+ */
+ if (vcpu == __this_cpu_read(kvm_running_vcpu)) {
+ if (vcpu->mode == IN_GUEST_MODE)
+ WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE);
+ goto out;
+ }
+
/*
* Note, the vCPU could get migrated to a different pCPU at any point
* after kvm_arch_vcpu_should_kick(), which could result in sending an
@@ -3272,12 +3454,12 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
* IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the
* vCPU also requires it to leave IN_GUEST_MODE.
*/
- me = get_cpu();
if (kvm_arch_vcpu_should_kick(vcpu)) {
cpu = READ_ONCE(vcpu->cpu);
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
smp_send_reschedule(cpu);
}
+out:
put_cpu();
}
EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
@@ -3375,10 +3557,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
struct kvm *kvm = me->kvm;
struct kvm_vcpu *vcpu;
int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
+ unsigned long i;
int yielded = 0;
int try = 3;
int pass;
- int i;
kvm_vcpu_set_in_spin_loop(me, true);
/*
@@ -3399,8 +3581,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
if (vcpu == me)
continue;
- if (rcuwait_active(&vcpu->wait) &&
- !vcpu_dy_runnable(vcpu))
+ if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
continue;
if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
!kvm_arch_dy_has_pending_interrupt(vcpu) &&
@@ -3429,7 +3610,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff)
{
-#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+#ifdef CONFIG_HAVE_KVM_DIRTY_RING
return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) &&
(pgoff < KVM_DIRTY_LOG_PAGE_OFFSET +
kvm->dirty_ring_size / PAGE_SIZE);
@@ -3585,7 +3766,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
}
vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
- BUG_ON(kvm->vcpus[vcpu->vcpu_idx]);
+ r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
+ BUG_ON(r == -EBUSY);
+ if (r)
+ goto unlock_vcpu_destroy;
/* Fill the stats id string for the vcpu */
snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
@@ -3595,15 +3779,14 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
if (r < 0) {
+ xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
kvm_put_kvm_no_destroy(kvm);
goto unlock_vcpu_destroy;
}
- kvm->vcpus[vcpu->vcpu_idx] = vcpu;
-
/*
- * Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus
- * before kvm->online_vcpu's incremented value.
+ * Pairs with smp_rmb() in kvm_get_vcpu. Store the vcpu
+ * pointer before kvm->online_vcpu's incremented value.
*/
smp_wmb();
atomic_inc(&kvm->online_vcpus);
@@ -4132,7 +4315,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_NR_MEMSLOTS:
return KVM_USER_MEM_SLOTS;
case KVM_CAP_DIRTY_LOG_RING:
-#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+#ifdef CONFIG_HAVE_KVM_DIRTY_RING
return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
#else
return 0;
@@ -4185,7 +4368,7 @@ static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
int cleared = 0;
@@ -5104,7 +5287,7 @@ static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset)
static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
*val = 0;
@@ -5117,7 +5300,7 @@ static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val)
static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
new file mode 100644
index 000000000000..34ca40823260
--- /dev/null
+++ b/virt/kvm/kvm_mm.h
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#ifndef __KVM_MM_H__
+#define __KVM_MM_H__ 1
+
+/*
+ * Architectures can choose whether to use an rwlock or spinlock
+ * for the mmu_lock. These macros, for use in common code
+ * only, avoids using #ifdefs in places that must deal with
+ * multiple architectures.
+ */
+
+#ifdef KVM_HAVE_MMU_RWLOCK
+#define KVM_MMU_LOCK_INIT(kvm) rwlock_init(&(kvm)->mmu_lock)
+#define KVM_MMU_LOCK(kvm) write_lock(&(kvm)->mmu_lock)
+#define KVM_MMU_UNLOCK(kvm) write_unlock(&(kvm)->mmu_lock)
+#define KVM_MMU_READ_LOCK(kvm) read_lock(&(kvm)->mmu_lock)
+#define KVM_MMU_READ_UNLOCK(kvm) read_unlock(&(kvm)->mmu_lock)
+#else
+#define KVM_MMU_LOCK_INIT(kvm) spin_lock_init(&(kvm)->mmu_lock)
+#define KVM_MMU_LOCK(kvm) spin_lock(&(kvm)->mmu_lock)
+#define KVM_MMU_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock)
+#define KVM_MMU_READ_LOCK(kvm) spin_lock(&(kvm)->mmu_lock)
+#define KVM_MMU_READ_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock)
+#endif /* KVM_HAVE_MMU_RWLOCK */
+
+kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+ bool write_fault, bool *writable);
+
+#ifdef CONFIG_HAVE_KVM_PFNCACHE
+void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end,
+ bool may_block);
+#else
+static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end,
+ bool may_block)
+{
+}
+#endif /* HAVE_KVM_PFNCACHE */
+
+#endif /* __KVM_MM_H__ */
diff --git a/virt/kvm/mmu_lock.h b/virt/kvm/mmu_lock.h
deleted file mode 100644
index 9e1308f9734c..000000000000
--- a/virt/kvm/mmu_lock.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#ifndef KVM_MMU_LOCK_H
-#define KVM_MMU_LOCK_H 1
-
-/*
- * Architectures can choose whether to use an rwlock or spinlock
- * for the mmu_lock. These macros, for use in common code
- * only, avoids using #ifdefs in places that must deal with
- * multiple architectures.
- */
-
-#ifdef KVM_HAVE_MMU_RWLOCK
-#define KVM_MMU_LOCK_INIT(kvm) rwlock_init(&(kvm)->mmu_lock)
-#define KVM_MMU_LOCK(kvm) write_lock(&(kvm)->mmu_lock)
-#define KVM_MMU_UNLOCK(kvm) write_unlock(&(kvm)->mmu_lock)
-#else
-#define KVM_MMU_LOCK_INIT(kvm) spin_lock_init(&(kvm)->mmu_lock)
-#define KVM_MMU_LOCK(kvm) spin_lock(&(kvm)->mmu_lock)
-#define KVM_MMU_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock)
-#endif /* KVM_HAVE_MMU_RWLOCK */
-
-#endif
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
new file mode 100644
index 000000000000..ce878f4be4da
--- /dev/null
+++ b/virt/kvm/pfncache.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables kernel and guest-mode vCPU access to guest physical
+ * memory with suitable invalidation mechanisms.
+ *
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ *
+ * Authors:
+ * David Woodhouse <dwmw2@infradead.org>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+
+#include "kvm_mm.h"
+
+/*
+ * MMU notifier 'invalidate_range_start' hook.
+ */
+void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
+ unsigned long end, bool may_block)
+{
+ DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+ struct gfn_to_pfn_cache *gpc;
+ bool wake_vcpus = false;
+
+ spin_lock(&kvm->gpc_lock);
+ list_for_each_entry(gpc, &kvm->gpc_list, list) {
+ write_lock_irq(&gpc->lock);
+
+ /* Only a single page so no need to care about length */
+ if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
+ gpc->uhva >= start && gpc->uhva < end) {
+ gpc->valid = false;
+
+ /*
+ * If a guest vCPU could be using the physical address,
+ * it needs to be woken.
+ */
+ if (gpc->guest_uses_pa) {
+ if (!wake_vcpus) {
+ wake_vcpus = true;
+ bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
+ }
+ __set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
+ }
+
+ /*
+ * We cannot call mark_page_dirty() from here because
+ * this physical CPU might not have an active vCPU
+ * with which to do the KVM dirty tracking.
+ *
+ * Neither is there any point in telling the kernel MM
+ * that the underlying page is dirty. A vCPU in guest
+ * mode might still be writing to it up to the point
+ * where we wake them a few lines further down anyway.
+ *
+ * So all the dirty marking happens on the unmap.
+ */
+ }
+ write_unlock_irq(&gpc->lock);
+ }
+ spin_unlock(&kvm->gpc_lock);
+
+ if (wake_vcpus) {
+ unsigned int req = KVM_REQ_GPC_INVALIDATE;
+ bool called;
+
+ /*
+ * If the OOM reaper is active, then all vCPUs should have
+ * been stopped already, so perform the request without
+ * KVM_REQUEST_WAIT and be sad if any needed to be woken.
+ */
+ if (!may_block)
+ req &= ~KVM_REQUEST_WAIT;
+
+ called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap);
+
+ WARN_ON_ONCE(called && !may_block);
+ }
+}
+
+bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+ gpa_t gpa, unsigned long len)
+{
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+
+ if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
+ return false;
+
+ if (gpc->gpa != gpa || gpc->generation != slots->generation ||
+ kvm_is_error_hva(gpc->uhva))
+ return false;
+
+ if (!gpc->valid)
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
+
+static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva,
+ gpa_t gpa, bool dirty)
+{
+ /* Unmap the old page if it was mapped before, and release it */
+ if (!is_error_noslot_pfn(pfn)) {
+ if (khva) {
+ if (pfn_valid(pfn))
+ kunmap(pfn_to_page(pfn));
+#ifdef CONFIG_HAS_IOMEM
+ else
+ memunmap(khva);
+#endif
+ }
+
+ kvm_release_pfn(pfn, dirty);
+ if (dirty)
+ mark_page_dirty(kvm, gpa);
+ }
+}
+
+static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
+{
+ unsigned long mmu_seq;
+ kvm_pfn_t new_pfn;
+ int retry;
+
+ do {
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ /* We always request a writeable mapping */
+ new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
+ if (is_error_noslot_pfn(new_pfn))
+ break;
+
+ KVM_MMU_READ_LOCK(kvm);
+ retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva);
+ KVM_MMU_READ_UNLOCK(kvm);
+ if (!retry)
+ break;
+
+ cond_resched();
+ } while (1);
+
+ return new_pfn;
+}
+
+int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+ gpa_t gpa, unsigned long len, bool dirty)
+{
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ unsigned long page_offset = gpa & ~PAGE_MASK;
+ kvm_pfn_t old_pfn, new_pfn;
+ unsigned long old_uhva;
+ gpa_t old_gpa;
+ void *old_khva;
+ bool old_valid, old_dirty;
+ int ret = 0;
+
+ /*
+ * If must fit within a single page. The 'len' argument is
+ * only to enforce that.
+ */
+ if (page_offset + len > PAGE_SIZE)
+ return -EINVAL;
+
+ write_lock_irq(&gpc->lock);
+
+ old_gpa = gpc->gpa;
+ old_pfn = gpc->pfn;
+ old_khva = gpc->khva - offset_in_page(gpc->khva);
+ old_uhva = gpc->uhva;
+ old_valid = gpc->valid;
+ old_dirty = gpc->dirty;
+
+ /* If the userspace HVA is invalid, refresh that first */
+ if (gpc->gpa != gpa || gpc->generation != slots->generation ||
+ kvm_is_error_hva(gpc->uhva)) {
+ gfn_t gfn = gpa_to_gfn(gpa);
+
+ gpc->dirty = false;
+ gpc->gpa = gpa;
+ gpc->generation = slots->generation;
+ gpc->memslot = __gfn_to_memslot(slots, gfn);
+ gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);
+
+ if (kvm_is_error_hva(gpc->uhva)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ gpc->uhva += page_offset;
+ }
+
+ /*
+ * If the userspace HVA changed or the PFN was already invalid,
+ * drop the lock and do the HVA to PFN lookup again.
+ */
+ if (!old_valid || old_uhva != gpc->uhva) {
+ unsigned long uhva = gpc->uhva;
+ void *new_khva = NULL;
+
+ /* Placeholders for "hva is valid but not yet mapped" */
+ gpc->pfn = KVM_PFN_ERR_FAULT;
+ gpc->khva = NULL;
+ gpc->valid = true;
+
+ write_unlock_irq(&gpc->lock);
+
+ new_pfn = hva_to_pfn_retry(kvm, uhva);
+ if (is_error_noslot_pfn(new_pfn)) {
+ ret = -EFAULT;
+ goto map_done;
+ }
+
+ if (gpc->kernel_map) {
+ if (new_pfn == old_pfn) {
+ new_khva = old_khva;
+ old_pfn = KVM_PFN_ERR_FAULT;
+ old_khva = NULL;
+ } else if (pfn_valid(new_pfn)) {
+ new_khva = kmap(pfn_to_page(new_pfn));
+#ifdef CONFIG_HAS_IOMEM
+ } else {
+ new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
+#endif
+ }
+ if (new_khva)
+ new_khva += page_offset;
+ else
+ ret = -EFAULT;
+ }
+
+ map_done:
+ write_lock_irq(&gpc->lock);
+ if (ret) {
+ gpc->valid = false;
+ gpc->pfn = KVM_PFN_ERR_FAULT;
+ gpc->khva = NULL;
+ } else {
+ /* At this point, gpc->valid may already have been cleared */
+ gpc->pfn = new_pfn;
+ gpc->khva = new_khva;
+ }
+ } else {
+ /* If the HVA→PFN mapping was already valid, don't unmap it. */
+ old_pfn = KVM_PFN_ERR_FAULT;
+ old_khva = NULL;
+ }
+
+ out:
+ if (ret)
+ gpc->dirty = false;
+ else
+ gpc->dirty = dirty;
+
+ write_unlock_irq(&gpc->lock);
+
+ __release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_refresh);
+
+void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+{
+ void *old_khva;
+ kvm_pfn_t old_pfn;
+ bool old_dirty;
+ gpa_t old_gpa;
+
+ write_lock_irq(&gpc->lock);
+
+ gpc->valid = false;
+
+ old_khva = gpc->khva - offset_in_page(gpc->khva);
+ old_dirty = gpc->dirty;
+ old_gpa = gpc->gpa;
+ old_pfn = gpc->pfn;
+
+ /*
+ * We can leave the GPA → uHVA map cache intact but the PFN
+ * lookup will need to be redone even for the same page.
+ */
+ gpc->khva = NULL;
+ gpc->pfn = KVM_PFN_ERR_FAULT;
+
+ write_unlock_irq(&gpc->lock);
+
+ __release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
+}
+EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
+
+
+int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+ struct kvm_vcpu *vcpu, bool guest_uses_pa,
+ bool kernel_map, gpa_t gpa, unsigned long len,
+ bool dirty)
+{
+ if (!gpc->active) {
+ rwlock_init(&gpc->lock);
+
+ gpc->khva = NULL;
+ gpc->pfn = KVM_PFN_ERR_FAULT;
+ gpc->uhva = KVM_HVA_ERR_BAD;
+ gpc->vcpu = vcpu;
+ gpc->kernel_map = kernel_map;
+ gpc->guest_uses_pa = guest_uses_pa;
+ gpc->valid = false;
+ gpc->active = true;
+
+ spin_lock(&kvm->gpc_lock);
+ list_add(&gpc->list, &kvm->gpc_list);
+ spin_unlock(&kvm->gpc_lock);
+ }
+ return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty);
+}
+EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);
+
+void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+{
+ if (gpc->active) {
+ spin_lock(&kvm->gpc_lock);
+ list_del(&gpc->list);
+ spin_unlock(&kvm->gpc_lock);
+
+ kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
+ gpc->active = false;
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_destroy);